From d63b70da4439979f0054507b64ee5c922dffd209 Mon Sep 17 00:00:00 2001 From: shenwei41 Date: Wed, 3 Feb 2021 22:10:37 +0800 Subject: [PATCH] update include file --- inc/external/acl/acl.h | 8 +- inc/external/acl/acl/acl.h | 73 + inc/external/acl/acl/acl_base.h | 617 +++ inc/external/acl/acl/acl_mdl.h | 1210 +++++ inc/external/acl/acl/acl_op.h | 549 ++ inc/external/acl/acl/acl_op_compiler.h | 115 + inc/external/acl/acl/acl_prof.h | 296 + inc/external/acl/acl/acl_rt.h | 950 ++++ inc/external/acl/acl/acl_tdt.h | 283 + .../acl/acl/error_codes/ge_error_codes.h | 75 + .../acl/acl/error_codes/rt_error_codes.h | 102 + inc/external/acl/acl/ops/acl_cblas.h | 431 ++ inc/external/acl/acl/ops/acl_dvpp.h | 2493 +++++++++ inc/external/acl/acl/ops/acl_fv.h | 351 ++ inc/external/acl/acl_base.h | 85 +- inc/external/acl/acl_mdl.h | 356 +- inc/external/acl/acl_op.h | 119 +- inc/external/acl/acl_op_compiler.h | 47 +- inc/external/acl/acl_prof.h | 42 +- inc/external/acl/acl_rt.h | 140 +- inc/external/acl/acl_tdt.h | 31 +- inc/external/acl/error_codes/ge_error_codes.h | 14 + inc/external/acl/error_codes/rt_error_codes.h | 135 +- inc/external/acl/ops/acl_cblas.h | 179 +- inc/external/acl/ops/acl_dvpp.h | 369 +- inc/external/acl/ops/acl_fv.h | 14 +- inc/external/hccl/hccl/hccl.h | 133 + inc/external/hccl/hccl/hccl_types.h | 101 + inc/external/runtime/runtime/rt_error_codes.h | 102 + .../aicpu/aicpu_schedule/aicpu_op_type_list.h | 60 + .../inc/inc/aicpu/common/aicpu_task_struct.h | 37 + .../fwkacllib/inc/inc/cce/aicpu_engine.h | 62 + .../inc/inc/cce/aicpu_engine_struct.h | 56 + .../fwkacllib/inc/inc/cce/blas_struct.h | 31 + third_party/fwkacllib/inc/inc/cce/cce.h | 101 + third_party/fwkacllib/inc/inc/cce/cce_def.hpp | 152 + .../inc/inc/cce/common/attr_list.hpp | 82 + .../fwkacllib/inc/inc/cce/common/catch.hpp | 95 + .../fwkacllib/inc/inc/cce/compiler_stub.h | 36 + third_party/fwkacllib/inc/inc/cce/customize.h | 60 + third_party/fwkacllib/inc/inc/cce/dnn.h | 23 + third_party/fwkacllib/inc/inc/cce/dnn_base.h | 676 +++ .../fwkacllib/inc/inc/cce/dnn_base_def.hpp | 994 ++++ third_party/fwkacllib/inc/inc/cce/dnn_op.h | 4838 +++++++++++++++++ .../fwkacllib/inc/inc/cce/dnn_struct.hpp | 23 + .../fwkacllib/inc/inc/cce/dnn_struct_base.hpp | 894 +++ .../fwkacllib/inc/inc/cce/fwk_adpt_struct.h | 130 + .../fwkacllib/inc/inc/cce/l2fusion_struct.hpp | 56 + .../inc/inc/cce/optimizer/fusion_engine.h | 65 + .../fwkacllib/inc/inc/cce/taskdown_api.h | 54 + .../fwkacllib/inc/inc/cce/taskdown_common.hpp | 107 + third_party/fwkacllib/inc/inc/hccl/base.h | 129 + third_party/fwkacllib/inc/inc/hccl/hcom.h | 179 + third_party/fwkacllib/inc/inc/mmpa/mmpa_api.h | 142 + .../inc/inc/mmpa/sub_inc/mmpa_linux.h | 561 ++ .../inc/inc/mmpa/sub_inc/mmpa_typedef_linux.h | 98 + .../inc/inc/mmpa/sub_inc/mmpa_typedef_win.h | 83 + .../fwkacllib/inc/inc/mmpa/sub_inc/mmpa_win.h | 566 ++ third_party/fwkacllib/inc/inc/ops/aipp.h | 78 + third_party/fwkacllib/inc/inc/ops/all_ops.h | 80 + third_party/fwkacllib/inc/inc/ops/array_ops.h | 1231 +++++ third_party/fwkacllib/inc/inc/ops/audio_ops.h | 162 + third_party/fwkacllib/inc/inc/ops/batch_ops.h | 166 + .../fwkacllib/inc/inc/ops/bitwise_ops.h | 59 + .../fwkacllib/inc/inc/ops/boosted_trees_ops.h | 64 + .../inc/inc/ops/candidate_sampling_ops.h | 415 ++ .../fwkacllib/inc/inc/ops/condtake_ops.h | 59 + .../fwkacllib/inc/inc/ops/control_flow_ops.h | 407 ++ third_party/fwkacllib/inc/inc/ops/ctc_ops.h | 142 + .../fwkacllib/inc/inc/ops/data_flow_ops.h | 2344 ++++++++ .../inc/inc/ops/elewise_calculation_ops.h | 3735 
+++++++++++++ .../fwkacllib/inc/inc/ops/functional_ops.h | 333 ++ .../fwkacllib/inc/inc/ops/get_data_ops.h | 103 + third_party/fwkacllib/inc/inc/ops/hcom_ops.h | 284 + third_party/fwkacllib/inc/inc/ops/hvd_ops.h | 81 + third_party/fwkacllib/inc/inc/ops/image_ops.h | 1539 ++++++ .../fwkacllib/inc/inc/ops/internal_ops.h | 84 + .../fwkacllib/inc/inc/ops/linalg_ops.h | 443 ++ third_party/fwkacllib/inc/inc/ops/list_ops.h | 230 + .../fwkacllib/inc/inc/ops/logging_ops.h | 116 + .../fwkacllib/inc/inc/ops/lookup_ops.h | 308 ++ third_party/fwkacllib/inc/inc/ops/math_ops.h | 957 ++++ .../inc/inc/ops/matrix_calculation_ops.h | 1048 ++++ .../fwkacllib/inc/inc/ops/nn_batch_norm_ops.h | 485 ++ .../inc/inc/ops/nn_calculation_ops.h | 1711 ++++++ .../fwkacllib/inc/inc/ops/nn_detect_ops.h | 1654 ++++++ .../fwkacllib/inc/inc/ops/nn_norm_ops.h | 1279 +++++ third_party/fwkacllib/inc/inc/ops/nn_ops.h | 53 + .../fwkacllib/inc/inc/ops/nn_pooling_ops.h | 1608 ++++++ .../fwkacllib/inc/inc/ops/nn_training_ops.h | 2598 +++++++++ third_party/fwkacllib/inc/inc/ops/no_op.h | 41 + .../fwkacllib/inc/inc/ops/nonlinear_fuc_ops.h | 889 +++ .../inc/inc/ops/npu_loss_scale_ops.h | 122 + .../fwkacllib/inc/inc/ops/outfeed_ops.h | 27 + third_party/fwkacllib/inc/inc/ops/pad_ops.h | 445 ++ .../fwkacllib/inc/inc/ops/parsing_ops.h | 56 + .../fwkacllib/inc/inc/ops/quantize_ops.h | 224 + .../fwkacllib/inc/inc/ops/ragged_array_ops.h | 65 + .../inc/inc/ops/ragged_conversion_ops.h | 98 + .../fwkacllib/inc/inc/ops/ragged_math_ops.h | 60 + .../fwkacllib/inc/inc/ops/random_ops.h | 554 ++ .../fwkacllib/inc/inc/ops/reduce_ops.h | 1173 ++++ .../inc/inc/ops/resource_variable_ops.h | 114 + third_party/fwkacllib/inc/inc/ops/rnn.h | 965 ++++ third_party/fwkacllib/inc/inc/ops/rpn_ops.h | 61 + third_party/fwkacllib/inc/inc/ops/save_ops.h | 42 + third_party/fwkacllib/inc/inc/ops/sdca_ops.h | 92 + .../fwkacllib/inc/inc/ops/selection_ops.h | 2174 ++++++++ third_party/fwkacllib/inc/inc/ops/set_ops.h | 181 + .../fwkacllib/inc/inc/ops/sparse_ops.h | 1047 ++++ .../fwkacllib/inc/inc/ops/spectral_ops.h | 148 + .../inc/inc/ops/split_combination_ops.h | 389 ++ third_party/fwkacllib/inc/inc/ops/state_ops.h | 167 + .../inc/inc/ops/stateful_random_ops.h | 236 + .../inc/inc/ops/stateless_random_ops.h | 84 + .../fwkacllib/inc/inc/ops/string_ops.h | 562 ++ .../fwkacllib/inc/inc/ops/swap_co_ops.h | 62 + .../inc/inc/ops/target_crop_and_resize.h | 59 + .../inc/inc/ops/transformation_ops.h | 721 +++ .../inc/inc/ops/warp_perspective_ops.h | 59 + .../inc/inc/register/op_kernel_registry.h | 49 + .../fwkacllib/inc/inc/register/op_registry.h | 96 + third_party/fwkacllib/inc/inc/runtime/base.h | 358 ++ .../fwkacllib/inc/inc/runtime/config.h | 210 + .../fwkacllib/inc/inc/runtime/context.h | 165 + third_party/fwkacllib/inc/inc/runtime/dev.h | 369 ++ .../fwkacllib/inc/inc/runtime/dvfsprofile.h | 63 + third_party/fwkacllib/inc/inc/runtime/event.h | 246 + .../fwkacllib/inc/inc/runtime/kernel.h | 601 ++ third_party/fwkacllib/inc/inc/runtime/mem.h | 543 ++ third_party/fwkacllib/inc/inc/runtime/rt.h | 31 + .../fwkacllib/inc/inc/runtime/rt_model.h | 470 ++ .../fwkacllib/inc/inc/runtime/stream.h | 196 + .../inc/inc/soft_dp/ExternalSoftDp.h | 52 + .../fwkacllib/inc/inc/tdt/data_common.h | 99 + .../fwkacllib/inc/inc/tdt/index_transform.h | 29 + third_party/fwkacllib/inc/inc/tdt/status.h | 763 +++ .../inc/inc/tdt/tdt_host_interface.h | 210 + .../fwkacllib/inc/inc/tdt/tsd_client.h | 195 + .../inc/inc/toolchain/adx_datadump_server.h | 42 + .../fwkacllib/inc/inc/toolchain/plog.h | 59 + 
.../inc/inc/toolchain/prof_acl_api.h | 112 + .../inc/inc/toolchain/prof_callback.h | 135 + .../fwkacllib/inc/inc/toolchain/prof_engine.h | 207 + .../inc/inc/toolchain/prof_mgr_core.h | 93 + .../inc/inc/toolchain/prof_reporter.h | 85 + .../fwkacllib/inc/inc/toolchain/slog.h | 510 ++ .../inc/inc/toolchain/tuning_tool/tune_api.h | 137 + 148 files changed, 60086 insertions(+), 622 deletions(-) create mode 100644 inc/external/acl/acl/acl.h create mode 100644 inc/external/acl/acl/acl_base.h create mode 100644 inc/external/acl/acl/acl_mdl.h create mode 100644 inc/external/acl/acl/acl_op.h create mode 100644 inc/external/acl/acl/acl_op_compiler.h create mode 100644 inc/external/acl/acl/acl_prof.h create mode 100644 inc/external/acl/acl/acl_rt.h create mode 100644 inc/external/acl/acl/acl_tdt.h create mode 100644 inc/external/acl/acl/error_codes/ge_error_codes.h create mode 100644 inc/external/acl/acl/error_codes/rt_error_codes.h create mode 100644 inc/external/acl/acl/ops/acl_cblas.h create mode 100644 inc/external/acl/acl/ops/acl_dvpp.h create mode 100644 inc/external/acl/acl/ops/acl_fv.h create mode 100644 inc/external/hccl/hccl/hccl.h create mode 100644 inc/external/hccl/hccl/hccl_types.h create mode 100644 inc/external/runtime/runtime/rt_error_codes.h create mode 100644 third_party/fwkacllib/inc/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h create mode 100644 third_party/fwkacllib/inc/inc/aicpu/common/aicpu_task_struct.h create mode 100644 third_party/fwkacllib/inc/inc/cce/aicpu_engine.h create mode 100644 third_party/fwkacllib/inc/inc/cce/aicpu_engine_struct.h create mode 100644 third_party/fwkacllib/inc/inc/cce/blas_struct.h create mode 100644 third_party/fwkacllib/inc/inc/cce/cce.h create mode 100644 third_party/fwkacllib/inc/inc/cce/cce_def.hpp create mode 100644 third_party/fwkacllib/inc/inc/cce/common/attr_list.hpp create mode 100644 third_party/fwkacllib/inc/inc/cce/common/catch.hpp create mode 100644 third_party/fwkacllib/inc/inc/cce/compiler_stub.h create mode 100644 third_party/fwkacllib/inc/inc/cce/customize.h create mode 100644 third_party/fwkacllib/inc/inc/cce/dnn.h create mode 100644 third_party/fwkacllib/inc/inc/cce/dnn_base.h create mode 100644 third_party/fwkacllib/inc/inc/cce/dnn_base_def.hpp create mode 100644 third_party/fwkacllib/inc/inc/cce/dnn_op.h create mode 100644 third_party/fwkacllib/inc/inc/cce/dnn_struct.hpp create mode 100644 third_party/fwkacllib/inc/inc/cce/dnn_struct_base.hpp create mode 100644 third_party/fwkacllib/inc/inc/cce/fwk_adpt_struct.h create mode 100644 third_party/fwkacllib/inc/inc/cce/l2fusion_struct.hpp create mode 100644 third_party/fwkacllib/inc/inc/cce/optimizer/fusion_engine.h create mode 100644 third_party/fwkacllib/inc/inc/cce/taskdown_api.h create mode 100644 third_party/fwkacllib/inc/inc/cce/taskdown_common.hpp create mode 100644 third_party/fwkacllib/inc/inc/hccl/base.h create mode 100644 third_party/fwkacllib/inc/inc/hccl/hcom.h create mode 100644 third_party/fwkacllib/inc/inc/mmpa/mmpa_api.h create mode 100644 third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_linux.h create mode 100644 third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_linux.h create mode 100644 third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_win.h create mode 100644 third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_win.h create mode 100644 third_party/fwkacllib/inc/inc/ops/aipp.h create mode 100644 third_party/fwkacllib/inc/inc/ops/all_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/array_ops.h create mode 100644 
third_party/fwkacllib/inc/inc/ops/audio_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/batch_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/bitwise_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/boosted_trees_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/candidate_sampling_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/condtake_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/control_flow_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/ctc_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/data_flow_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/elewise_calculation_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/functional_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/get_data_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/hcom_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/hvd_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/image_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/internal_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/linalg_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/list_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/logging_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/lookup_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/math_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/matrix_calculation_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/nn_batch_norm_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/nn_calculation_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/nn_detect_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/nn_norm_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/nn_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/nn_pooling_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/nn_training_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/no_op.h create mode 100644 third_party/fwkacllib/inc/inc/ops/nonlinear_fuc_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/npu_loss_scale_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/outfeed_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/pad_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/parsing_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/quantize_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/ragged_array_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/ragged_conversion_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/ragged_math_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/random_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/reduce_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/resource_variable_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/rnn.h create mode 100644 third_party/fwkacllib/inc/inc/ops/rpn_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/save_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/sdca_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/selection_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/set_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/sparse_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/spectral_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/split_combination_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/state_ops.h create mode 100644 
third_party/fwkacllib/inc/inc/ops/stateful_random_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/stateless_random_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/string_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/swap_co_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/target_crop_and_resize.h create mode 100644 third_party/fwkacllib/inc/inc/ops/transformation_ops.h create mode 100644 third_party/fwkacllib/inc/inc/ops/warp_perspective_ops.h create mode 100644 third_party/fwkacllib/inc/inc/register/op_kernel_registry.h create mode 100644 third_party/fwkacllib/inc/inc/register/op_registry.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/base.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/config.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/context.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/dev.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/dvfsprofile.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/event.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/kernel.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/mem.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/rt.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/rt_model.h create mode 100644 third_party/fwkacllib/inc/inc/runtime/stream.h create mode 100644 third_party/fwkacllib/inc/inc/soft_dp/ExternalSoftDp.h create mode 100644 third_party/fwkacllib/inc/inc/tdt/data_common.h create mode 100644 third_party/fwkacllib/inc/inc/tdt/index_transform.h create mode 100644 third_party/fwkacllib/inc/inc/tdt/status.h create mode 100644 third_party/fwkacllib/inc/inc/tdt/tdt_host_interface.h create mode 100644 third_party/fwkacllib/inc/inc/tdt/tsd_client.h create mode 100644 third_party/fwkacllib/inc/inc/toolchain/adx_datadump_server.h create mode 100644 third_party/fwkacllib/inc/inc/toolchain/plog.h create mode 100644 third_party/fwkacllib/inc/inc/toolchain/prof_acl_api.h create mode 100644 third_party/fwkacllib/inc/inc/toolchain/prof_callback.h create mode 100644 third_party/fwkacllib/inc/inc/toolchain/prof_engine.h create mode 100644 third_party/fwkacllib/inc/inc/toolchain/prof_mgr_core.h create mode 100644 third_party/fwkacllib/inc/inc/toolchain/prof_reporter.h create mode 100644 third_party/fwkacllib/inc/inc/toolchain/slog.h create mode 100644 third_party/fwkacllib/inc/inc/toolchain/tuning_tool/tune_api.h diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h index ef5b4772..eae87835 100644 --- a/inc/external/acl/acl.h +++ b/inc/external/acl/acl.h @@ -26,9 +26,9 @@ extern "C" { #endif // Current version is 1.0.0 -#define ACL_MAJOR_VERSION 1 -#define ACL_MINOR_VERSION 0 -#define ACL_PATCH_VERSION 0 +#define ACL_MAJOR_VERSION 1 +#define ACL_MINOR_VERSION 0 +#define ACL_PATCH_VERSION 0 /** * @ingroup AscendCL @@ -70,4 +70,4 @@ ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *min } #endif -#endif // INC_EXTERNAL_ACL_ACL_H_ +#endif // INC_EXTERNAL_ACL_ACL_H_ diff --git a/inc/external/acl/acl/acl.h b/inc/external/acl/acl/acl.h new file mode 100644 index 00000000..eae87835 --- /dev/null +++ b/inc/external/acl/acl/acl.h @@ -0,0 +1,73 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_H_ +#define INC_EXTERNAL_ACL_ACL_H_ + +#include "acl_rt.h" +#include "acl_op.h" +#include "acl_mdl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Current version is 1.0.0 +#define ACL_MAJOR_VERSION 1 +#define ACL_MINOR_VERSION 0 +#define ACL_PATCH_VERSION 0 + +/** + * @ingroup AscendCL + * @brief acl initialize + * + * @par Restriction + * The aclInit interface can be called only once in a process + * @param configPath [IN] the config path,it can be NULL + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath); + +/** + * @ingroup AscendCL + * @brief acl finalize + * + * @par Restriction + * Need to call aclFinalize before the process exits. + * After calling aclFinalize,the services cannot continue to be used normally. + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclFinalize(); + +/** + * @ingroup AscendCL + * @brief query ACL interface version + * + * @param majorVersion[OUT] ACL interface major version + * @param minorVersion[OUT] ACL interface minor version + * @param patchVersion[OUT] ACL interface patch version + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_H_ diff --git a/inc/external/acl/acl/acl_base.h b/inc/external/acl/acl/acl_base.h new file mode 100644 index 00000000..0b520002 --- /dev/null +++ b/inc/external/acl/acl/acl_base.h @@ -0,0 +1,617 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
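The initialization interfaces declared in acl.h above compose into a very small program skeleton. A minimal sketch, assuming the conventional acl/acl.h include path and omitting real error handling; the NULL argument relies on the documented rule that configPath may be NULL.

#include <stdio.h>
#include "acl/acl.h"

int main(void) {
    /* aclInit may be called only once per process; a NULL config path is allowed */
    aclError ret = aclInit(NULL);
    if (ret != ACL_SUCCESS) {
        return ret;
    }

    int32_t major = 0, minor = 0, patch = 0;
    /* query the ACL interface version (1.0.0 in this header set) */
    ret = aclrtGetVersion(&major, &minor, &patch);
    if (ret == ACL_SUCCESS) {
        printf("ACL version %d.%d.%d\n", (int)major, (int)minor, (int)patch);
    }

    /* aclFinalize must be called before the process exits */
    return aclFinalize();
}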
+ */ + +#ifndef INC_EXTERNAL_ACL_ACL_BASE_H_ +#define INC_EXTERNAL_ACL_ACL_BASE_H_ + +#include +#include +#include "error_codes/rt_error_codes.h" +#include "error_codes/ge_error_codes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define ACL_FUNC_VISIBILITY _declspec(dllexport) +#else +#define ACL_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define ACL_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define ACL_FUNC_VISIBILITY +#endif +#endif + +#ifdef __GNUC__ +#define ACL_DEPRECATED __attribute__((deprecated)) +#define ACL_DEPRECATED_MESSAGE(message) __attribute__((deprecated(message))) +#elif defined(_MSC_VER) +#define ACL_DEPRECATED __declspec(deprecated) +#define ACL_DEPRECATED_MESSAGE(message) __declspec(deprecated(message)) +#else +#define ACL_DEPRECATED +#define ACL_DEPRECATED_MESSAGE(message) +#endif + +typedef void *aclrtStream; +typedef void *aclrtEvent; +typedef void *aclrtContext; +typedef int aclError; +typedef uint16_t aclFloat16; +typedef struct aclDataBuffer aclDataBuffer; +typedef struct aclTensorDesc aclTensorDesc; + +static const int ACL_ERROR_NONE = 0; +static const int ACL_SUCCESS = 0; + +static const int ACL_ERROR_INVALID_PARAM = 100000; +static const int ACL_ERROR_UNINITIALIZE = 100001; +static const int ACL_ERROR_REPEAT_INITIALIZE = 100002; +static const int ACL_ERROR_INVALID_FILE = 100003; +static const int ACL_ERROR_WRITE_FILE = 100004; +static const int ACL_ERROR_INVALID_FILE_SIZE = 100005; +static const int ACL_ERROR_PARSE_FILE = 100006; +static const int ACL_ERROR_FILE_MISSING_ATTR = 100007; +static const int ACL_ERROR_FILE_ATTR_INVALID = 100008; +static const int ACL_ERROR_INVALID_DUMP_CONFIG = 100009; +static const int ACL_ERROR_INVALID_PROFILING_CONFIG = 100010; +static const int ACL_ERROR_INVALID_MODEL_ID = 100011; +static const int ACL_ERROR_DESERIALIZE_MODEL = 100012; +static const int ACL_ERROR_PARSE_MODEL = 100013; +static const int ACL_ERROR_READ_MODEL_FAILURE = 100014; +static const int ACL_ERROR_MODEL_SIZE_INVALID = 100015; +static const int ACL_ERROR_MODEL_MISSING_ATTR = 100016; +static const int ACL_ERROR_MODEL_INPUT_NOT_MATCH = 100017; +static const int ACL_ERROR_MODEL_OUTPUT_NOT_MATCH = 100018; +static const int ACL_ERROR_MODEL_NOT_DYNAMIC = 100019; +static const int ACL_ERROR_OP_TYPE_NOT_MATCH = 100020; +static const int ACL_ERROR_OP_INPUT_NOT_MATCH = 100021; +static const int ACL_ERROR_OP_OUTPUT_NOT_MATCH = 100022; +static const int ACL_ERROR_OP_ATTR_NOT_MATCH = 100023; +static const int ACL_ERROR_OP_NOT_FOUND = 100024; +static const int ACL_ERROR_OP_LOAD_FAILED = 100025; +static const int ACL_ERROR_UNSUPPORTED_DATA_TYPE = 100026; +static const int ACL_ERROR_FORMAT_NOT_MATCH = 100027; +static const int ACL_ERROR_BIN_SELECTOR_NOT_REGISTERED = 100028; +static const int ACL_ERROR_KERNEL_NOT_FOUND = 100029; +static const int ACL_ERROR_BIN_SELECTOR_ALREADY_REGISTERED = 100030; +static const int ACL_ERROR_KERNEL_ALREADY_REGISTERED = 100031; +static const int ACL_ERROR_INVALID_QUEUE_ID = 100032; +static const int ACL_ERROR_REPEAT_SUBSCRIBE = 100033; +static const int ACL_ERROR_STREAM_NOT_SUBSCRIBE = 100034; +static const int ACL_ERROR_THREAD_NOT_SUBSCRIBE = 100035; +static const int ACL_ERROR_WAIT_CALLBACK_TIMEOUT = 100036; +static const int ACL_ERROR_REPEAT_FINALIZE = 100037; +static const int ACL_ERROR_NOT_STATIC_AIPP = 100038; +static const int ACL_ERROR_COMPILING_STUB_MODE = 100039; +static const int ACL_ERROR_GROUP_NOT_SET = 100040; +static const int 
ACL_ERROR_GROUP_NOT_CREATE = 100041; +static const int ACL_ERROR_PROF_ALREADY_RUN = 100042; +static const int ACL_ERROR_PROF_NOT_RUN = 100043; +static const int ACL_ERROR_DUMP_ALREADY_RUN = 100044; +static const int ACL_ERROR_DUMP_NOT_RUN = 100045; +static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046; +static const int ACL_ERROR_PROF_API_CONFLICT = 148047; +static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048; +static const int ACL_ERROR_INVALID_OPP_PATH = 148049; + +static const int ACL_ERROR_BAD_ALLOC = 200000; +static const int ACL_ERROR_API_NOT_SUPPORT = 200001; +static const int ACL_ERROR_INVALID_DEVICE = 200002; +static const int ACL_ERROR_MEMORY_ADDRESS_UNALIGNED = 200003; +static const int ACL_ERROR_RESOURCE_NOT_MATCH = 200004; +static const int ACL_ERROR_INVALID_RESOURCE_HANDLE = 200005; +static const int ACL_ERROR_FEATURE_UNSUPPORTED = 200006; +static const int ACL_ERROR_PROF_MODULES_UNSUPPORTED = 200007; + +static const int ACL_ERROR_STORAGE_OVER_LIMIT = 300000; + +static const int ACL_ERROR_INTERNAL_ERROR = 500000; +static const int ACL_ERROR_FAILURE = 500001; +static const int ACL_ERROR_GE_FAILURE = 500002; +static const int ACL_ERROR_RT_FAILURE = 500003; +static const int ACL_ERROR_DRV_FAILURE = 500004; +static const int ACL_ERROR_PROFILING_FAILURE = 500005; + +#define ACL_TENSOR_SHAPE_RANGE_NUM 2 +#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE + +typedef enum { + ACL_DT_UNDEFINED = -1, + ACL_FLOAT = 0, + ACL_FLOAT16 = 1, + ACL_INT8 = 2, + ACL_INT32 = 3, + ACL_UINT8 = 4, + ACL_INT16 = 6, + ACL_UINT16 = 7, + ACL_UINT32 = 8, + ACL_INT64 = 9, + ACL_UINT64 = 10, + ACL_DOUBLE = 11, + ACL_BOOL = 12, + ACL_STRING = 13, +} aclDataType; + +typedef enum { + ACL_FORMAT_UNDEFINED = -1, + ACL_FORMAT_NCHW = 0, + ACL_FORMAT_NHWC = 1, + ACL_FORMAT_ND = 2, + ACL_FORMAT_NC1HWC0 = 3, + ACL_FORMAT_FRACTAL_Z = 4, + ACL_FORMAT_NC1HWC0_C04 = 12, + ACL_FORMAT_NDHWC = 27, + ACL_FORMAT_FRACTAL_NZ = 29, + ACL_FORMAT_NCDHW = 30, + ACL_FORMAT_NDC1HWC0 = 32, + ACL_FRACTAL_Z_3D = 33 +} aclFormat; + +typedef enum { + ACL_DEBUG = 0, + ACL_INFO = 1, + ACL_WARNING = 2, + ACL_ERROR = 3, +} aclLogLevel; + +/** + * @ingroup AscendCL + * @brief Converts data of type aclFloat16 to data of type float + * + * @param value [IN] Data to be converted + * + * @retval Transformed data + */ +ACL_FUNC_VISIBILITY float aclFloat16ToFloat(aclFloat16 value); + +/** + * @ingroup AscendCL + * @brief Converts data of type float to data of type aclFloat16 + * + * @param value [IN] Data to be converted + * + * @retval Transformed data + */ +ACL_FUNC_VISIBILITY aclFloat16 aclFloatToFloat16(float value); + +/** + * @ingroup AscendCL + * @brief create data of aclDataBuffer + * + * @param data [IN] pointer to data + * @li Need to be managed by the user, + * call aclrtMalloc interface to apply for memory, + * call aclrtFree interface to release memory + * + * @param size [IN] size of data in bytes + * + * @retval pointer to created instance. nullptr if run out of memory + * + * @see aclrtMalloc | aclrtFree + */ +ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size); + +/** + * @ingroup AscendCL + * @brief destroy data of aclDataBuffer + * + * @par Function + * Only the aclDataBuffer type data is destroyed here. + * The memory of the data passed in when the aclDataDataBuffer interface + * is called to create aclDataBuffer type data must be released by the user + * + * @param dataBuffer [IN] pointer to the aclDataBuffer + * + * @retval ACL_SUCCESS The function is successfully executed. 
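aclFloat16 is an opaque 16-bit value, so host code goes through the conversion helpers declared above rather than casting. A tiny sketch of the round trip:

#include "acl/acl_base.h"

/* round-trip a float through the half-precision helpers declared above */
static float half_round_trip(float value) {
    aclFloat16 h = aclFloatToFloat16(value);  /* float -> fp16 */
    return aclFloat16ToFloat(h);              /* fp16 -> float */
}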
+ * @retval OtherValues Failure + * + * @see aclCreateDataBuffer + */ +ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer); + +/** + * @ingroup AscendCL + * @brief update new data of aclDataBuffer + * + * @param dataBuffer [OUT] pointer to aclDataBuffer + * @li The old data need to be released by the user, otherwise it may occur memory leak leakage + * call aclGetDataBufferAddr interface to get old data address + * call aclrtFree interface to release memory + * + * @param data [IN] pointer to new data + * @li Need to be managed by the user, + * call aclrtMalloc interface to apply for memory, + * call aclrtFree interface to release memory + * + * @param size [IN] size of data in bytes + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr + */ +ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size); + +/** + * @ingroup AscendCL + * @brief get data address from aclDataBuffer + * + * @param dataBuffer [IN] pointer to the data of aclDataBuffer + * + * @retval data address + */ +ACL_FUNC_VISIBILITY void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer); + +/** + * @ingroup AscendCL + * @brief get data size of aclDataBuffer + * + * @param dataBuffer [IN] pointer to the data of aclDataBuffer + * + * @retval data size + */ +ACL_DEPRECATED_MESSAGE("aclGetDataBufferSize is deprecated, use aclGetDataBufferSizeV2 instead") +ACL_FUNC_VISIBILITY uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer); + +/** + * @ingroup AscendCL + * @brief get data size of aclDataBuffer to replace aclGetDataBufferSize + * + * @param dataBuffer [IN] pointer to the data of aclDataBuffer + * + * @retval data size + */ +ACL_FUNC_VISIBILITY size_t aclGetDataBufferSizeV2(const aclDataBuffer *dataBuffer); + +/** + * @ingroup AscendCL + * @brief get size of aclDataType + * + * @param dataType [IN] aclDataType data the size to get + * + * @retval size of the aclDataType + */ +ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType); + +// interfaces of tensor desc +/** + * @ingroup AscendCL + * @brief create data aclTensorDesc + * + * @param dataType [IN] Data types described by tensor + * @param numDims [IN] the number of dimensions of the shape + * @param dims [IN] the size of the specified dimension + * @param format [IN] tensor format + * + * @retval aclTensorDesc pointer. + * @retval nullptr if param is invalid or run out of memory + */ +ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, + int numDims, + const int64_t *dims, + aclFormat format); + +/** + * @ingroup AscendCL + * @brief destroy data aclTensorDesc + * + * @param desc [IN] pointer to the data of aclTensorDesc to destroy + */ +ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief set tensor shape range for aclTensorDesc + * + * @param desc [OUT] pointer to the data of aclTensorDesc + * @param dimsCount [IN] the number of dimensions of the shape + * @param dimsRange [IN] the range of dimensions of the shape + * + * @retval ACL_SUCCESS The function is successfully executed. 
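A minimal sketch of the aclDataBuffer lifecycle described above. The comments say the wrapped memory is user-managed through aclrtMalloc/aclrtFree from acl_rt.h; to stay self-contained this sketch wraps plain host memory instead, so it only illustrates the bookkeeping, not a device-ready buffer.

#include <stdlib.h>
#include "acl/acl_base.h"

/* wrap caller-provided memory in an aclDataBuffer and read its metadata back */
static aclError wrap_buffer_demo(size_t len) {
    void *data = malloc(len);               /* stand-in for aclrtMalloc'd device memory */
    if (data == NULL) {
        return ACL_ERROR_BAD_ALLOC;
    }

    aclDataBuffer *buf = aclCreateDataBuffer(data, len);
    if (buf == NULL) {
        free(data);
        return ACL_ERROR_BAD_ALLOC;
    }

    void *addr = aclGetDataBufferAddr(buf);     /* == data */
    size_t size = aclGetDataBufferSizeV2(buf);  /* == len; V2 replaces aclGetDataBufferSize */
    (void)addr;
    (void)size;

    /* destroying the aclDataBuffer does not free the wrapped memory */
    aclError ret = aclDestroyDataBuffer(buf);
    free(data);
    return ret;
}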
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc, + size_t dimsCount, + int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); + +/** + * @ingroup AscendCL + * @brief get data type specified by the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval data type specified by the tensor description. + * @retval ACL_DT_UNDEFINED if description is null + */ +ACL_FUNC_VISIBILITY aclDataType aclGetTensorDescType(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief get data format specified by the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval data format specified by the tensor description. + * @retval ACL_FORMAT_UNDEFINED if description is null + */ +ACL_FUNC_VISIBILITY aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief get tensor size specified by the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval data size specified by the tensor description. + * @retval 0 if description is null + */ +ACL_FUNC_VISIBILITY size_t aclGetTensorDescSize(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief get element count specified by the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval element count specified by the tensor description. + * @retval 0 if description is null + */ +ACL_FUNC_VISIBILITY size_t aclGetTensorDescElementCount(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief get number of dims specified by the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval number of dims specified by the tensor description. + * @retval 0 if description is null + * @retval ACL_UNKNOWN_RANK if the tensor dim is -2 + */ +ACL_FUNC_VISIBILITY size_t aclGetTensorDescNumDims(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief Get the size of the specified dim in the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * @param index [IN] index of dims, start from 0. + * + * @retval dim specified by the tensor description and index. + * @retval -1 if description or index is invalid + */ +ACL_DEPRECATED_MESSAGE("aclGetTensorDescDim is deprecated, use aclGetTensorDescDimV2 instead") +ACL_FUNC_VISIBILITY int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index); + +/** + * @ingroup AscendCL + * @brief Get the size of the specified dim in the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * @param index [IN] index of dims, start from 0. + * @param dimSize [OUT] size of the specified dim. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, size_t index, int64_t *dimSize); + +/** + * @ingroup AscendCL + * @brief Get the range of the specified dim in the tensor description + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * @param index [IN] index of dims, start from 0. + * @param dimRangeNum [IN] number of dimRange. + * @param dimRange [OUT] range of the specified dim. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, + size_t index, + size_t dimRangeNum, + int64_t *dimRange); + +/** + * @ingroup AscendCL + * @brief set tensor description name + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param name [IN] tensor description name + */ +ACL_FUNC_VISIBILITY void aclSetTensorDescName(aclTensorDesc *desc, const char *name); + +/** + * @ingroup AscendCL + * @brief get tensor description name + * + * @param desc [IN] pointer to the instance of aclTensorDesc + * + * @retval tensor description name. + * @retval empty string if description is null + */ +ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief Convert the format in the source aclTensorDesc according to + * the specified dstFormat to generate a new target aclTensorDesc. + * The format in the source aclTensorDesc remains unchanged. + * + * @param srcDesc [IN] pointer to the source tensor desc + * @param dstFormat [IN] destination format + * @param dstDesc [OUT] pointer to the pointer to the destination tensor desc + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat, + aclTensorDesc **dstDesc); + +/** + * @ingroup AscendCL + * @brief Set the storage format specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param format [IN] the storage format + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_DEPRECATED_MESSAGE("aclSetTensorStorageFormat is deprecated, use aclSetTensorFormat instead") +ACL_FUNC_VISIBILITY aclError aclSetTensorStorageFormat(aclTensorDesc *desc, aclFormat format); + +/** + * @ingroup AscendCL + * @brief Set the storage shape specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param numDims [IN] the number of dimensions of the shape + * @param dims [IN] the size of the specified dimension + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_DEPRECATED_MESSAGE("aclSetTensorStorageShape is deprecated, use aclSetTensorShape instead") +ACL_FUNC_VISIBILITY aclError aclSetTensorStorageShape(aclTensorDesc *desc, int numDims, const int64_t *dims); + +/** + * @ingroup AscendCL + * @brief Set the format specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param format [IN] the storage format + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorFormat(aclTensorDesc *desc, aclFormat format); + +/** + * @ingroup AscendCL + * @brief Set the shape specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param numDims [IN] the number of dimensions of the shape + * @param dims [IN] the size of the specified dimension + * + * @retval ACL_SUCCESS The function is successfully executed. 
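The tensor-description interfaces above fit together as in the following sketch: create a float NCHW descriptor, name it, and read the shape back. The shape and name are arbitrary example values.

#include <stdint.h>
#include "acl/acl_base.h"

/* build a 4-D float NCHW descriptor and query it back */
static aclError tensor_desc_demo(void) {
    const int64_t dims[] = {1, 3, 224, 224};   /* example shape only */
    aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT, 4, dims, ACL_FORMAT_NCHW);
    if (desc == NULL) {
        return ACL_ERROR_BAD_ALLOC;
    }

    aclSetTensorDescName(desc, "input0");      /* illustrative name */

    size_t numDims = aclGetTensorDescNumDims(desc);
    for (size_t i = 0; i < numDims; ++i) {
        int64_t dim = 0;
        (void)aclGetTensorDescDimV2(desc, i, &dim);  /* preferred over aclGetTensorDescDim */
    }

    size_t bytes = aclGetTensorDescSize(desc); /* total size implied by dtype and shape */
    (void)bytes;

    aclDestroyTensorDesc(desc);
    return ACL_SUCCESS;
}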
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorShape(aclTensorDesc *desc, int numDims, const int64_t *dims); + +/** + * @ingroup AscendCL + * @brief Set the original format specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param format [IN] the storage format + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorOriginFormat(aclTensorDesc *desc, aclFormat format); + +/** + * @ingroup AscendCL + * @brief Set the original shape specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param numDims [IN] the number of dimensions of the shape + * @param dims [IN] the size of the specified dimension + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int numDims, const int64_t *dims); + +/** + * @ingroup AscendCL + * @brief get op description info + * + * @param desc [IN] pointer to tensor description + * @param index [IN] index of tensor + * + * @retval null for failed. + * @retval OtherValues success. +*/ +ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index); + +/** + * @ingroup AscendCL + * @brief get address of tensor + * + * @param desc [IN] pointer to tensor description + * + * @retval null for failed + * @retval OtherValues success +*/ +ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); + +/** + * @ingroup AscendCL + * @brief Set the dynamic input name specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param dynamicInputName [IN] pointer to the dynamic input name + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName); + +/** + * @ingroup AscendCL + * @brief Set const data specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param dataBuffer [IN] pointer to the const databuffer + * @param length [IN] the length of const databuffer + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length); + +/** + * @ingroup AscendCL + * @brief an interface for users to output APP logs + * + * @param logLevel [IN] the level of current log + * @param func [IN] the function where the log is located + * @param file [IN] the file where the log is located + * @param line [IN] Number of source lines where the log is located + * @param fmt [IN] the format of current log + * @param ... [IN] the value of current log + */ +ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, + const char *fmt, ...); + +#define ACL_APP_LOG(level, fmt, ...) 
\ + aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ diff --git a/inc/external/acl/acl/acl_mdl.h b/inc/external/acl/acl/acl_mdl.h new file mode 100644 index 00000000..4f3e257f --- /dev/null +++ b/inc/external/acl/acl/acl_mdl.h @@ -0,0 +1,1210 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_MODEL_H_ +#define INC_EXTERNAL_ACL_ACL_MODEL_H_ + +#include +#include + +#include "acl_base.h" +#include "acl_rt.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_MAX_DIM_CNT 128 +#define ACL_MAX_TENSOR_NAME_LEN 128 +#define ACL_MAX_BATCH_NUM 128 +#define ACL_MAX_HW_NUM 128 +#define ACL_MAX_SHAPE_COUNT 128 +#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF + +#define ACL_MDL_LOAD_FROM_FILE 1 +#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 +#define ACL_MDL_LOAD_FROM_MEM 3 +#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 +#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 +#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 + +#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" +#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" + +typedef struct aclmdlDataset aclmdlDataset; +typedef struct aclmdlDesc aclmdlDesc; +typedef struct aclmdlAIPP aclmdlAIPP; +typedef struct aclAippExtendInfo aclAippExtendInfo; +typedef struct aclmdlConfigHandle aclmdlConfigHandle; + +typedef enum { + ACL_YUV420SP_U8 = 1, + ACL_XRGB8888_U8, + ACL_RGB888_U8, + ACL_YUV400_U8, + ACL_NC1HWC0DI_FP16, + ACL_NC1HWC0DI_S8, + ACL_ARGB8888_U8, + ACL_YUYV_U8, + ACL_YUV422SP_U8, + ACL_AYUV444_U8, + ACL_RAW10, + ACL_RAW12, + ACL_RAW16, + ACL_RAW24, + ACL_AIPP_RESERVED = 0xffff, +} aclAippInputFormat; + +typedef enum { + ACL_MDL_PRIORITY_INT32 = 0, + ACL_MDL_LOAD_TYPE_SIZET, + ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ + ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ + ACL_MDL_MEM_SIZET, + ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ + ACL_MDL_WEIGHT_SIZET, + ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */ + ACL_MDL_WORKSPACE_SIZET, + ACL_MDL_INPUTQ_NUM_SIZET, + ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ + ACL_MDL_OUTPUTQ_NUM_SIZET, + ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ +} aclmdlConfigAttr; + +typedef enum { + ACL_DATA_WITHOUT_AIPP = 0, + ACL_DATA_WITH_STATIC_AIPP, + ACL_DATA_WITH_DYNAMIC_AIPP, + ACL_DYNAMIC_AIPP_NODE +} aclmdlInputAippType; + +typedef struct aclmdlIODims { + char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ + size_t dimCount; /**< dim array count */ + int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ +} aclmdlIODims; + +typedef struct aclAippDims { + aclmdlIODims srcDims; /**< input dims before model transform */ + size_t srcSize; /**< input size before model transform */ + aclmdlIODims aippOutdims; /**< aipp output dims */ + size_t aippOutSize; 
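ACL_APP_LOG simply forwards the call site's function, file and line to aclAppLog, so application code only supplies a level and a printf-style format, e.g.:

#include <stdint.h>
#include "acl/acl_base.h"

static void log_demo(uint32_t count) {
    /* the macro fills in __FUNCTION__, __FILE__ and __LINE__ automatically */
    ACL_APP_LOG(ACL_INFO, "processed %u buffers", count);
}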
/**< aipp output size */ +} aclAippDims; + +typedef struct aclmdlBatch { + size_t batchCount; /**< batch array count */ + uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ +} aclmdlBatch; + +typedef struct aclmdlHW { + size_t hwCount; /**< height&width array count */ + uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ +} aclmdlHW; + +typedef struct aclAippInfo { + aclAippInputFormat inputFormat; + int32_t srcImageSizeW; + int32_t srcImageSizeH; + int8_t cropSwitch; + int32_t loadStartPosW; + int32_t loadStartPosH; + int32_t cropSizeW; + int32_t cropSizeH; + int8_t resizeSwitch; + int32_t resizeOutputW; + int32_t resizeOutputH; + int8_t paddingSwitch; + int32_t leftPaddingSize; + int32_t rightPaddingSize; + int32_t topPaddingSize; + int32_t bottomPaddingSize; + int8_t cscSwitch; + int8_t rbuvSwapSwitch; + int8_t axSwapSwitch; + int8_t singleLineMode; + int32_t matrixR0C0; + int32_t matrixR0C1; + int32_t matrixR0C2; + int32_t matrixR1C0; + int32_t matrixR1C1; + int32_t matrixR1C2; + int32_t matrixR2C0; + int32_t matrixR2C1; + int32_t matrixR2C2; + int32_t outputBias0; + int32_t outputBias1; + int32_t outputBias2; + int32_t inputBias0; + int32_t inputBias1; + int32_t inputBias2; + int32_t meanChn0; + int32_t meanChn1; + int32_t meanChn2; + int32_t meanChn3; + float minChn0; + float minChn1; + float minChn2; + float minChn3; + float varReciChn0; + float varReciChn1; + float varReciChn2; + float varReciChn3; + aclFormat srcFormat; + aclDataType srcDatatype; + size_t srcDimNum; + size_t shapeCount; + aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; + aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ +} aclAippInfo; + +/** + * @ingroup AscendCL + * @brief Create data of type aclmdlDesc + * + * @retval the aclmdlDesc pointer + */ +ACL_FUNC_VISIBILITY aclmdlDesc *aclmdlCreateDesc(); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlDesc + * + * @param modelDesc [IN] Pointer to almdldlDesc to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyDesc(aclmdlDesc *modelDesc); + +/** + * @ingroup AscendCL + * @brief Get aclmdlDesc data of the model according to the model ID + * + * @param modelDesc [OUT] aclmdlDesc pointer + * @param modelId [IN] model id + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetDesc(aclmdlDesc *modelDesc, uint32_t modelId); + +/** + * @ingroup AscendCL + * @brief Get the number of the inputs of + * the model according to data of aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * + * @retval input size with aclmdlDesc + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetNumInputs(aclmdlDesc *modelDesc); + +/** + * @ingroup AscendCL + * @brief Get the number of the output of + * the model according to data of aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * + * @retval output size with aclmdlDesc + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetNumOutputs(aclmdlDesc *modelDesc); + +/** + * @ingroup AscendCL + * @brief Get the size of the specified input according to + * the data of type aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * @param index [IN] the size of the number of inputs to be obtained, + * the index value starts from 0 + * + * @retval Specify the size of the input + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetInputSizeByIndex(aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief Get the size of the specified output according to + * the data of type aclmdlDesc + * + * @param modelDesc [IN] aclmdlDesc pointer + * @param index [IN] the size of the number of outputs to be obtained, + * the index value starts from 0 + * + * @retval Specify the size of the output + */ +ACL_FUNC_VISIBILITY size_t aclmdlGetOutputSizeByIndex(aclmdlDesc *modelDesc, size_t index); + +/** + * @ingroup AscendCL + * @brief Create data of type aclmdlDataset + * + * @retval the aclmdlDataset pointer + */ +ACL_FUNC_VISIBILITY aclmdlDataset *aclmdlCreateDataset(); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlDataset + * + * @param dataset [IN] Pointer to aclmdlDataset to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyDataset(const aclmdlDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Add aclDataBuffer to aclmdlDataset + * + * @param dataset [OUT] aclmdlDataset address of aclDataBuffer to be added + * @param dataBuffer [IN] aclDataBuffer address to be added + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the number of aclDataBuffer in aclmdlDataset
+ *
+ * @param dataset [IN] aclmdlDataset pointer
+ *
+ * @retval the number of aclDataBuffer
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *dataset);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the aclDataBuffer in aclmdlDataset by index
+ *
+ * @param dataset [IN] aclmdlDataset pointer
+ * @param index [IN] the index of aclDataBuffer
+ *
+ * @retval Get successfully, return the address of aclDataBuffer
+ * @retval Failure return NULL
+ */
+ACL_FUNC_VISIBILITY aclDataBuffer *aclmdlGetDatasetBuffer(const aclmdlDataset *dataset, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from files
+ * and manage memory internally by the system
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations
+ *
+ * @param modelPath [IN] Storage path for offline model files
+ * @param modelId [OUT] Model ID generated after
+ * the system finishes loading the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t *modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from memory and manage the memory of
+ * model running internally by the system
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations
+ *
+ * @param model [IN] Model data stored in memory
+ * @param modelSize [IN] model data size
+ * @param modelId [OUT] Model ID generated after
+ * the system finishes loading the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize,
+                                               uint32_t *modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from a file,
+ * and the user manages the memory of the model run by itself
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations.
+ * @param modelPath [IN] Storage path for offline model files
+ * @param modelId [OUT] Model ID generated after the system finishes loading the model
+ * @param workPtr [IN] A pointer to the working memory
+ * required by the model on the Device, can be null
+ * @param workSize [IN] The amount of working memory required by the model
+ * @param weightPtr [IN] Pointer to model weight memory on Device
+ * @param weightSize [IN] The amount of weight memory required by the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
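Putting the model-description and dataset interfaces together, the sketch below sizes each model input from the aclmdlDesc and wraps caller-allocated buffers (normally device memory obtained via aclrtMalloc from acl_rt.h, which is outside this excerpt) into an input aclmdlDataset. The helper name and the minimal error handling are illustrative only.

#include "acl/acl_mdl.h"

/* build an input dataset for a loaded model; devBufs[i] must hold at least
 * aclmdlGetInputSizeByIndex(desc, i) bytes of (typically device) memory */
static aclmdlDataset *build_input_dataset(uint32_t modelId, void **devBufs, size_t bufCount) {
    aclmdlDesc *desc = aclmdlCreateDesc();
    if (desc == NULL) {
        return NULL;
    }
    if (aclmdlGetDesc(desc, modelId) != ACL_SUCCESS) {
        (void)aclmdlDestroyDesc(desc);
        return NULL;
    }

    aclmdlDataset *input = aclmdlCreateDataset();
    if (input == NULL) {
        (void)aclmdlDestroyDesc(desc);
        return NULL;
    }

    size_t numInputs = aclmdlGetNumInputs(desc);
    for (size_t i = 0; i < numInputs && i < bufCount; ++i) {
        size_t sz = aclmdlGetInputSizeByIndex(desc, i);
        aclDataBuffer *buf = aclCreateDataBuffer(devBufs[i], sz);
        (void)aclmdlAddDatasetBuffer(input, buf);   /* the dataset references the buffer by address */
    }

    (void)aclmdlDestroyDesc(desc);
    return input;
}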
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, + uint32_t *modelId, void *workPtr, size_t workSize, + void *weightPtr, size_t weightSize); + +/** + * @ingroup AscendCL + * @brief Load offline model data from memory, + * and the user can manage the memory of model running + * + * @par Function + * After the system finishes loading the model, + * the model ID returned is used as a mark to identify the model + * during subsequent operations + * @param model [IN] Model data stored in memory + * @param modelSize [IN] model data size + * @param modelId [OUT] Model ID generated after finishes loading the model + * @param workPtr [IN] A pointer to the working memory + * required by the model on the Device,can be null + * @param workSize [IN] work memory size + * @param weightPtr [IN] Pointer to model weight memory on Device,can be null + * @param weightSize [IN] The amount of weight memory required by the model + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, + uint32_t *modelId, void *workPtr, size_t workSize, + void *weightPtr, size_t weightSize); + +/** + * @ingroup AscendCL + * @brief load model from file with async queue + * + * @param modelPath [IN] model path + * @param modelId [OUT] return model id if load success + * @param inputQ [IN] input queue pointer + * @param inputQNum [IN] input queue num + * @param outputQ [IN] output queue pointer + * @param outputQNum [IN] output queue num + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint32_t *modelId, const uint32_t *inputQ, + size_t inputQNum, const uint32_t *outputQ, size_t outputQNum); + +/** + * @ingroup AscendCL + * @brief load model from memory with async queue + * + * @param model [IN] model memory which user manages + * @param modelSize [IN] model size + * @param modelId [OUT] return model id if load success + * @param inputQ [IN] input queue pointer + * @param inputQNum [IN] input queue num + * @param outputQ [IN] output queue pointer + * @param outputQNum [IN] output queue num + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, + const uint32_t *inputQ, size_t inputQNum, + const uint32_t *outputQ, size_t outputQNum); + +/** + * @ingroup AscendCL + * @brief Execute model synchronous inference until the inference result is returned + * + * @param modelId [IN] ID of the model to perform inference + * @param input [IN] Input data for model inference + * @param output [OUT] Output data for model inference + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output); + +/** + * @ingroup AscendCL + * @brief Execute model asynchronous inference until the inference result is returned + * + * @param modelId [IN] ID of the model to perform inference + * @param input [IN] Input data for model inference + * @param output [OUT] Output data for model inference + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
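The load and execute calls above combine with aclmdlUnload, declared just below, into the basic synchronous flow. A sketch assuming the datasets were prepared as in the previous example and that device/context setup from acl_rt.h has already been done:

#include "acl/acl_mdl.h"

/* load an offline model, run one synchronous inference, then unload it */
static aclError run_model_once(const char *omPath, const aclmdlDataset *input, aclmdlDataset *output) {
    uint32_t modelId = 0;
    aclError ret = aclmdlLoadFromFile(omPath, &modelId);  /* system manages model memory */
    if (ret != ACL_SUCCESS) {
        return ret;
    }

    ret = aclmdlExecute(modelId, input, output);  /* blocks until results are written to output */

    aclError unloadRet = aclmdlUnload(modelId);
    return (ret != ACL_SUCCESS) ? ret : unloadRet;
}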
+ * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem + */ +ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, + aclmdlDataset *output, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief unload model with model id + * + * @param modelId [IN] model id to be unloaded + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlUnload(uint32_t modelId); + +/** + * @ingroup AscendCL + * @brief Get the weight memory size and working memory size + * required for model execution according to the model file + * + * @param fileName [IN] Model path to get memory information + * @param workSize [OUT] The amount of working memory for model executed + * @param weightSize [OUT] The amount of weight memory for model executed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlQuerySize(const char *fileName, size_t *workSize, size_t *weightSize); + +/** + * @ingroup AscendCL + * @brief Obtain the weights required for + * model execution according to the model data in memory + * + * @par Restriction + * The execution and weight memory is Device memory, + * and requires user application and release. + * @param model [IN] model memory which user manages + * @param modelSize [IN] model data size + * @param workSize [OUT] The amount of working memory for model executed + * @param weightSize [OUT] The amount of weight memory for model executed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlQuerySizeFromMem(const void *model, size_t modelSize, size_t *workSize, + size_t *weightSize); + +/** + * @ingroup AscendCL + * @brief In dynamic batch scenarios, + * it is used to set the number of images processed + * at one time during model inference + * + * @param modelId [IN] model id + * @param dataset [IN|OUT] data for model inference + * @param index [IN] index of dynamic tensor + * @param batchSize [IN] Number of images processed at a time during model + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicBatchSize(uint32_t modelId, aclmdlDataset *dataset, size_t index, + uint64_t batchSize); + +/** + * @ingroup AscendCL + * @brief Sets the H and W of the specified input of the model + * + * @param modelId [IN] model id + * @param dataset [IN|OUT] data for model inference + * @param index [IN] index of dynamic tensor + * @param height [IN] model height + * @param width [IN] model width + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicHWSize(uint32_t modelId, aclmdlDataset *dataset, size_t index, + uint64_t height, uint64_t width); + +/** + * @ingroup AscendCL + * @brief Sets the dynamic dims of the specified input of the model + * + * @param modelId [IN] model id + * @param dataset [IN|OUT] data for model inference + * @param index [IN] index of dynamic dims + * @param dims [IN] value of dynamic dims + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetInputDynamicDims(uint32_t modelId, aclmdlDataset *dataset, size_t index, + const aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get input dims info + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlGetInputDimsV2 + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get input dims info(version 2), especially for static aipp + * it is the same with aclmdlGetInputDims while model without static aipp + * + * @param modelDesc [IN] model description + * @param index [IN] input tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlGetInputDims + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDimsV2(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get output dims info + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims); + +/** + * @ingroup AscendCL + * @brief get current output dims info + * + * @par Function + * The following use cases are supported: + * @li Get current output shape when model is dynamic and + * dynamic shape info is set + * @li Get max output shape when model is dynamic and + * dynamic shape info is not set + * @li Get actual output shape when model is static + * + * @param modelDesc [IN] model description + * @param index [IN] output tensor index + * @param dims [OUT] dims info + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input name by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] input tensor index
+ *
+ * @retval input tensor name, with the same life cycle as modelDesc
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetInputNameByIndex(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output name by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] output tensor index
+ *
+ * @retval output tensor name, with the same life cycle as modelDesc
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetOutputNameByIndex(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input format by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] input tensor index
+ *
+ * @retval input tensor format
+ */
+ACL_FUNC_VISIBILITY aclFormat aclmdlGetInputFormat(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output format by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] output tensor index
+ *
+ * @retval output tensor format
+ */
+ACL_FUNC_VISIBILITY aclFormat aclmdlGetOutputFormat(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input data type by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] input tensor index
+ *
+ * @retval input tensor data type
+ */
+ACL_FUNC_VISIBILITY aclDataType aclmdlGetInputDataType(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output data type by index
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] output tensor index
+ *
+ * @retval output tensor data type
+ */
+ACL_FUNC_VISIBILITY aclDataType aclmdlGetOutputDataType(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input tensor index by name
+ *
+ * @param modelDesc [IN] model description
+ * @param name [IN] input tensor name
+ * @param index [OUT] input tensor index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output tensor index by name
+ *
+ * @param modelDesc [IN] model description
+ * @param name [IN] output tensor name
+ * @param index [OUT] output tensor index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetOutputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get dynamic batch info
+ *
+ * @param modelDesc [IN] model description
+ * @param batch [OUT] dynamic batch info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicBatch(const aclmdlDesc *modelDesc, aclmdlBatch *batch);
+
+/**
+ * @ingroup AscendCL
+ * @brief get dynamic height&width info
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN] input tensor index
+ * @param hw [OUT] dynamic height&width info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
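The index-based query functions above combine into a simple inspection loop. A rough sketch, assuming aclmdlCreateDesc, aclmdlGetDesc, aclmdlGetNumInputs and aclmdlDestroyDesc as declared earlier in this header:

```c
#include <stdio.h>
#include "acl/acl_mdl.h"

/* Sketch: dump the input layout of a loaded model. */
static void dump_inputs(uint32_t modelId)
{
    aclmdlDesc *desc = aclmdlCreateDesc();
    aclmdlGetDesc(desc, modelId);

    size_t n = aclmdlGetNumInputs(desc);
    for (size_t i = 0; i < n; ++i) {
        aclmdlIODims dims;
        aclmdlGetInputDims(desc, i, &dims);
        printf("input %zu: name=%s format=%d dtype=%d dimCount=%zu\n",
               i,
               aclmdlGetInputNameByIndex(desc, i),
               (int)aclmdlGetInputFormat(desc, i),
               (int)aclmdlGetInputDataType(desc, i),
               dims.dimCount);
    }
    aclmdlDestroyDesc(desc);
}
```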
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicHW(const aclmdlDesc *modelDesc, size_t index, aclmdlHW *hw); + +/** + * @ingroup AscendCL + * @brief get dynamic gear count + * + * @param modelDesc [IN] model description + * @param index [IN] unused, must be -1 + * @param gearCount [OUT] dynamic gear count + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicGearCount(const aclmdlDesc *modelDesc, size_t index, + size_t *gearCount); + +/** + * @ingroup AscendCL + * @brief get dynamic dims info + * + * @param modelDesc [IN] model description + * @param index [IN] unused, must be -1 + * @param dims [OUT] value of dynamic dims + * @param gearCount [IN] dynamic gear count + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims, + size_t gearCount); + +/** + * @ingroup AscendCL + * @brief Create data of type aclmdlAIPP + * + * @param batchSize [IN] batchsizes of model + * + * @retval the aclmdlAIPP pointer + */ +ACL_FUNC_VISIBILITY aclmdlAIPP *aclmdlCreateAIPP(uint64_t batchSize); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlAIPP + * + * @param aippParmsSet [IN] Pointer for aclmdlAIPP to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyAIPP(const aclmdlAIPP *aippParmsSet); + +/** + * @ingroup AscendCL + * @brief set InputFormat of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param inputFormat [IN] The inputFormat of aipp + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, aclAippInputFormat inputFormat); + +/** + * @ingroup AscendCL + * @brief set cscParms of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param csc_switch [IN] Csc switch + * @param cscMatrixR0C0 [IN] Csc_matrix_r0_c0 + * @param cscMatrixR0C1 [IN] Csc_matrix_r0_c1 + * @param cscMatrixR0C2 [IN] Csc_matrix_r0_c2 + * @param cscMatrixR1C0 [IN] Csc_matrix_r1_c0 + * @param cscMatrixR1C1 [IN] Csc_matrix_r1_c1 + * @param cscMatrixR1C2 [IN] Csc_matrix_r1_c2 + * @param cscMatrixR2C0 [IN] Csc_matrix_r2_c0 + * @param cscMatrixR2C1 [IN] Csc_matrix_r2_c1 + * @param cscMatrixR2C2 [IN] Csc_matrix_r2_c2 + * @param cscOutputBiasR0 [IN] Output Bias for RGB to YUV, element of row 0, unsigned number + * @param cscOutputBiasR1 [IN] Output Bias for RGB to YUV, element of row 1, unsigned number + * @param cscOutputBiasR2 [IN] Output Bias for RGB to YUV, element of row 2, unsigned number + * @param cscInputBiasR0 [IN] Input Bias for YUV to RGB, element of row 0, unsigned number + * @param cscInputBiasR1 [IN] Input Bias for YUV to RGB, element of row 1, unsigned number + * @param cscInputBiasR2 [IN] Input Bias for YUV to RGB, element of row 2, unsigned number + * + * @retval ACL_SUCCESS The function is successfully executed. 
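Dynamic batch selection then only needs the index of the dynamic shape input. A sketch, assuming the ACL_DYNAMIC_TENSOR_NAME macro defined earlier in this header names that input:

```c
#include "acl/acl_mdl.h"

/* Sketch: select batch 8 for this inference on a model built with dynamic
 * batch gears. Must be called before aclmdlExecute whenever the batch changes. */
static aclError choose_batch(uint32_t modelId, const aclmdlDesc *desc, aclmdlDataset *inputs)
{
    size_t dynIndex = 0;
    aclError ret = aclmdlGetInputIndexByName(desc, ACL_DYNAMIC_TENSOR_NAME, &dynIndex);
    if (ret != ACL_SUCCESS) {
        return ret;  /* model has no dynamic batch input */
    }
    return aclmdlSetDynamicBatchSize(modelId, inputs, dynIndex, 8);
}
```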
+ * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, + int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, + int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, + int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, + uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, + uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, + uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); + +/** + * @ingroup AscendCL + * @brief set rb/ub swap switch of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param rbuvSwapSwitch [IN] rb/ub swap switch + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); + +/** + * @ingroup AscendCL + * @brief set RGBA->ARGB, YUVA->AYUV swap switch of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param axSwapSwitch [IN] RGBA->ARGB, YUVA->AYUV swap switch + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); + +/** + * @ingroup AscendCL + * @brief set source image of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param srcImageSizeW [IN] Source image width + * @param srcImageSizeH [IN] Source image height + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, + int32_t srcImageSizeH); + +/** + * @ingroup AscendCL + * @brief set resize switch of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param scfSwitch [IN] Resize switch + * @param scfInputSizeW [IN] Input width of scf + * @param scfInputSizeH [IN] Input height of scf + * @param scfOutputSizeW [IN] Output width of scf + * @param scfOutputSizeH [IN] Output height of scf + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, + int8_t scfSwitch, + int32_t scfInputSizeW, + int32_t scfInputSizeH, + int32_t scfOutputSizeW, + int32_t scfOutputSizeH, + uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set cropParams of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param cropSwitch [IN] Crop switch + * @param cropStartPosW [IN] The start horizontal position of cropping + * @param cropStartPosH [IN] The start vertical position of cropping + * @param cropSizeW [IN] Crop width + * @param cropSizeH [IN] Crop height + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, + int8_t cropSwitch, + int32_t cropStartPosW, + int32_t cropStartPosH, + int32_t cropSizeW, + int32_t cropSizeH, + uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set paddingParams of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param paddingSwitch [IN] Padding switch + * @param paddingSizeTop [IN] Top padding size + * @param paddingSizeBottom [IN] Bottom padding size + * @param paddingSizeLeft [IN] Left padding size + * @param paddingSizeRight [IN] Right padding size + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, + int32_t paddingSizeTop, int32_t paddingSizeBottom, + int32_t paddingSizeLeft, int32_t paddingSizeRight, + uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set DtcPixelMean of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param dtcPixelMeanChn0 [IN] Mean value of channel 0 + * @param dtcPixelMeanChn1 [IN] Mean value of channel 1 + * @param dtcPixelMeanChn2 [IN] Mean value of channel 2 + * @param dtcPixelMeanChn3 [IN] Mean value of channel 3 + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, + int16_t dtcPixelMeanChn0, + int16_t dtcPixelMeanChn1, + int16_t dtcPixelMeanChn2, + int16_t dtcPixelMeanChn3, + uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set DtcPixelMin of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param dtcPixelMinChn0 [IN] Min value of channel 0 + * @param dtcPixelMinChn1 [IN] Min value of channel 1 + * @param dtcPixelMinChn2 [IN] Min value of channel 2 + * @param dtcPixelMinChn3 [IN] Min value of channel 3 + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, + float dtcPixelMinChn0, + float dtcPixelMinChn1, + float dtcPixelMinChn2, + float dtcPixelMinChn3, + uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set PixelVarReci of type aclmdlAIPP + * + * @param aippParmsSet [OUT] Pointer for aclmdlAIPP + * @param dtcPixelVarReciChn0 [IN] sfr_dtc_pixel_variance_reci_ch0 + * @param dtcPixelVarReciChn1 [IN] sfr_dtc_pixel_variance_reci_ch1 + * @param dtcPixelVarReciChn2 [IN] sfr_dtc_pixel_variance_reci_ch2 + * @param dtcPixelVarReciChn3 [IN] sfr_dtc_pixel_variance_reci_ch3 + * @param batchIndex [IN] Batch parameter index + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, + float dtcPixelVarReciChn0, + float dtcPixelVarReciChn1, + float dtcPixelVarReciChn2, + float dtcPixelVarReciChn3, + uint64_t batchIndex); + +/** + * @ingroup AscendCL + * @brief set aipp parameters to model + * + * @param modelId [IN] model id + * @param dataset [IN] Pointer of dataset + * @param index [IN] index of input for aipp data(ACL_DYNAMIC_AIPP_NODE) + * @param aippParmsSet [IN] Pointer for aclmdlAIPP + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, + aclmdlDataset *dataset, + size_t index, + const aclmdlAIPP *aippParmsSet); + +/** + * @ingroup AscendCL + * @brief set aipp parameters to model + * + * @param modelId [IN] model id + * @param dataset [IN] Pointer of dataset + * @param index [IN] index of input for data which linked dynamic aipp(ACL_DATA_WITH_DYNAMIC_AIPP) + * @param aippParmsSet [IN] Pointer for aclmdlAIPP + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, + aclmdlDataset *dataset, + size_t index, + const aclmdlAIPP *aippParmsSet); + +/** + * @ingroup AscendCL + * @brief get input aipp type + * + * @param modelId [IN] model id + * @param index [IN] index of input + * @param type [OUT] aipp type for input.refrer to aclmdlInputAippType(enum) + * @param dynamicAttachedDataIndex [OUT] index for dynamic attached data(ACL_DYNAMIC_AIPP_NODE) + * valid when type is ACL_DATA_WITH_DYNAMIC_AIPP, invalid value is ACL_INVALID_NODE_INDEX + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP +*/ +ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, + size_t index, + aclmdlInputAippType *type, + size_t *dynamicAttachedDataIndex); + +/** + * @ingroup AscendCL + * @brief get static aipp parameters from model + * + * @param modelId [IN] model id + * @param index [IN] index of tensor + * @param aippinfo [OUT] Pointer for static aipp info + * + * @retval ACL_SUCCESS The function is successfully executed. 
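Putting the AIPP setters together, one possible per-inference flow is sketched below. The ACL_YUV420SP_U8 format value, the 416x416 source size and the mean values are illustrative assumptions; the index of the ACL_DYNAMIC_AIPP_NODE input is expected to come from aclmdlGetAippType.

```c
#include "acl/acl_mdl.h"

/* Sketch: build a dynamic AIPP configuration for YUV420SP frames and attach
 * it to the dataset. Null/error checks omitted for brevity. */
static aclError apply_dynamic_aipp(uint32_t modelId, aclmdlDataset *inputs,
                                   size_t aippIndex, uint64_t batchSize)
{
    aclmdlAIPP *aipp = aclmdlCreateAIPP(batchSize);

    aclmdlSetAIPPInputFormat(aipp, ACL_YUV420SP_U8);      /* assumed enum value */
    aclmdlSetAIPPSrcImageSize(aipp, 416, 416);            /* illustrative size */

    /* Subtract a per-channel mean for every image in the batch. */
    for (uint64_t b = 0; b < batchSize; ++b) {
        aclmdlSetAIPPDtcPixelMean(aipp, 104, 117, 123, 0, b);
    }

    aclError ret = aclmdlSetInputAIPP(modelId, inputs, aippIndex, aipp);
    aclmdlDestroyAIPP(aipp);
    return ret;
}
```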
+ * @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp + * @retval OtherValues Failure + * + * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | + * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName +*/ +ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); + +/** + * @ingroup AscendCL + * @brief get op description info + * + * @param deviceId [IN] device id + * @param streamId [IN] stream id + * @param taskId [IN] task id + * @param opName [OUT] pointer to op name + * @param opNameLen [IN] the length of op name + * @param inputDesc [OUT] pointer to input description + * @param numInputs [OUT] the number of input tensor + * @param outputDesc [OUT] pointer to output description + * @param numOutputs [OUT] the number of output tensor + * + * @retval ACL_SUCCESS The function is successfully executed + * @retval OtherValues Failure +*/ +ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, + uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs, + aclTensorDesc **outputDesc, size_t *numOutputs); + +/** + * @ingroup AscendCL + * @brief init dump + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure +*/ +ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); + +/** + * @ingroup AscendCL + * @brief set param of dump + * + * @param dumpCfgPath [IN] the path of dump config + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); + +/** + * @ingroup AscendCL + * @brief finalize dump. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure +*/ +ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); + +/** + * @ingroup AscendCL + * @brief load model with config + * + * @param handle [IN] pointer to model config handle + * @param modelId [OUT] pointer to model id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure +*/ +ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId); + +/** + * @ingroup AscendCL + * @brief create model config handle of type aclmdlConfigHandle + * + * @retval the aclmdlConfigHandle pointer + * + * @see aclmdlDestroyConfigHandle +*/ +ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle(); + +/** + * @ingroup AscendCL + * @brief destroy data of type aclmdlConfigHandle + * + * @param handle [IN] pointer to model config handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclmdlCreateConfigHandle + */ +ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handle); + +/** + * @ingroup AscendCL + * @brief set config for model load + * + * @param handle [OUT] pointer to model config handle + * @param attr [IN] config attr in model config handle to be set + * @param attrValue [IN] pointer to model config value + * @param valueSize [IN] memory size of attrValue + * + * @retval ACL_SUCCESS The function is successfully executed. 
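The three dump interfaces above simply bracket the inference work; only the location of the dump configuration file changes per deployment. A minimal sketch with a hypothetical config path:

```c
#include "acl/acl_mdl.h"

/* Sketch: enable operator dump around a run. The JSON path is hypothetical;
 * its expected contents are described in the AscendCL documentation. */
static void run_with_dump(void)
{
    aclmdlInitDump();
    aclmdlSetDump("/home/user/acl_dump.json");  /* hypothetical config path */

    /* ... load model, execute inferences, unload ... */

    aclmdlFinalizeDump();
}
```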
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr, + const void *attrValue, size_t valueSize); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ diff --git a/inc/external/acl/acl/acl_op.h b/inc/external/acl/acl/acl_op.h new file mode 100644 index 00000000..b1be0d6e --- /dev/null +++ b/inc/external/acl/acl/acl_op.h @@ -0,0 +1,549 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_OP_H_ +#define INC_EXTERNAL_ACL_ACL_OP_H_ + +#include "acl_base.h" +#include "acl_rt.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct aclopHandle aclopHandle; +typedef struct aclopAttr aclopAttr; +typedef struct aclopKernelDesc aclopKernelDesc; + +typedef void (*aclDataDeallocator)(void *data, size_t length); + +static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1; + +typedef enum aclEngineType { + ACL_ENGINE_SYS, + ACL_ENGINE_AICORE, + ACL_ENGINE_VECTOR, +} aclopEngineType; + +/** + * @ingroup AscendCL + * @brief Set base directory that contains single op models + * + * @par Restriction + * The aclopSetModelDir interface can be called only once in a process. + * @param modelDir [IN] path of the directory + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetModelDir(const char *modelDir); + +/** + * @ingroup AscendCL + * @brief load single op models from memory + * + * @par Restriction + * The aclopLoad interface can be called more than one times in a process. + * @param model [IN] address of single op models + * @param modelSize [IN] size of single op models + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopLoad(const void *model, size_t modelSize); + +/** + * @ingroup AscendCL + * @brief create data of type aclopAttr + * + * @retval pointer to created instance. + * @retval nullptr if run out of memory + */ +ACL_FUNC_VISIBILITY aclopAttr *aclopCreateAttr(); + +/** + * @ingroup AscendCL + * @brief destroy data of typ aclopAttr + * + * @param attr [IN] pointer to the instance of aclopAttr + */ +ACL_FUNC_VISIBILITY void aclopDestroyAttr(const aclopAttr *attr); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is bool + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param attrValue [IN] attribute value + * false if attrValue is 0, true otherwise. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrBool(aclopAttr *attr, const char *attrName, uint8_t attrValue); + +/** + * @ingroup AscendCL + * @brief set an attribute. 
the type of the attribute is int64_t + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param attrValue [IN] attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrInt(aclopAttr *attr, const char *attrName, int64_t attrValue); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is float + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param attrValue [IN] attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attrName, float attrValue); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is string + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param attrValue [IN] attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of bools + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numValues [IN] number of values. false if attrValue is 0, true otherwise. + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues, + const uint8_t *values); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of ints + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numValues [IN] number of values + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues, + const int64_t *values); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of floats + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numValues [IN] number of values + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues, + const float *values); + +/** + * @ingroup AscendCL + * @brief set an attribute. the type of the attribute is list of strings + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numValues [IN] number of values + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues, + const char **values); + +/** + * @ingroup AscendCL + * @brief set an attribute. 
the type of the attribute is list of list of ints + * + * @param attr [OUT] pointer to the instance of aclopAttr + * @param attrName [IN] attribute name + * @param numLists [IN] number of lists + * @param numValues [IN] pointer to number of values of each list + * @param values [IN] pointer to values + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, + const char *attrName, + int numLists, + const int *numValues, + const int64_t *const values[]); + +/** + * @ingroup AscendCL + * @brief Load and execute the specified operator asynchronously + * + * @par Restriction + * @li The input and output organization of each operator is different, + * and the application needs to organize the operator strictly + * according to the operator input and output parameters when calling. + * @li When the user calls aclopExecute, + * the ACL finds the corresponding task according to the optype, + * the description of the input tesnsor, + * the description of the output tesnsor, and attr, and issues the execution. + * + * @param opType [IN] type of op + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param inputs [IN] pointer to array of input buffers + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param outputs [OUT] pointer to array of output buffers + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") +ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + const aclDataBuffer *const inputs[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + aclDataBuffer *const outputs[], + const aclopAttr *attr, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Load and execute the specified operator + * The difference with aclopExecute is that aclopExecuteV2 will refresh outputDesc + * + * @par Restriction + * @li The input and output organization of each operator is different, + * and the application needs to organize the operator strictly + * according to the operator input and output parameters when calling. + * @li When the user calls aclopExecuteV2, + * the ACL finds the corresponding task according to the optype, + * the description of the input tesnsor, + * the description of the output tesnsor, and attr, and issues the execution. + * + * @param opType [IN] type of op + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param inputs [IN] pointer to array of input buffers + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN|OUT] pointer to array of output tensor descriptions + * @param outputs [OUT] pointer to array of output buffers + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
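An attribute set is usually built once and reused across executions. A sketch with illustrative attribute names (real names depend on the operator prototype definition):

```c
#include "acl/acl_op.h"

/* Sketch: build an attribute set for a hypothetical "Conv2D"-style single op. */
static aclopAttr *make_conv_attr(void)
{
    aclopAttr *attr = aclopCreateAttr();
    if (attr == NULL) {
        return NULL;
    }
    int64_t strides[4] = {1, 1, 2, 2};
    int64_t pads[4] = {0, 0, 0, 0};

    aclopSetAttrListInt(attr, "strides", 4, strides);
    aclopSetAttrListInt(attr, "pads", 4, pads);
    aclopSetAttrString(attr, "data_format", "NCHW");
    aclopSetAttrBool(attr, "use_bias", 0);  /* 0 means false */

    return attr;  /* caller releases it with aclopDestroyAttr */
}
```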
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, + int numInputs, + aclTensorDesc *inputDesc[], + aclDataBuffer *inputs[], + int numOutputs, + aclTensorDesc *outputDesc[], + aclDataBuffer *outputs[], + aclopAttr *attr, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a instance of aclopHandle. + * + * @param opType [IN] type of op + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param opAttr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *opAttr, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief destroy aclopHandle instance + * + * @param handle [IN] pointer to the instance of aclopHandle + */ +ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle); + +/** + * @ingroup AscendCL + * @brief execute an op with the handle. + * can save op model matching cost compared with aclopExecute + * + * @param handle [IN] pointer to the instance of aclopHandle. + * The aclopCreateHandle interface has been called + * in advance to create aclopHandle type data. + * @param numInputs [IN] number of inputs + * @param inputs [IN] pointer to array of input buffers. + * The aclCreateDataBuffer interface has been called + * in advance to create aclDataBuffer type data. + * @param numOutputs [IN] number of outputs + * @param outputs [OUT] pointer to array of output buffers + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclopCreateHandle | aclCreateDataBuffer + */ +ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, + int numInputs, + const aclDataBuffer *const inputs[], + int numOutputs, + aclDataBuffer *const outputs[], + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief cast data type + * + * @param srcDesc [IN] source tensor desc + * @param srcBuffer [IN] source tensor buffer + * @param dstDesc [IN] destination tensor desc + * @param dstBuffer [OUT] destination tensor buffer + * @param truncate [IN] do not truncate if value is 0, truncate otherwise + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, + const aclDataBuffer *srcBuffer, + const aclTensorDesc *dstDesc, + aclDataBuffer *dstBuffer, + uint8_t truncate, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for casting datatype + * + * @param srcDesc [IN] source tensor desc + * @param dstDesc [IN] destination tensor desc + * @param truncate [IN] do not truncate if value is 0, truncate otherwise + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. 
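A complete single-operator call wraps tensor descriptions and data buffers around aclopExecuteV2. The sketch below assumes an elementwise "Add" operator, device buffers already filled by the caller, and aclCreateTensorDesc, aclCreateDataBuffer and aclrtSynchronizeStream as declared in the other headers of this package.

```c
#include "acl/acl.h"

/* Sketch: run a single "Add" operator on two device buffers of `count` floats. */
static aclError add_vectors(void *devX, void *devY, void *devZ, int64_t count, aclrtStream stream)
{
    int64_t shape[1] = {count};
    aclTensorDesc *descs[3];
    for (int i = 0; i < 3; ++i) {
        descs[i] = aclCreateTensorDesc(ACL_FLOAT, 1, shape, ACL_FORMAT_ND);
    }

    aclDataBuffer *inBufs[2] = {
        aclCreateDataBuffer(devX, count * sizeof(float)),
        aclCreateDataBuffer(devY, count * sizeof(float)),
    };
    aclDataBuffer *outBufs[1] = {aclCreateDataBuffer(devZ, count * sizeof(float))};

    aclTensorDesc *inDescs[2] = {descs[0], descs[1]};
    aclTensorDesc *outDescs[1] = {descs[2]};

    /* The V2 variant may rewrite outDescs with the inferred output shape. */
    aclError ret = aclopExecuteV2("Add", 2, inDescs, inBufs, 1, outDescs, outBufs,
                                  NULL, stream);
    if (ret == ACL_SUCCESS) {
        ret = aclrtSynchronizeStream(stream);
    }

    for (int i = 0; i < 3; ++i) {
        aclDestroyTensorDesc(descs[i]);
    }
    aclDestroyDataBuffer(inBufs[0]);
    aclDestroyDataBuffer(inBufs[1]);
    aclDestroyDataBuffer(outBufs[0]);
    return ret;
}
```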
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, + aclTensorDesc *dstDesc, + uint8_t truncate, + aclopHandle **handle); + + +/** + * @ingroup AscendCL + * @brief create kernel + * + * @param opType [IN] op type + * @param kernelId [IN] kernel id + * @param kernelName [IN] kernel name + * @param binData [IN] kernel bin data + * @param binSize [IN] kernel bin size + * @param enginetype [IN] enigne type + * @param deallocator [IN] callback function for deallocating bin data, + * null if bin data to be deallocated by caller + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclopCompile + */ +ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, + const char *kernelId, + const char *kernelName, + void *binData, + int binSize, + aclopEngineType enginetype, + aclDataDeallocator deallocator); + + +/** + * @ingroup AscendCL + * @brief create kernel + * + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param opAttr [IN] pointer to instance of aclopAttr + * @param aclopKernelDesc [IN] pointer to instance of aclopKernelDesc + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +typedef aclError (*aclopCompileFunc)(int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *opAttr, + aclopKernelDesc *aclopKernelDesc); + +/** + * @ingroup AscendCL + * @brief register compile function + * + * @param opType [IN] op type + * @param func [IN] compile function + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclopUnregisterCompileFunc + */ +ACL_FUNC_VISIBILITY aclError aclopRegisterCompileFunc(const char *opType, aclopCompileFunc func); + +/** + * @ingroup AscendCL + * @brief unregister compile function + * + * @param opType [IN] op type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType); + +/** + * @ingroup AscendCL + * @brief set kernel args + * + * @param kernelDesc [IN] pointer to instance of aclopKernelDesc + * @param kernelId [IN] kernel id + * @param blockDim [IN] block dim + * @param args [IN] args + * @param argSize [IN] size in bytes of args + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, + const char *kernelId, + uint32_t blockDim, + const void *args, + uint32_t argSize); + +/** + * @ingroup AscendCL + * @brief set workspace sizes + * + * @param kernelDesc [IN] pointer to instance of aclopKernelDesc + * @param numWorkspaces [IN] number of workspaces + * @param workspaceSizes [IN] pointer to array of sizes of workspaces + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kernelDesc, int numWorkspaces, + size_t *workspaceSizes); + +/** + * @ingroup AscendCL + * @brief compile op with dynamic shape + * + * @param opType [IN] op type + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *attr); + +/** + * @ingroup AscendCL + * @brief inferShape the specified operator synchronously + * + * @param opType [IN] type of op + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param inputs [IN] pointer to array of input buffers + * @param numOutputs [IN] number of outputs + * @param outputDesc [OUT] pointer to array of output tensor descriptions + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, + int numInputs, + aclTensorDesc *inputDesc[], + aclDataBuffer *inputs[], + int numOutputs, + aclTensorDesc *outputDesc[], + aclopAttr *attr); + + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_OP_H_ diff --git a/inc/external/acl/acl/acl_op_compiler.h b/inc/external/acl/acl/acl_op_compiler.h new file mode 100644 index 00000000..6bbb855c --- /dev/null +++ b/inc/external/acl/acl/acl_op_compiler.h @@ -0,0 +1,115 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ +#define INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ + +#include "acl_base.h" +#include "acl_op.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum aclCompileType { + ACL_COMPILE_SYS, + ACL_COMPILE_UNREGISTERED +} aclopCompileType; + +typedef enum { + ACL_PRECISION_MODE, + ACL_AICORE_NUM, + ACL_AUTO_TUNE_MODE, + ACL_OP_SELECT_IMPL_MODE, + ACL_OPTYPELIST_FOR_IMPLMODE, + ACL_OP_DEBUG_LEVEL, + ACL_DEBUG_DIR, + ACL_OP_COMPILER_CACHE_MODE, + ACL_OP_COMPILER_CACHE_DIR +} aclCompileOpt; + +/** + * @ingroup AscendCL + * @brief compile op + * + * @param opType [IN] op type + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param engineType [IN] engine type + * @param compileFlag [IN] compile flag + * @param opPath [IN] path of op + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *attr, + aclopEngineType engineType, + aclopCompileType compileFlag, + const char *opPath); + +/** + * @ingroup AscendCL + * @brief compile and execute op + * + * @param opType [IN] op type + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param inputs [IN] pointer to array of input buffers + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN] pointer to array of output tensor descriptions + * @param outputs [IN] pointer to array of outputs buffers + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param engineType [IN] engine type + * @param compileFlag [IN] compile flag + * @param opPath [IN] path of op + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType, + int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], + int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], + const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag, + const char *opPath, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief set compile option + * + * @param aclCompileOpt [IN] compile option + * @param value [IN] pointer for the option value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ diff --git a/inc/external/acl/acl/acl_prof.h b/inc/external/acl/acl/acl_prof.h new file mode 100644 index 00000000..d2675124 --- /dev/null +++ b/inc/external/acl/acl/acl_prof.h @@ -0,0 +1,296 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
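aclSetCompileopt and aclopCompileAndExecute are typically combined as below. The option value strings shown are the commonly documented ones but should be checked against the target CANN release, and the cache directory is hypothetical; the tensor descriptions and buffers are prepared as in the earlier single-operator sketch.

```c
#include "acl/acl_op_compiler.h"

/* Sketch: set global compile options, then compile and launch an operator in one call. */
static aclError compile_and_run(int numInputs, const aclTensorDesc *const inDescs[],
                                const aclDataBuffer *const inBufs[],
                                int numOutputs, const aclTensorDesc *const outDescs[],
                                aclDataBuffer *const outBufs[], aclrtStream stream)
{
    /* Global options apply to every subsequent online compilation. */
    aclSetCompileopt(ACL_PRECISION_MODE, "allow_fp32_to_fp16");
    aclSetCompileopt(ACL_OP_COMPILER_CACHE_MODE, "enable");
    aclSetCompileopt(ACL_OP_COMPILER_CACHE_DIR, "/tmp/acl_op_cache");  /* hypothetical dir */

    /* ACL_COMPILE_SYS: the operator comes from the built-in op library. */
    return aclopCompileAndExecute("Add", numInputs, inDescs, inBufs,
                                  numOutputs, outDescs, outBufs,
                                  NULL, ACL_ENGINE_SYS, ACL_COMPILE_SYS,
                                  NULL, stream);
}
```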
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_PROF_H_ +#define INC_EXTERNAL_ACL_PROF_H_ + +#include "acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_PROF_ACL_API 0x0001 +#define ACL_PROF_TASK_TIME 0x0002 +#define ACL_PROF_AICORE_METRICS 0x0004 +#define ACL_PROF_AICPU 0x0008 + +#define ACL_PROF_MAX_OP_NAME_LEN 257 +#define ACL_PROF_MAX_OP_TYPE_LEN 65 + +typedef enum { + ACL_AICORE_ARITHMETIC_UTILIZATION = 0, + ACL_AICORE_PIPE_UTILIZATION = 1, + ACL_AICORE_MEMORY_BANDWIDTH = 2, + ACL_AICORE_L0B_AND_WIDTH = 3, + ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, + ACL_AICORE_NONE = 0xFF +} aclprofAicoreMetrics; + +typedef struct aclprofConfig aclprofConfig; +typedef struct aclprofStopConfig aclprofStopConfig; +typedef struct aclprofAicoreEvents aclprofAicoreEvents; +typedef struct aclprofSubscribeConfig aclprofSubscribeConfig; + +/** + * @ingroup AscendCL + * @brief profiling initialize + * + * @param profilerResultPath [IN] path of profiling result + * @param length [IN] length of profilerResultPath + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofFinalize + */ +ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length); + +/** + * @ingroup AscendCL + * @brief profiling finalize + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofInit + */ +ACL_FUNC_VISIBILITY aclError aclprofFinalize(); + +/** + * @ingroup AscendCL + * @brief Start profiling modules by profilerConfig + * + * @param profilerConfig [IN] config of profiling + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofStop + */ +ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); + +/** + * @ingroup AscendCL + * @brief Create data of type aclprofConfig + * + * @param deviceIdList [IN] list of device id + * @param deviceNums [IN] number of devices + * @param aicoreMetrics [IN] type of aicore metrics + * @param aicoreEvents [IN] pointer to aicore events, only support NULL now + * @param dataTypeConfig [IN] config modules need profiling + * + * @retval the aclprofConfig pointer + * + * @see aclprofDestroyConfig + */ +ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, + aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); + +/** + * @ingroup AscendCL + * @brief Destroy data of type aclprofConfig + * + * @param profilerConfig [IN] config of profiling + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofCreateConfig + */ +ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig); + +/** + * @ingroup AscendCL + * @brief stop profiling modules by stopProfilingConfig + * + * @param profilerConfig [IN] pointer to stop config of profiling + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclprofStart + */ +ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); + +/** + * @ingroup AscendCL + * @brief subscribe profiling data of model + * + * @param modelId [IN] the model id subscribed + * @param profSubscribeConfig [IN] pointer to config of model subscribe + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofModelUnSubscribe + */ +ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, + const aclprofSubscribeConfig *profSubscribeConfig); + +/** + * @ingroup AscendCL + * @brief unsubscribe profiling data of model + * + * @param modelId [IN] the model id unsubscribed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofModelSubscribe + */ +ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); + +/** + * @ingroup AscendCL + * @brief create subscribe config + * + * @param timeInfoSwitch [IN] switch whether get time info from model + * @param aicoreMetrics [IN] aicore metrics + * @param fd [IN] pointer to write pipe + * + * @retval the aclprofSubscribeConfig pointer + * + * @see aclprofDestroySubscribeConfig + */ +ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, + aclprofAicoreMetrics aicoreMetrics, void *fd); + +/** + * @ingroup AscendCL + * @brief destroy subscribe config + * + * @param profSubscribeConfig [IN] subscribe config + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclprofCreateSubscribeConfig + */ +ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig); + +/** + * @ingroup AscendCL + * @brief create subscribe config + * + * @param opDescSize [OUT] size of op desc + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize); + +/** + * @ingroup AscendCL + * @brief get op number from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param opNumber [OUT] op number of subscription data + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber); + +/** + * @ingroup AscendCL + * @brief get op type from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * @param opType [OUT] obtained op type string + * @param opTypeLen [IN] obtained length of op type string + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, + char *opType, size_t opTypeLen); + +/** + * @ingroup AscendCL + * @brief get op type from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * @param opName [OUT] obtained op name string + * @param opNameLen [IN] obtained length of op name string + * + * @retval ACL_SUCCESS The function is successfully executed. 
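A basic profiling session wraps the workload between aclprofStart and aclprofStop. A sketch with a hypothetical result directory and device 0 only:

```c
#include <string.h>
#include "acl/acl_prof.h"

/* Sketch: profile a stretch of inference work on device 0, collecting ACL API
 * timings, task times and AI Core pipeline metrics. */
static void profile_region(void)
{
    const char *resultDir = "/home/user/prof_out";  /* hypothetical path */
    uint32_t devices[1] = {0};

    aclprofInit(resultDir, strlen(resultDir));
    aclprofConfig *cfg = aclprofCreateConfig(devices, 1, ACL_AICORE_PIPE_UTILIZATION, NULL,
                                             ACL_PROF_ACL_API | ACL_PROF_TASK_TIME |
                                             ACL_PROF_AICORE_METRICS);
    aclprofStart(cfg);

    /* ... run the workload to be profiled ... */

    aclprofStop(cfg);
    aclprofDestroyConfig(cfg);
    aclprofFinalize();
}
```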
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, + char *opName, size_t opNameLen); + +/** + * @ingroup AscendCL + * @brief get start time of specified op from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval start time(us) of specified op with timestamp + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get end time of specified op from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval end time(us) of specified op with timestamp + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get excution time of specified op from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * @param index [IN] index of op array in opInfo + * + * @retval execution time(us) of specified op with timestamp + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index); + +/** + * @ingroup AscendCL + * @brief get model id from subscription data + * + * @param opInfo [IN] pointer to subscription data + * @param opInfoLen [IN] memory size of subscription data + * + * @retval model id of subscription data + * @retval 0 for failed + */ +ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_PROF_H_ diff --git a/inc/external/acl/acl/acl_rt.h b/inc/external/acl/acl/acl_rt.h new file mode 100644 index 00000000..6fd2da6e --- /dev/null +++ b/inc/external/acl/acl/acl_rt.h @@ -0,0 +1,950 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_RT_H_
+#define INC_EXTERNAL_ACL_ACL_RT_H_
+
+#include <stdint.h>
+#include <stddef.h>
+#include "acl_base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum aclrtRunMode {
+  ACL_DEVICE,
+  ACL_HOST,
+} aclrtRunMode;
+
+typedef enum aclrtTsId {
+  ACL_TS_ID_AICORE = 0,
+  ACL_TS_ID_AIVECTOR = 1,
+  ACL_TS_ID_RESERVED = 2,
+} aclrtTsId;
+
+typedef enum aclrtEventStatus {
+  ACL_EVENT_STATUS_COMPLETE = 0,
+  ACL_EVENT_STATUS_NOT_READY = 1,
+  ACL_EVENT_STATUS_RESERVED = 2,
+} aclrtEventStatus;
+
+typedef enum aclrtCallbackBlockType {
+  ACL_CALLBACK_NO_BLOCK,
+  ACL_CALLBACK_BLOCK,
+} aclrtCallbackBlockType;
+
+typedef enum aclrtMemcpyKind {
+  ACL_MEMCPY_HOST_TO_HOST,
+  ACL_MEMCPY_HOST_TO_DEVICE,
+  ACL_MEMCPY_DEVICE_TO_HOST,
+  ACL_MEMCPY_DEVICE_TO_DEVICE,
+} aclrtMemcpyKind;
+
+typedef enum aclrtMemMallocPolicy {
+  ACL_MEM_MALLOC_HUGE_FIRST,
+  ACL_MEM_MALLOC_HUGE_ONLY,
+  ACL_MEM_MALLOC_NORMAL_ONLY,
+  ACL_MEM_MALLOC_HUGE_FIRST_P2P,
+  ACL_MEM_MALLOC_HUGE_ONLY_P2P,
+  ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
+} aclrtMemMallocPolicy;
+
+typedef enum aclrtMemAttr {
+  ACL_DDR_MEM,
+  ACL_HBM_MEM,
+  ACL_DDR_MEM_HUGE,
+  ACL_DDR_MEM_NORMAL,
+  ACL_HBM_MEM_HUGE,
+  ACL_HBM_MEM_NORMAL,
+  ACL_DDR_MEM_P2P_HUGE,
+  ACL_DDR_MEM_P2P_NORMAL,
+  ACL_HBM_MEM_P2P_HUGE,
+  ACL_HBM_MEM_P2P_NORMAL,
+} aclrtMemAttr;
+
+typedef enum aclrtGroupAttr {
+  ACL_GROUP_AICORE_INT,
+  ACL_GROUP_AIV_INT,
+  ACL_GROUP_AIC_INT,
+  ACL_GROUP_SDMANUM_INT,
+  ACL_GROUP_ASQNUM_INT
+} aclrtGroupAttr;
+
+typedef struct tagRtGroupInfo aclrtGroupInfo;
+
+typedef struct rtExceptionInfo aclrtExceptionInfo;
+
+typedef void (*aclrtCallback)(void *userData);
+
+typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set a callback function to handle exception information
+ *
+ * @param callback [IN] callback function to handle exception information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback); + +/** + * @ingroup AscendCL + * @brief Get task id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The task id from exception information + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief Get stream id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The stream id from exception information + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief Get thread id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The thread id of fail task + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief Get device id from exception information + * + * @param info [IN] pointer of exception information + * + * @retval The thread id of fail task + * @retval 0xFFFFFFFF if info is null + */ +ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info); + +/** + * @ingroup AscendCL + * @brief The thread that handles the callback function on the Stream + * + * @param threadId [IN] thread ID + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Add a callback function to be executed on the host + * to the task queue of the Stream + * + * @param fn [IN] Specify the callback function to be added + * The function prototype of the callback function is: + * typedef void (*aclrtCallback)(void *userData); + * @param userData [IN] User data to be passed to the callback function + * @param blockType [IN] callback block type + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief After waiting for a specified time, trigger callback processing + * + * @par Function + * The thread processing callback specified by + * the aclrtSubscribeReport interface + * + * @param timeout [IN] timeout value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSubscribeReport + */ +ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout); + +/** + * @ingroup AscendCL + * @brief Cancel thread registration, + * the callback function on the specified Stream + * is no longer processed by the specified thread + * + * @param threadId [IN] thread ID + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. 
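Callbacks launched with aclrtLaunchCallback are only delivered on a host thread that has subscribed to the stream and keeps calling aclrtProcessReport. A sketch of that pattern using pthreads, where the thread id passed to the subscribe/unsubscribe calls is the pthread_t returned by pthread_create cast to uint64_t (a Linux assumption), and aclrtSynchronizeStream is declared later in this header family:

```c
#include <pthread.h>
#include <stdbool.h>
#include "acl/acl.h"

static volatile bool g_callbackThreadRun = true;

/* Worker thread: repeatedly drains callbacks for subscribed streams. */
static void *callback_worker(void *arg)
{
    (void)arg;
    while (g_callbackThreadRun) {
        (void)aclrtProcessReport(100);  /* wait up to 100 ms per iteration */
    }
    return NULL;
}

/* Runs on the worker thread once the stream reaches the callback task. */
static void my_callback(void *userData)
{
    (void)userData;
}

static void launch_with_callback(aclrtStream stream)
{
    pthread_t tid;
    pthread_create(&tid, NULL, callback_worker, NULL);

    aclrtSubscribeReport((uint64_t)tid, stream);
    /* ... enqueue kernels / memcpys on the stream ... */
    aclrtLaunchCallback(my_callback, NULL, ACL_CALLBACK_BLOCK, stream);
    aclrtSynchronizeStream(stream);

    aclrtUnSubscribeReport((uint64_t)tid, stream);
    g_callbackThreadRun = false;
    pthread_join(tid, NULL);
}
```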
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create context and associates it with the calling thread + * + * @par Function + * The following use cases are supported: + * @li If you don't call the aclrtCreateContext interface + * to explicitly create the context, + * the system will use the default context, which is implicitly created + * when the aclrtSetDevice interface is called. + * @li If multiple contexts are created in a process + * (there is no limit on the number of contexts), + * the current thread can only use one of them at the same time. + * It is recommended to explicitly specify the context of the current thread + * through the aclrtSetCurrentContext interface to increase. + * the maintainability of the program. + * + * @param context [OUT] point to the created context + * @param deviceId [IN] device to create context on + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSetDevice | aclrtSetCurrentContext + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId); + +/** + * @ingroup AscendCL + * @brief destroy context instance + * + * @par Function + * Can only destroy context created through aclrtCreateContext interface + * + * @param context [IN] the context to destroy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateContext + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context); + +/** + * @ingroup AscendCL + * @brief set the context of the thread + * + * @par Function + * The following scenarios are supported: + * @li If the aclrtCreateContext interface is called in a thread to explicitly + * create a Context (for example: ctx1), the thread's Context can be specified + * without calling the aclrtSetCurrentContext interface. + * The system uses ctx1 as the context of thread1 by default. + * @li If the aclrtCreateContext interface is not explicitly created, + * the system uses the default context as the context of the thread. + * At this time, the aclrtDestroyContext interface cannot be used to release + * the default context. + * @li If the aclrtSetCurrentContext interface is called multiple times to + * set the thread's Context, the last one prevails. + * + * @par Restriction + * @li If the cevice corresponding to the context set for the thread + * has been reset, you cannot set the context as the context of the thread, + * otherwise a business exception will result. + * @li It is recommended to use the context created in a thread. + * If the aclrtCreateContext interface is called in thread A to create a context, + * and the context is used in thread B, + * the user must guarantee the execution order of tasks in the same stream + * under the same context in two threads. + * + * @param context [IN] the current context of the thread + * + * @retval ACL_SUCCESS The function is successfully executed. 
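aclrtSubscribeReport, aclrtLaunchCallback, aclrtProcessReport and aclrtUnSubscribeReport form one pattern: a host thread is bound to a stream, callbacks are queued on that stream, and the bound thread drains them. A rough sketch follows; the pthread plumbing, the cast of pthread_t to the 64-bit thread id, and the 100 ms timeout are assumptions of this sketch rather than requirements stated in the header.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include "acl/acl.h"

static volatile bool g_running = true;      /* stop flag for the report thread */

static void MyCallback(void *userData)      /* executed on the subscribed thread */
{
    printf("callback ran, userData=%p\n", userData);
}

static void *ReportThread(void *arg)
{
    (void)arg;
    while (g_running) {
        /* Drain pending callbacks; wait at most 100 ms per round (illustrative value). */
        (void)aclrtProcessReport(100);
    }
    return NULL;
}

/* Caller owns 'stream' and has already set the device/context. */
static aclError RunCallbackOnStream(aclrtStream stream)
{
    pthread_t tid;
    pthread_create(&tid, NULL, ReportThread, NULL);

    /* Bind the worker thread to the stream, then queue a host callback on it. */
    aclError ret = aclrtSubscribeReport((uint64_t)tid, stream);   /* cast is an assumption */
    if (ret == ACL_SUCCESS) {
        ret = aclrtLaunchCallback(MyCallback, NULL, ACL_CALLBACK_NO_BLOCK, stream);
    }
    (void)aclrtSynchronizeStream(stream);    /* declared further down in this header */
    (void)aclrtUnSubscribeReport((uint64_t)tid, stream);

    g_running = false;
    pthread_join(tid, NULL);
    return ret;
}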
+ * @retval OtherValues Failure + * + * @see aclrtCreateContext | aclrtDestroyContext + */ +ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context); + +/** + * @ingroup AscendCL + * @brief get the context of the thread + * + * @par Function + * If the user calls the aclrtSetCurrentContext interface + * multiple times to set the context of the current thread, + * then the last set context is obtained + * + * @param context [OUT] the current context of the thread + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSetCurrentContext + */ +ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context); + +/** + * @ingroup AscendCL + * @brief Specify the device to use for the operation + * implicitly create the default context and the default stream + * + * @par Function + * The following use cases are supported: + * @li Device can be specified in the process or thread. + * If you call the aclrtSetDevice interface multiple + * times to specify the same device, + * you only need to call the aclrtResetDevice interface to reset the device. + * @li The same device can be specified for operation + * in different processes or threads. + * @li Device is specified in a process, + * and multiple threads in the process can share this device to explicitly + * create a Context (aclrtCreateContext interface). + * @li In multi-device scenarios, you can switch to other devices + * through the aclrtSetDevice interface in the process. + * + * @param deviceId [IN] the device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtResetDevice |aclrtCreateContext + */ +ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId); + +/** + * @ingroup AscendCL + * @brief Reset the current operating Device and free resources on the device, + * including the default context, the default stream, + * and all streams created under the default context, + * and synchronizes the interface. + * If the task under the default context or stream has not been completed, + * the system will wait for the task to complete before releasing it. + * + * @par Restriction + * @li The Context, Stream, and Event that are explicitly created + * on the device to be reset. Before resetting, + * it is recommended to follow the following interface calling sequence, + * otherwise business abnormalities may be caused. + * @li Interface calling sequence: + * call aclrtDestroyEvent interface to release Event or + * call aclrtDestroyStream interface to release explicitly created Stream-> + * call aclrtDestroyContext to release explicitly created Context-> + * call aclrtResetDevice interface + * + * @param deviceId [IN] the device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId); + +/** + * @ingroup AscendCL + * @brief get target device of current thread + * + * @param deviceId [OUT] the device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId); + +/** + * @ingroup AscendCL + * @brief get target side + * + * @param runMode [OUT] the run mode + * + * @retval ACL_SUCCESS The function is successfully executed. 
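The device and context interfaces above typically bracket all other calls, and the reset restriction means teardown has to run in the reverse order of setup: streams and events first, then explicitly created contexts, then the device. A minimal sketch for a single device with one explicit context, error handling abbreviated:

#include "acl/acl.h"

static aclError DeviceContextLifecycle(void)
{
    const int32_t deviceId = 0;
    aclrtContext ctx = NULL;
    aclrtStream stream = NULL;

    aclError ret = aclrtSetDevice(deviceId);
    if (ret != ACL_SUCCESS) { return ret; }

    ret = aclrtCreateContext(&ctx, deviceId);        /* explicit context (optional) */
    if (ret != ACL_SUCCESS) { (void)aclrtResetDevice(deviceId); return ret; }

    ret = aclrtSetCurrentContext(ctx);               /* make it current for this thread */
    if (ret == ACL_SUCCESS) {
        ret = aclrtCreateStream(&stream);            /* declared further down in this header */
    }

    /* ... submit work on 'stream' ... */

    if (stream != NULL) {
        (void)aclrtSynchronizeStream(stream);        /* finish queued work before destroying */
        (void)aclrtDestroyStream(stream);
    }
    (void)aclrtDestroyContext(ctx);
    (void)aclrtResetDevice(deviceId);
    return ret;
}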
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode); + +/** + * @ingroup AscendCL + * @brief Wait for compute device to finish + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void); + +/** + * @ingroup AscendCL + * @brief Set Scheduling TS + * + * @param tsId [IN] the ts id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId); + +/** + * @ingroup AscendCL + * @brief get total device number. + * + * @param count [OUT] the device number + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count); + +/** + * @ingroup AscendCL + * @brief create event instance + * + * @param event [OUT] created event + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event); + +/** + * @ingroup AscendCL + * @brief destroy event instance + * + * @par Function + * Only events created through the aclrtCreateEvent interface can be + * destroyed, synchronous interfaces. When destroying an event, + * the user must ensure that the tasks involved in the aclrtSynchronizeEvent + * interface or the aclrtStreamWaitEvent interface are completed before + * they are destroyed. + * + * @param event [IN] event to destroy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event); + +/** + * @ingroup AscendCL + * @brief Record an Event in the Stream + * + * @param event [IN] event to record + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Reset an event + * + * @par Function + * Users need to make sure to wait for the tasks in the Stream + * to complete before resetting the Event + * + * @param event [IN] event to reset + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); + + /** + * @ingroup AscendCL + * @brief Queries an event's status + * + * @param event [IN] event to query + * @param status [OUT] event status + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status); + +/** + * @ingroup AscendCL + * @brief Block Host Running, wait event to be complete + * + * @param event [IN] event to wait + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); + +/** + * @ingroup AscendCL + * @brief computes the elapsed time between events. + * + * @param ms [OUT] time between start and end in ms + * @param start [IN] starting event + * @param end [IN] ending event + * + * @retval ACL_SUCCESS The function is successfully executed. 
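The event interfaces above combine naturally into a timing bracket: record one event before the work, one after, synchronize on the second, and read the gap with aclrtEventElapsedTime (declared immediately below). A small sketch with the timed work left as a placeholder:

#include <stdio.h>
#include "acl/acl.h"

static aclError TimeStreamWork(aclrtStream stream)
{
    aclrtEvent start = NULL;
    aclrtEvent end = NULL;
    float ms = 0.0f;

    aclError ret = aclrtCreateEvent(&start);
    if (ret != ACL_SUCCESS) { return ret; }
    ret = aclrtCreateEvent(&end);
    if (ret != ACL_SUCCESS) { (void)aclrtDestroyEvent(start); return ret; }

    (void)aclrtRecordEvent(start, stream);
    /* ... enqueue asynchronous work on 'stream' here ... */
    (void)aclrtRecordEvent(end, stream);

    (void)aclrtSynchronizeEvent(end);              /* block until 'end' has completed */
    (void)aclrtEventElapsedTime(&ms, start, end);
    printf("elapsed: %.3f ms\n", ms);

    (void)aclrtDestroyEvent(start);
    (void)aclrtDestroyEvent(end);
    return ACL_SUCCESS;
}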
+ * @retval OtherValues Failure + * + * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); + +/** + * @ingroup AscendCL + * @brief alloc memory on device + * + * @par Function + * alloc for size linear memory on device + * and return a pointer to allocated memory by *devPtr + * + * @par Restriction + * @li The memory requested by the aclrtMalloc interface needs to be released + * through the aclrtFree interface. + * @li Before calling the media data processing interface, + * if you need to apply memory on the device to store input or output data, + * you need to call acldvppMalloc to apply for memory. + * + * @param devPtr [OUT] pointer to pointer to allocated memory on device + * @param size [IN] alloc memory size + * @param policy [IN] memory alloc policy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtFree | acldvppMalloc | aclrtMallocCached + */ +ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, + size_t size, + aclrtMemMallocPolicy policy); + +/** + * @ingroup AscendCL + * @brief allocate memory on device with cache + * + * @par Function + * alloc for size linear memory on device + * and return a pointer to allocated memory by *devPtr + * + * @par Restriction + * @li The memory requested by the aclrtMallocCached interface needs to be released + * through the aclrtFree interface. + * + * @param devPtr [OUT] pointer to pointer to allocated memory on device + * @param size [IN] alloc memory size + * @param policy [IN] memory alloc policy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtFree | aclrtMalloc + */ +ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, + size_t size, + aclrtMemMallocPolicy policy); + +/** + * @ingroup AscendCL + * @brief flush cache data to ddr + * + * @param devPtr [IN] the pointer that flush data to ddr + * @param size [IN] flush size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief invalidate cache data + * + * @param devPtr [IN] pointer to invalidate cache data + * @param size [IN] invalidate size + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free device memory + * + * @par Function + * can only free memory allocated through the aclrtMalloc interface + * + * @param devPtr [IN] Pointer to memory to be freed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtMalloc + */ +ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr); + +/** + * @ingroup AscendCL + * @brief alloc memory on host + * + * @par Restriction + * @li The requested memory cannot be used in the Device + * and needs to be explicitly copied to the Device. + * @li The memory requested by the aclrtMallocHost interface + * needs to be released through the aclrtFreeHost interface. + * + * @param hostPtr [OUT] pointer to pointer to allocated memory on the host + * @param size [IN] alloc memory size + * + * @retval ACL_SUCCESS The function is successfully executed. 
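As a rough illustration of the allocation interfaces above: a plain device buffer comes from aclrtMalloc, a cacheable one from aclrtMallocCached, and both go back through aclrtFree. The flush call reflects one assumed use of aclrtMemFlush (publishing host-side writes made in ACL_DEVICE run mode) rather than a requirement spelled out here.

#include "acl/acl.h"

static aclError AllocDeviceBuffers(size_t size)
{
    void *plain = NULL;
    void *cached = NULL;

    aclError ret = aclrtMalloc(&plain, size, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_SUCCESS) { return ret; }

    ret = aclrtMallocCached(&cached, size, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret == ACL_SUCCESS) {
        /* ... fill 'cached' through the CPU cache ... */
        ret = aclrtMemFlush(cached, size);   /* write the cached data back (assumed usage) */
        (void)aclrtFree(cached);
    }
    (void)aclrtFree(plain);
    return ret;
}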
+ * @retval OtherValues Failure + * + * @see aclrtFreeHost + */ +ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free host memory + * + * @par Function + * can only free memory allocated through the aclrtMallocHost interface + * + * @param hostPtr [IN] free memory pointer + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtMallocHost + */ +ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); + +/** + * @ingroup AscendCL + * @brief synchronous memory replication between host and device + * + * @param dst [IN] destination address pointer + * @param destMax [IN] Max length of the destination address memory + * @param src [IN] source address pointer + * @param count [IN] the length of byte to copy + * @param kind [IN] memcpy type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, + size_t destMax, + const void *src, + size_t count, + aclrtMemcpyKind kind); + +/** + * @ingroup AscendCL + * @brief Initialize memory and set contents of memory to specified value + * + * @par Function + * The memory to be initialized is on the Host or device side, + * and the system determines whether + * it is host or device according to the address + * + * @param devPtr [IN] Starting address of memory + * @param maxCount [IN] Max length of destination address memory + * @param value [IN] Set value + * @param count [IN] The length of memory + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count); + +/** + * @ingroup AscendCL + * @brief Asynchronous memory replication between Host and Device + * + * @par Function + * After calling this interface, + * be sure to call the aclrtSynchronizeStream interface to ensure that + * the task of memory replication has been completed + * + * @par Restriction + * @li For on-chip Device-to-Device memory copy, + * both the source and destination addresses must be 64-byte aligned + * + * @param dst [IN] destination address pointer + * @param destMax [IN] Max length of destination address memory + * @param src [IN] source address pointer + * @param count [IN] the number of byte to copy + * @param kind [IN] memcpy type + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, + size_t destMax, + const void *src, + size_t count, + aclrtMemcpyKind kind, + aclrtStream stream); + +/** +* @ingroup AscendCL +* @brief Asynchronous initialize memory +* and set contents of memory to specified value async +* +* @par Function + * The memory to be initialized is on the Host or device side, + * and the system determines whether + * it is host or device according to the address + * +* @param devPtr [IN] destination address pointer +* @param maxCount [IN] Max length of destination address memory +* @param value [IN] set value +* @param count [IN] the number of byte to set +* @param stream [IN] asynchronized task stream +* +* @retval ACL_SUCCESS The function is successfully executed. 
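A typical round trip with the copy interfaces above: host memory from aclrtMallocHost, a synchronous host-to-device copy, a device-side memset, then an asynchronous copy back that must be fenced with aclrtSynchronizeStream (declared further down in this header) before the host buffer is read. Sketch, error paths abbreviated:

#include <string.h>
#include "acl/acl.h"

static aclError RoundTripCopy(aclrtStream stream, size_t size)
{
    void *hostBuf = NULL;
    void *devBuf = NULL;

    aclError ret = aclrtMallocHost(&hostBuf, size);
    if (ret != ACL_SUCCESS) { return ret; }
    ret = aclrtMalloc(&devBuf, size, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_SUCCESS) { (void)aclrtFreeHost(hostBuf); return ret; }

    memset(hostBuf, 0xA5, size);
    ret = aclrtMemcpy(devBuf, size, hostBuf, size, ACL_MEMCPY_HOST_TO_DEVICE);
    if (ret == ACL_SUCCESS) {
        ret = aclrtMemset(devBuf, size, 0, size);                 /* clear it on the device */
    }
    if (ret == ACL_SUCCESS) {
        ret = aclrtMemcpyAsync(hostBuf, size, devBuf, size,
                               ACL_MEMCPY_DEVICE_TO_HOST, stream);
        if (ret == ACL_SUCCESS) {
            ret = aclrtSynchronizeStream(stream);  /* required before reading hostBuf */
        }
    }

    (void)aclrtFree(devBuf);
    (void)aclrtFreeHost(hostBuf);
    return ret;
}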
+* @retval OtherValues Failure +* +* @see aclrtSynchronizeStream +*/ +ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, + size_t maxCount, + int32_t value, + size_t count, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create stream instance + * + * @param stream [OUT] the created stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream); + +/** + * @ingroup AscendCL + * @brief destroy stream instance + * + * @par Function + * Can only destroy streams created through the aclrtCreateStream interface + * + * @par Restriction + * Before calling the aclrtDestroyStream interface to destroy + * the specified Stream, you need to call the aclrtSynchronizeStream interface + * to ensure that the tasks in the Stream have been completed. + * + * @param stream [IN] the stream to destroy + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtCreateStream | aclrtSynchronizeStream + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief block the host until all tasks + * in the specified stream have completed + * + * @param stream [IN] the stream to wait + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Blocks the operation of the specified Stream until + * the specified Event is completed. + * Support for multiple streams waiting for the same event. + * + * @param stream [IN] the wait stream If using thedefault Stream, set NULL + * @param event [IN] the event to wait + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event); + +/** + * @ingroup AscendCL + * @brief set group + * + * @par Function + * set the task to the corresponding group + * + * @param groupId [IN] group id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail + */ +ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId); + +/** + * @ingroup AscendCL + * @brief get the number of group + * + * @par Function + * get the number of group. if the number of group is zero, + * it means that group is not supported or group is not created. + * + * @param count [OUT] the number of group + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + */ +ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count); + +/** + * @ingroup AscendCL + * @brief create group information + * + * @retval null for failed. + * @retval OtherValues success. + * + * @see aclrtDestroyGroupInfo + */ +ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo(); + +/** + * @ingroup AscendCL + * @brief destroy group information + * + * @param groupInfo [IN] pointer to group information + * + * @retval ACL_SUCCESS The function is successfully executed. 
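aclrtStreamWaitEvent is the piece that lets two streams be ordered without blocking the host: record an event at the end of the producer stream and make the consumer stream wait on it. A minimal sketch of that dependency chain:

#include "acl/acl.h"

static aclError ChainStreams(void)
{
    aclrtStream streamA = NULL;    /* producer */
    aclrtStream streamB = NULL;    /* consumer */
    aclrtEvent done = NULL;

    aclError ret = aclrtCreateStream(&streamA);
    if (ret != ACL_SUCCESS) { return ret; }
    ret = aclrtCreateStream(&streamB);
    if (ret != ACL_SUCCESS) { (void)aclrtDestroyStream(streamA); return ret; }
    ret = aclrtCreateEvent(&done);

    if (ret == ACL_SUCCESS) {
        /* ... enqueue producer work on streamA ... */
        (void)aclrtRecordEvent(done, streamA);      /* marks the end of the producer work */
        (void)aclrtStreamWaitEvent(streamB, done);  /* consumer work will wait on 'done' */
        /* ... enqueue consumer work on streamB ... */
        (void)aclrtSynchronizeStream(streamB);
        (void)aclrtDestroyEvent(done);
    }

    (void)aclrtDestroyStream(streamB);
    (void)aclrtDestroyStream(streamA);
    return ret;
}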
+ * @retval OtherValues Failure + * + * @see aclrtCreateGroupInfo + */ +ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo); + +/** + * @ingroup AscendCL + * @brief get all group information + * + * @param groupInfo [OUT] pointer to group information + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtGetGroupCount + */ +ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); + +/** + * @ingroup AscendCL + * @brief get detail information of group + * + * @param groupInfo [IN] pointer to group information + * @param groupId [IN] group index value + * @param attr [IN] group attribute + * @param attrValue [OUT] pointer to attribute value + * @param valueLen [IN] length of attribute value + * @param paramRetSize [OUT] pointer to real length of attribute value + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtGetGroupCount | aclrtGetAllGroupInfo + */ +ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, + int32_t groupId, + aclrtGroupAttr attr, + void *attrValue, + size_t valueLen, + size_t *paramRetSize); + +/** + * @ingroup AscendCL + * @brief checking whether current device and peer device support the p2p feature + * + * @param canAccessPeer [OUT] pointer to save the checking result + * @param deviceId [IN] current device id + * @param peerDeviceId [IN] peer device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess + */ +ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId); + +/** + * @ingroup AscendCL + * @brief enable the peer device to support the p2p feature + * + * @param peerDeviceId [IN] the peer device id + * @param flags [IN] reserved field, now it must be zero + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess + */ +ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags); + +/** + * @ingroup AscendCL + * @brief disable the peer device to support the p2p function + * + * @param peerDeviceId [IN] the peer device id + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess + */ +ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId); + +/** + * @ingroup AscendCL + * @brief Obtain the free memory and total memory of specified attribute. + * the specified memory include normal memory and huge memory. + * + * @param attr [IN] the memory attribute of specified device + * @param free [OUT] the free memory of specified device + * @param total [OUT] the total memory of specified device. + * + * @retval ACL_SUCCESS The function is successfully executed. 
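The peer-access trio above is normally used as query, enable, copy, disable. A short sketch; it assumes aclrtSetDevice(deviceId) has already been called on the current thread, since enabling applies to the current device's access to the peer (an assumption, as the header does not spell this out):

#include "acl/acl.h"

static aclError EnableP2P(int32_t deviceId, int32_t peerDeviceId)
{
    int32_t canAccess = 0;

    aclError ret = aclrtDeviceCanAccessPeer(&canAccess, deviceId, peerDeviceId);
    if (ret != ACL_SUCCESS || canAccess == 0) {
        return ret;                              /* unsupported pair: nothing to enable */
    }

    ret = aclrtDeviceEnablePeerAccess(peerDeviceId, 0);   /* flags reserved, must be 0 */

    /* ... peer copies, e.g. aclrtMemcpy with ACL_MEMCPY_DEVICE_TO_DEVICE ... */

    (void)aclrtDeviceDisablePeerAccess(peerDeviceId);
    return ret;
}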
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_RT_H_ + diff --git a/inc/external/acl/acl/acl_tdt.h b/inc/external/acl/acl/acl_tdt.h new file mode 100644 index 00000000..61995121 --- /dev/null +++ b/inc/external/acl/acl/acl_tdt.h @@ -0,0 +1,283 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_TDT_H_ +#define INC_EXTERNAL_ACL_ACL_TDT_H_ + +#include "acl/acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum acltdtTensorType { + ACL_TENSOR_DATA_UNDEFINED = -1, + ACL_TENSOR_DATA_TENSOR, + ACL_TENSOR_DATA_END_OF_SEQUENCE, + ACL_TENSOR_DATA_ABNORMAL +}; + +typedef struct acltdtDataItem acltdtDataItem; +typedef struct acltdtDataset acltdtDataset; +typedef struct acltdtChannelHandle acltdtChannelHandle; + +/** + * @ingroup AscendCL + * @brief Get tensor type from item + * + * @param dataItem [IN] pointer to the data item + * + * @retval Tensor type. + * @retval ACL_DT_UNDEFINED if dataItem is null + */ +ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get data type from item + * + * @param dataItem [IN] pointer to the data item + * + * @retval Data type. + * @retval ACL_DT_UNDEFINED if dataItem is null + */ +ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get data address from item + * + * @param dataItem [IN] pointer to data item + * + * @retval null for failed + * @retval OtherValues success +*/ +ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get data size from item + * + * @param dataItem [IN] pointer to data item + * + * @retval 0 for failed + * @retval OtherValues success +*/ +ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get dim's number from item + * + * @param dataItem [IN] pointer to data item + * + * @retval 0 for failed + * @retval OtherValues success +*/ +ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get dims from item + * + * @param dataItem [IN] the struct of data item + * @param dims [IN|OUT] pointer to the dims of dataTtem + * @param dimNum [IN] the size of the dims + * + * @retval ACL_SUCCESS The function is successfully executed. 
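The item accessors above are usually applied to data items pulled out of a dataset that came from a channel (see acltdtReceiveTensor further down). A sketch that dumps one item; the include path "acl/acl_tdt.h", the fixed dims array of 8, and the use of acltdtGetDimsFromItem (declared just below) are assumptions of this sketch.

#include <stdint.h>
#include <stdio.h>
#include "acl/acl.h"
#include "acl/acl_tdt.h"

static void DumpDataItem(const acltdtDataItem *item)
{
    acltdtTensorType type = acltdtGetTensorTypeFromItem(item);
    if (type != ACL_TENSOR_DATA_TENSOR) {
        printf("non-tensor item (type=%d)\n", (int)type);
        return;
    }

    void *data = acltdtGetDataAddrFromItem(item);
    size_t size = acltdtGetDataSizeFromItem(item);
    size_t dimNum = acltdtGetDimNumFromItem(item);

    int64_t dims[8] = {0};                     /* 8 is an illustrative upper bound */
    if (dimNum <= 8 && acltdtGetDimsFromItem(item, dims, dimNum) == ACL_SUCCESS) {
        printf("tensor: %zu bytes at %p, %zu dims, dim0=%lld\n",
               size, data, dimNum, dimNum > 0 ? (long long)dims[0] : 0LL);
    }
}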
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum); + +/** + * @ingroup AscendCL + * @brief Create the struct of data item + * + * @param tdtType [IN] Tdt tensor type + * @param dims [IN] pointer of tdtDataItem's dims + * @param dimNum [IN] Dim number + * @param dataType [IN] Data type + * @param data [IN] Data pointer + * @param size [IN] Data size + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyDataItem + */ +ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, + const int64_t *dims, + size_t dimNum, + aclDataType dataType, + void *data, + size_t size); + +/** + * @ingroup AscendCL + * @brief Destroy the struct of data item + * + * @param dataItem [IN] pointer to the data item + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateDataItem + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Create the tdt dataset + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyDataset + */ +ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset(); + +/** + * @ingroup AscendCL + * @brief Destroy the tdt dataset + * + * @param dataset [IN] pointer to the dataset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateDataset + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Get the data item + * + * @param dataset [IN] pointer to the dataset + * @param index [IN] index of the dataset + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtAddDataItem + */ +ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index); + +/** + * @ingroup AscendCL + * @brief Get the data item + * + * @param dataset [OUT] pointer to the dataset + * @param dataItem [IN] pointer to the data item + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtGetDataItem + */ +ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem); + +/** + * @ingroup AscendCL + * @brief Get the size of dataset + * + * @param dataset [IN] pointer to the dataset + * + * @retval 0 for failed + * @retval OtherValues success + */ +ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset); + +/** + * @ingroup AscendCL + * @brief Stop the channel + * + * @param handle [IN] pointer to the channel handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateChannel | acltdtDestroyChannel + */ +ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle); + +/** + * @ingroup AscendCL + * @brief Create the channel + * + * @param deviceId [IN] the device id + * @param name [IN] the channel's name + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtStopChannel | acltdtDestroyChannel + */ +ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name); + +/** + * @ingroup AscendCL + * @brief Destroy the channel + * + * @param handle [IN] pointer to the channel handle + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see acltdtCreateChannel | acltdtStopChannel + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); + +/** + * @ingroup AscendCL + * @brief Send tensor to device + * + * @param handle [IN] pointer to the channel handle + * @param dataset [IN] pointer to the dataset + * @param timeout [IN] to be reserved, now it must be -1 + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtReceiveTensor + */ +ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, + const acltdtDataset *dataset, + int32_t timeout); + +/** + * @ingroup AscendCL + * @brief Receive tensor from device + * + * @param handle [IN] pointer to the channel handle + * @param dataset [OUT] pointer to the dataset + * @param timeout [IN] to be reserved, now it must be -1 + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtSendTensor + */ +ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, + acltdtDataset *dataset, + int32_t timeout); + +#ifdef __cplusplus +} +#endif + +#endif //INC_EXTERNAL_ACL_ACL_TDT_H_ + diff --git a/inc/external/acl/acl/error_codes/ge_error_codes.h b/inc/external/acl/acl/error_codes/ge_error_codes.h new file mode 100644 index 00000000..b477a18c --- /dev/null +++ b/inc/external/acl/acl/error_codes/ge_error_codes.h @@ -0,0 +1,75 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
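End to end, the host side of the channel API above is: create a channel, create an empty dataset, receive into it, walk the items, then stop and destroy the channel. A sketch for device 0; the channel name "tdt_queue" is illustrative, and the timeout is -1 as the header requires.

#include <stdio.h>
#include "acl/acl.h"
#include "acl/acl_tdt.h"

static void ReceiveOnce(void)
{
    acltdtChannelHandle *channel = acltdtCreateChannel(0, "tdt_queue");
    acltdtDataset *dataset = acltdtCreateDataset();

    if (channel == NULL || dataset == NULL) {
        printf("channel or dataset creation failed\n");
    } else if (acltdtReceiveTensor(channel, dataset, -1) == ACL_SUCCESS) {
        size_t n = acltdtGetDatasetSize(dataset);
        for (size_t i = 0; i < n; ++i) {
            acltdtDataItem *item = acltdtGetDataItem(dataset, i);
            printf("item %zu: %zu bytes\n", i, acltdtGetDataSizeFromItem(item));
        }
    }

    if (dataset != NULL) { (void)acltdtDestroyDataset(dataset); }
    if (channel != NULL) {
        (void)acltdtStopChannel(channel);
        (void)acltdtDestroyChannel(channel);
    }
}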
+ */ + +#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ +#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ + +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + +#include + +#ifdef __cplusplus +extern "C" { +#endif +static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000; +static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007; +static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009; +static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011; +static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012; +static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013; +static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014; +static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; +static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; +static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; +static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; +static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID = 145020; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID = 145021; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID = 145022; +static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; +static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; +static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; +static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004; +static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005; +static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; +static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; +static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; +static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; + +#ifdef __cplusplus +} // namespace ge +#endif +#endif // INC_EXTERNAL_GE_GE_ERROR_CODES_H_ diff --git a/inc/external/acl/acl/error_codes/rt_error_codes.h b/inc/external/acl/acl/error_codes/rt_error_codes.h new file mode 100644 index 00000000..47f16d9f --- /dev/null +++ b/inc/external/acl/acl/error_codes/rt_error_codes.h @@ -0,0 +1,102 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ +#define __INC_EXTERNEL_RT_ERROR_CODES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static const int32_t ACL_RT_SUCCESS = 0; // success + +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type + +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource + +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t 
ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error + +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error + +#ifdef __cplusplus +} +#endif + +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/external/acl/acl/ops/acl_cblas.h b/inc/external/acl/acl/ops/acl_cblas.h new file mode 100644 index 00000000..a2bd8c61 --- /dev/null +++ b/inc/external/acl/acl/ops/acl_cblas.h @@ -0,0 +1,431 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ +#define INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ + +#include "acl/acl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum aclTransType { + ACL_TRANS_N, + ACL_TRANS_T, + ACL_TRANS_NZ, + ACL_TRANS_NZ_T +} aclTransType; + +typedef enum aclComputeType { + ACL_COMPUTE_HIGH_PRECISION, + ACL_COMPUTE_LOW_PRECISION +} aclComputeType; + +/** + * @ingroup AscendCL + * @brief perform the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param alpha [IN] pointer to scalar used for multiplication. + * of same type as dataTypeC + * @param a [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param dataTypeA [IN] datatype of matrix A + * @param x [IN] pointer to vector x + * @param incx [IN] stride between consecutive elements of vector x + * @param dataTypeX [IN] datatype of vector x + * @param beta [IN] pointer to scalar used for multiplication. + * of same type as dataTypeC If beta == 0, + * then y does not have to be a valid input + * @param y [IN|OUT] pointer to vector y + * @param incy [IN] stride between consecutive elements of vector y + * @param dataTypeY [IN] datatype of vector y + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure +*/ +ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, + const void *alpha, const void *a, int lda, aclDataType dataTypeA, + const void *x, int incx, aclDataType dataTypeX, + const void *beta, void *y, int incy, aclDataType dataTypeY, + aclComputeType type, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param dataTypeA [IN] datatype of matrix A + * @param dataTypeX [IN] datatype of vector x + * @param dataTypeY [IN] datatype of vector y + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure +*/ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, + int m, + int n, + aclDataType dataTypeA, + aclDataType dataTypeX, + aclDataType dataTypeY, + aclComputeType type, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param alpha [IN] pointer to scalar used for multiplication + * @param a [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param x [IN] pointer to vector x + * @param incx [IN] stride between consecutive elements of vector x + * @param beta [IN] pointer to scalar used for multiplication. + * If beta value == 0, + * then y does not have to be a valid input + * @param y [IN|OUT] pointer to vector y + * @param incy [IN] stride between consecutive elements of vector y + * @param type [IN] computation type + * @param stream [IN] stream + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, + int m, + int n, + const aclFloat16 *alpha, + const aclFloat16 *a, + int lda, + const aclFloat16 *x, + int incx, + const aclFloat16 *beta, + aclFloat16 *y, + int incy, + aclComputeType type, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, + int m, + int n, + aclComputeType type, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param alpha [IN] pointer to scalar used for multiplication + * @param a [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param x [IN] pointer to vector x + * @param incx [IN] stride between consecutive elements of vector x + * @param beta [IN] pointer to scalar used for multiplication. + * If beta value == 0, + * then y does not have to be a valid input + * @param y [IN|OUT] pointer to vector y + * @param incy [IN] stride between consecutive elements of vector y + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, + int m, + int n, + const int32_t *alpha, + const int8_t *a, + int lda, + const int8_t *x, + int incx, + const int32_t *beta, + int32_t *y, + int incy, + aclComputeType type, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-vector multiplication + * + * @param transA [IN] transpose type of matrix A + * @param m [IN] number of rows of matrix A + * @param n [IN] number of columns of matrix A + * @param handle [OUT] pointer to the pointer to the handle + * @param type [IN] computation type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, + int m, + int n, + aclComputeType type, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param alpha [IN] pointer to scalar used for multiplication. 
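A half-precision matrix-vector product with aclblasHgemv might look like the sketch below. It assumes the matrix and vectors already live in device memory, that alpha/beta may be passed as host-side aclFloat16 values built with aclFloatToFloat16 from acl_base.h, that the leading dimension is the row-major one, and that the single-operator model backing the BLAS call has been made available (for example via aclopSetModelDir); none of those points are stated in this hunk.

#include "acl/acl.h"
#include "acl/ops/acl_cblas.h"

/* y = alpha * A * x + beta * y for an m x n half-precision matrix A. */
static aclError HalfGemv(const aclFloat16 *devA, const aclFloat16 *devX,
                         aclFloat16 *devY, int m, int n, aclrtStream stream)
{
    aclFloat16 alpha = aclFloatToFloat16(1.0f);   /* conversion helper from acl_base.h */
    aclFloat16 beta  = aclFloatToFloat16(0.0f);

    aclError ret = aclblasHgemv(ACL_TRANS_N, m, n,
                                &alpha, devA, n,  /* lda: row-major leading dimension (assumed) */
                                devX, 1,
                                &beta, devY, 1,
                                ACL_COMPUTE_HIGH_PRECISION, stream);
    if (ret == ACL_SUCCESS) {
        ret = aclrtSynchronizeStream(stream);     /* the call is asynchronous on 'stream' */
    }
    return ret;
}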
of same type as dataTypeC + * @param matrixA [IN] pointer to matrix A + * @param lda [IN] leading dimension array used to store matrix A + * @param dataTypeA [IN] datatype of matrix A + * @param matrixB [IN] pointer to matrix B + * @param ldb [IN] leading dimension array used to store matrix B + * @param dataTypeB [IN] datatype of matrix B + * @param beta [IN] pointer to scalar used for multiplication. + * of same type as dataTypeC If beta == 0, + * then matrixC does not have to be a valid input + * @param matrixC [IN|OUT] pointer to matrix C + * @param ldc [IN] leading dimension array used to store matrix C + * @param dataTypeC [IN] datatype of matrix C + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + const void *alpha, + const void *matrixA, + int lda, + aclDataType dataTypeA, + const void *matrixB, + int ldb, + aclDataType dataTypeB, + const void *beta, + void *matrixC, + int ldc, + aclDataType dataTypeC, + aclComputeType type, + aclrtStream stream); + + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param dataTypeA [IN] datatype of matrix A + * @param dataTypeB [IN] datatype of matrix B + * @param dataTypeC [IN] datatype of matrix C + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * @param type [IN] computation type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + aclDataType dataTypeA, + aclDataType dataTypeB, + aclDataType dataTypeC, + aclComputeType type, + aclopHandle **handle); + + +/** + * @ingroup AscendCL + * @brief perform the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param alpha [IN] pointer to scalar used for multiplication + * @param matrixA [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param matrixB [IN] pointer to matrix B + * @param ldb [IN] leading dimension used to store the matrix B + * @param beta [IN] pointer to scalar used for multiplication. + * If beta value == 0, + * then matrixC does not have to be a valid input + * @param matrixC [IN|OUT] pointer to matrix C + * @param ldc [IN] leading dimension used to store the matrix C + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + const aclFloat16 *alpha, + const aclFloat16 *matrixA, + int lda, + const aclFloat16 *matrixB, + int ldb, + const aclFloat16 *beta, + aclFloat16 *matrixC, + int ldc, + aclComputeType type, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + aclComputeType type, + aclopHandle **handle); + +/** + * @ingroup AscendCL + * @brief perform the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param alpha [IN] pointer to scalar used for multiplication + * @param matrixA [IN] pointer to matrix A + * @param lda [IN] leading dimension used to store the matrix A + * @param matrixB [IN] pointer to matrix B + * @param ldb [IN] leading dimension used to store the matrix B + * @param beta [IN] pointer to scalar used for multiplication. + * If beta value == 0, + * then matrixC does not have to be a valid input + * @param matrixC [IN|OUT] pointer to matrix C + * @param ldc [IN] leading dimension used to store the matrix C + * @param type [IN] computation type + * @param stream [IN] stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + const int32_t *alpha, + const int8_t *matrixA, + int lda, + const int8_t *matrixB, + int ldb, + const int32_t *beta, + int32_t *matrixC, + int ldc, + aclComputeType type, + aclrtStream stream); + + +/** + * @ingroup AscendCL + * @brief create a handle for performing the matrix-matrix multiplication + * + * @param transA [IN] transpose type of matrix A + * @param transB [IN] transpose type of matrix B + * @param transC [IN] transpose type of matrix C + * @param m [IN] number of rows of matrix A and matrix C + * @param n [IN] number of columns of matrix B and matrix C + * @param k [IN] number of columns of matrix A and rows of matrix B + * @param type [IN] computation type + * @param handle [OUT] pointer to the pointer to the handle + * + * @retval ACL_SUCCESS The function is successfully executed. 
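Under the same assumptions as the matrix-vector sketch above (device-resident matrices, host-side alpha/beta, row-major leading dimensions, operator model already loaded), a half-precision aclblasHgemm call could look like:

#include "acl/acl.h"
#include "acl/ops/acl_cblas.h"

/* C = alpha * A * B + beta * C, with A m x k, B k x n, C m x n. */
static aclError HalfGemm(const aclFloat16 *devA, const aclFloat16 *devB, aclFloat16 *devC,
                         int m, int n, int k, aclrtStream stream)
{
    aclFloat16 alpha = aclFloatToFloat16(1.0f);
    aclFloat16 beta  = aclFloatToFloat16(0.0f);

    aclError ret = aclblasHgemm(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N,
                                m, n, k,
                                &alpha, devA, k,
                                devB, n,
                                &beta, devC, n,
                                ACL_COMPUTE_HIGH_PRECISION, stream);
    if (ret == ACL_SUCCESS) {
        ret = aclrtSynchronizeStream(stream);
    }
    return ret;
}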
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + aclComputeType type, + aclopHandle **handle); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ diff --git a/inc/external/acl/acl/ops/acl_dvpp.h b/inc/external/acl/acl/ops/acl_dvpp.h new file mode 100644 index 00000000..42ec4a8d --- /dev/null +++ b/inc/external/acl/acl/ops/acl_dvpp.h @@ -0,0 +1,2493 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if !defined(ENABLE_DVPP_INTERFACE) +#if defined(_MSC_VER) +#error message("if you want to use dvpp funtions ,please use the macro definition (ENABLE_DVPP_INTERFACE).") +#else +#error "if you want to use dvpp funtions ,please use the macro definition (ENABLE_DVPP_INTERFACE)." +#endif +#endif + +#ifndef INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ +#define INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ + +#include +#include +#include "acl/acl.h" +#include "acl/acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct acldvppPicDesc acldvppPicDesc; +typedef struct acldvppBatchPicDesc acldvppBatchPicDesc; +typedef struct acldvppRoiConfig acldvppRoiConfig; +typedef struct acldvppResizeConfig acldvppResizeConfig; +typedef struct acldvppBorderConfig acldvppBorderConfig; +typedef struct acldvppLutMap acldvppLutMap; +typedef struct acldvppChannelDesc acldvppChannelDesc; +typedef struct acldvppJpegeConfig acldvppJpegeConfig; +typedef struct aclvdecChannelDesc aclvdecChannelDesc; +typedef struct acldvppStreamDesc acldvppStreamDesc; +typedef struct aclvdecFrameConfig aclvdecFrameConfig; +typedef struct aclvencChannelDesc aclvencChannelDesc; +typedef struct aclvencFrameConfig aclvencFrameConfig; +typedef struct acldvppHist acldvppHist; +typedef void (*aclvdecCallback)(acldvppStreamDesc *input, acldvppPicDesc *output, void *userData); +typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output, void *userdata); + +// Supported Pixel Format +enum acldvppPixelFormat { + PIXEL_FORMAT_YUV_400 = 0, // 0 + PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 + PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 + PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 + PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 + PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 + PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 + PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 + PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 + PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 + PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 + PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 + PIXEL_FORMAT_RGB_888 = 12, // 12 + PIXEL_FORMAT_BGR_888 = 13, // 13 + PIXEL_FORMAT_ARGB_8888 = 14, // 14 + PIXEL_FORMAT_ABGR_8888 = 15, // 15 + PIXEL_FORMAT_RGBA_8888 = 16, // 16 + PIXEL_FORMAT_BGRA_8888 = 17, // 17 + PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 + PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 + PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 + PIXEL_FORMAT_YVU_PLANAR_422, + 
PIXEL_FORMAT_YVU_PLANAR_444, + PIXEL_FORMAT_RGB_444 = 23, + PIXEL_FORMAT_BGR_444, + PIXEL_FORMAT_ARGB_4444, + PIXEL_FORMAT_ABGR_4444, + PIXEL_FORMAT_RGBA_4444, + PIXEL_FORMAT_BGRA_4444, + PIXEL_FORMAT_RGB_555, + PIXEL_FORMAT_BGR_555, + PIXEL_FORMAT_RGB_565, + PIXEL_FORMAT_BGR_565, + PIXEL_FORMAT_ARGB_1555, + PIXEL_FORMAT_ABGR_1555, + PIXEL_FORMAT_RGBA_1555, + PIXEL_FORMAT_BGRA_1555, + PIXEL_FORMAT_ARGB_8565, + PIXEL_FORMAT_ABGR_8565, + PIXEL_FORMAT_RGBA_8565, + PIXEL_FORMAT_BGRA_8565, + PIXEL_FORMAT_RGB_BAYER_8BPP = 50, + PIXEL_FORMAT_RGB_BAYER_10BPP, + PIXEL_FORMAT_RGB_BAYER_12BPP, + PIXEL_FORMAT_RGB_BAYER_14BPP, + PIXEL_FORMAT_RGB_BAYER_16BPP, + PIXEL_FORMAT_BGR_888_PLANAR = 70, + PIXEL_FORMAT_HSV_888_PACKAGE, + PIXEL_FORMAT_HSV_888_PLANAR, + PIXEL_FORMAT_LAB_888_PACKAGE, + PIXEL_FORMAT_LAB_888_PLANAR, + PIXEL_FORMAT_S8C1, + PIXEL_FORMAT_S8C2_PACKAGE, + PIXEL_FORMAT_S8C2_PLANAR, + PIXEL_FORMAT_S16C1, + PIXEL_FORMAT_U8C1, + PIXEL_FORMAT_U16C1, + PIXEL_FORMAT_S32C1, + PIXEL_FORMAT_U32C1, + PIXEL_FORMAT_U64C1, + PIXEL_FORMAT_S64C1, + PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, + PIXEL_FORMAT_YVU_SEMIPLANAR_440, + PIXEL_FORMAT_FLOAT32, + PIXEL_FORMAT_BUTT, + PIXEL_FORMAT_UNKNOWN = 10000 +}; + +// Stream Format +enum acldvppStreamFormat { + H265_MAIN_LEVEL = 0, + H264_BASELINE_LEVEL, + H264_MAIN_LEVEL, + H264_HIGH_LEVEL +}; + +// Supported Channel Mode +enum acldvppChannelMode { + DVPP_CHNMODE_VPC = 1, + DVPP_CHNMODE_JPEGD = 2, + DVPP_CHNMODE_JPEGE = 4 +}; + +// Supported Border Type +enum acldvppBorderType { + BORDER_CONSTANT = 0, + BORDER_REPLICATE, + BORDER_REFLECT, + BORDER_REFLECT_101 +}; + +// Venc parameter type +enum aclvencChannelDescParamType { + ACL_VENC_THREAD_ID_UINT64 = 0, + ACL_VENC_CALLBACK_PTR, + ACL_VENC_PIXEL_FORMAT_UINT32, + ACL_VENC_ENCODE_TYPE_UINT32, + ACL_VENC_PIC_WIDTH_UINT32, + ACL_VENC_PIC_HEIGHT_UINT32, + ACL_VENC_KEY_FRAME_INTERVAL_UINT32, + ACL_VENC_BUF_ADDR_PTR, + ACL_VENC_BUF_SIZE_UINT32, + ACL_VENC_RC_MODE_UINT32, + ACL_VENC_SRC_RATE_UINT32, + ACL_VENC_MAX_BITRATE_UINT32, + ACL_VENC_MAX_IP_PROP_UINT32 +}; + +// Jpeg picture format +enum acldvppJpegFormat { + ACL_JPEG_CSS_444 = 0, + ACL_JPEG_CSS_422, + ACL_JPEG_CSS_420, + ACL_JPEG_CSS_GRAY, + ACL_JPEG_CSS_440, + ACL_JPEG_CSS_411, + ACL_JPEG_CSS_UNKNOWN = 1000 +}; + +/** + * @ingroup AscendCL + * @brief alloc device memory for dvpp. + * + * @par Function + * @li It's mainly used for allocating memory to device media data processing. + * The requested memory meets the data processing requirements. + * After calling this interface to request memory, + * you must release the memory using the acldvppFree interface. + * @li When calling the acldvppMalloc interface to apply for memory, + * the size entered by the user is aligned upwards to 32 integer multiples, + * and an additional 32 bytes are applied. + * + * @par Restriction + * If the user uses the acldvppMalloc interface to apply for a large block of + * memory and divide and manage the memory by himself, + * when applying for memory, the user needs to align up to 32 integer + * times + 32 bytes (ALIGN_UP [len] +32 words) according to + * the actual data size of each picture Section) to manage memory. + * + * @param devPtr [OUT] memory pointer. + * @param size [IN] memory size. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppFree + */ +ACL_FUNC_VISIBILITY aclError acldvppMalloc(void **devPtr, size_t size); + +/** + * @ingroup AscendCL + * @brief free device memory for dvpp. 
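+ *
+ * A minimal allocate/free sketch (illustrative only; the 1 MB size is an arbitrary
+ * example value and error handling is reduced to a single check):
+ * @code
+ *   void *devBuf = nullptr;
+ *   // the requested size is aligned up to a multiple of 32 bytes, plus 32 extra bytes
+ *   aclError ret = acldvppMalloc(&devBuf, 1024 * 1024);
+ *   if (ret == ACL_SUCCESS) {
+ *     // ... use devBuf as dvpp input/output memory ...
+ *     (void)acldvppFree(devBuf);
+ *   }
+ * @endcode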
+ * + * @par Function + * Free the memory requested through the acldvppMalloc interface + * @param devPtr [IN] memory pointer to free. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppMalloc + */ +ACL_FUNC_VISIBILITY aclError acldvppFree(void *devPtr); + +/** + * @ingroup AscendCL + * @brief create DvppChannelDesc. + * + * @par Function + * Create a channel for image data processing. + * The same channel can be reused + * and is no longer available after destruction + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppChannelDesc *acldvppCreateChannelDesc(); + +/** + * @ingroup AscendCL + * @brief destroy dvppChannelDesc. + * + * @par Function + * Can only destroy channels created by the acldvppCreateChannel interface + * @param channelDesc [IN] the channel description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannelDesc | acldvppDestroyChannel + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyChannelDesc(acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp channel Id. + * + * @par Restriction + * Interface calling sequence: + * acldvppCreateChannelDesc --> acldvppCreateChannel --> + * acldvppGetChannelDescChannelId + * + * @param channelDesc [IN] the channel description. + * + * @retval channel id. + * + * @see acldvppCreateChannelDesc | acldvppCreateChannel + */ +ACL_FUNC_VISIBILITY uint64_t acldvppGetChannelDescChannelId(const acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp picture description. + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppCreatePicDesc(); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp picture description. + * + * @par Function + * Can only destroy picture description information created + * through acldvppCreatePicDesc interface. + * @param picDesc [IN] dvpp picture description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyPicDesc(acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's data. + * + * @param picDesc [OUT] dvpp picture description. + * @param dataDev [IN] dvpp picture dataDev.Must be the memory + * requested using the acldvppMalloc interface. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppMalloc + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescData(acldvppPicDesc *picDesc, void *dataDev); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's size. + * + * @param picDesc [OUT] dvpp picture description. + * @param size dvpp [IN] picture size. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescSize(acldvppPicDesc *picDesc, uint32_t size); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's format. + * + * @param picDesc [OUT] dvpp picture description. + * @param format [IN] dvpp picture format. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescFormat(acldvppPicDesc *picDesc, acldvppPixelFormat format); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's width. + * + * @param picDesc [OUT] dvpp picture description. + * @param width [IN] dvpp picture width. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidth(acldvppPicDesc *picDesc, uint32_t width); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's height. + * + * @param picDesc [OUT] dvpp picture description. + * @param height [IN] dvpp picture height. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeight(acldvppPicDesc *picDesc, uint32_t height); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's widthStride. + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * + * @param picDesc [OUT] dvpp picture description. + * @param widthStride [IN] dvpp picture widthStride. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidthStride(acldvppPicDesc *picDesc, uint32_t widthStride); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's heightStride. + * + * @par Restriction + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param picDesc [OUT] dvpp picture description. + * @param heightStride [IN] dvpp picture heightStride. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeightStride(acldvppPicDesc *picDesc, uint32_t heightStride); + +/** + * @ingroup AscendCL + * @brief Set dvpp picture description's retcode. + * + * @param picDesc [OUT] dvpp picture description. + * @param retCode [IN] dvpp picture retcode. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetPicDescRetCode(acldvppPicDesc *picDesc, uint32_t retCode); + +/** + * @ingroup AscendCL + * @brief Get picture data. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval picture data addr. + * @retval default nullptr. + */ +ACL_FUNC_VISIBILITY void *acldvppGetPicDescData(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get picture data size. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval picture data size. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescSize(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's format. 
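+ *
+ * For context, a picture description is normally filled in with the setter interfaces
+ * above before the getters are used. A minimal sketch (illustrative only; devBuf and
+ * bufSize come from acldvppMalloc, and the 1920x1080 YUV420SP values are examples):
+ * @code
+ *   acldvppPicDesc *picDesc = acldvppCreatePicDesc();
+ *   acldvppSetPicDescData(picDesc, devBuf);
+ *   acldvppSetPicDescSize(picDesc, bufSize);
+ *   acldvppSetPicDescFormat(picDesc, PIXEL_FORMAT_YUV_SEMIPLANAR_420);
+ *   acldvppSetPicDescWidth(picDesc, 1920);
+ *   acldvppSetPicDescHeight(picDesc, 1080);
+ *   acldvppSetPicDescWidthStride(picDesc, 1920);   // already aligned to 16
+ *   acldvppSetPicDescHeightStride(picDesc, 1080);  // already aligned to 2
+ *   acldvppPixelFormat fmt = acldvppGetPicDescFormat(picDesc);
+ *   // ...
+ *   acldvppDestroyPicDesc(picDesc);
+ * @endcode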
+ * + * @param picDesc [IN] dvpp picture description. + * + * @retval format + * @retval default PIXEL_FORMAT_YUV_400. + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat acldvppGetPicDescFormat(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's width. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval width. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidth(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's height. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval height. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeight(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's widthStride. + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * + * @param picDesc [IN] dvpp picture description. + * + * @retval stride width. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidthStride(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's heightStride. + * + * @par Restriction + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval stride height. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeightStride(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture desc's retcode. + * + * @param picDesc [IN] dvpp picture description. + * + * @retval ret code. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp roi config. + * + * @param left [IN] the left offset, must be even + * @param right [IN] the right offset, must be odd + * @param top [IN] the top offset, must be even + * @param bottom [IN] the bottom offset, must be odd + * + * @retval null for failed. + * @retval other success + */ +ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, + uint32_t right, + uint32_t top, + uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp roi config. + * + * @par Function + * Destroys data created through the acldvppCreateRoiConfig interface + * @param roiConfig [IN] dvpp roi config. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyRoiConfig(acldvppRoiConfig *roiConfig); + +/** + * @ingroup AscendCL + * @brief Set left of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param left [IN] left offset + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigLeft(acldvppRoiConfig *config, uint32_t left); + +/** + * @ingroup AscendCL + * @brief Set right of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param right [IN] right offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigRight(acldvppRoiConfig *config, uint32_t right); + +/** + * @ingroup AscendCL + * @brief Set top of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param top [IN] top offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigTop(acldvppRoiConfig *config, uint32_t top); + +/** + * @ingroup AscendCL + * @brief Set bottom of RoiConfig. + * + * @param config [OUT] RoiConfig + * @param bottom [IN] bottom offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Set RoiConfig. + * + * @param config [OUT] RoiConfig + * @param left [IN] left offset + * @param right [IN] right offset + * @param top [IN] top offset + * @param bottom [IN] bottom offset + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, + uint32_t left, + uint32_t right, + uint32_t top, + uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Create dvpp resize config. + * The specified scaling algorithm is not supported. + * The default scaling algorithm is "nearest neighbor interpolation". + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY acldvppResizeConfig *acldvppCreateResizeConfig(); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp resize config. + * + * @par Function + * Destroys the scaling configuration data created by + * the acldvppCreateResizeConfig interface + * + * @param resizeConfig [IN] resize config. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyResizeConfig(acldvppResizeConfig *resizeConfig); + +/** + * @ingroup AscendCL + * @brief Create jpege config. + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY acldvppJpegeConfig *acldvppCreateJpegeConfig(); + +/** + * @ingroup AscendCL + * @brief Destroy jpege config. + * + * @par Function + * Destroys the encoding configuration data created by + * the acldvppCreateJpegeConfig interface + * @param jpegeConfig [IN] config pointer to destroy. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateJpegeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyJpegeConfig(acldvppJpegeConfig *jpegeConfig); + +/** + * @ingroup AscendCL + * @brief Set jpege config's level. + * + * @param jpegeConfig [OUT] Call the acldvppCreateJpegeConfig + * interface to create acldvppJpegeConfig data + * @param level [IN] Encoding quality range [0, 100], + * where level 0 encoding quality is similar to level 100, + * and the smaller the value in [1, 100], + * the worse the quality of the output picture. + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetJpegeConfigLevel(acldvppJpegeConfig *jpegeConfig, uint32_t level); + +/** + * @ingroup AscendCL + * @brief Get jpege config's level. + * + * @param jpegeConfig [IN] jpege config. + * + * @retval compression level. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetJpegeConfigLevel(const acldvppJpegeConfig *jpegeConfig); + +/** + * @ingroup AscendCL + * @brief create vdecChannelDesc.Channel description information + * when creating a video data processing channel. + * + * @retval null for failed. + * @retval other success + */ +ACL_FUNC_VISIBILITY aclvdecChannelDesc *aclvdecCreateChannelDesc(); + +/** + * @ingroup AscendCL + * @brief destroy vdecChannelDesc. + * + * @par Function + * Can only destroy aclvdecChannelDesc type created + * through aclvdecCreateChannelDesc interface + * @param channelDesc [IN] channel description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + + * @see aclvdecCreateChannelDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannelDesc(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's channel id. + * + * @param channelDesc [OUT] vdec channel description. + * @param channelId [IN] decoding channel id: 0~15. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescChannelId(aclvdecChannelDesc *channelDesc, uint32_t channelId); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's thread id. + * + * @param channelDesc [OUT] vdec channel description. + * @param threadId [IN] thread id. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescThreadId(aclvdecChannelDesc *channelDesc, uint64_t threadId); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's callback function. + * + * @param channelDesc [OUT] vdec channel description. + * @param callback [IN] function callback.Function prototype: + * void (* aclvdecCallback) + * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata) + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCallback + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescCallback(aclvdecChannelDesc *channelDesc, aclvdecCallback callback); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's video encoding type. + * + * @param channelDesc [OUT] vdec channel description. + * @param enType [IN] video encoding type. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescEnType(aclvdecChannelDesc *channelDesc, acldvppStreamFormat enType); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture format. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicFormat [IN] out picture format (acldvppPixelFormat). + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicFormat(aclvdecChannelDesc *channelDesc, + acldvppPixelFormat outPicFormat); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture width. 
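+ *
+ * A typical configuration sketch (illustrative only; myVdecCallback and reportThreadId
+ * are placeholder names, and the H264/1080p values are example settings):
+ * @code
+ *   aclvdecChannelDesc *vdecDesc = aclvdecCreateChannelDesc();
+ *   aclvdecSetChannelDescChannelId(vdecDesc, 0);
+ *   aclvdecSetChannelDescThreadId(vdecDesc, reportThreadId);
+ *   aclvdecSetChannelDescCallback(vdecDesc, myVdecCallback);
+ *   aclvdecSetChannelDescEnType(vdecDesc, H264_MAIN_LEVEL);
+ *   aclvdecSetChannelDescOutPicFormat(vdecDesc, PIXEL_FORMAT_YVU_SEMIPLANAR_420);
+ *   aclvdecSetChannelDescOutPicWidth(vdecDesc, 1920);
+ *   aclvdecSetChannelDescOutPicHeight(vdecDesc, 1080);
+ * @endcode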
+ * + * @param channelDesc [OUT] vdec channel description. + * @param outPicWidth [IN] out picture width. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicWidth(aclvdecChannelDesc *channelDesc, uint32_t outPicWidth); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's out picture height. + * + * @param channelDesc [OUT] vdec channel description. + * @param outPicHeight [IN] out picture height. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicHeight(aclvdecChannelDesc *channelDesc, uint32_t outPicHeight); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's reference frame num. + * + * @param channelDesc [OUT] vdec channel description. + * @param refFrameNum [IN] reference frame num. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescRefFrameNum(aclvdecChannelDesc *channelDesc, uint32_t refFrameNum); + +/** + * @ingroup AscendCL + * @brief Set vdec channel description's bit depth. + * + * @param channelDesc [OUT] vdec channel description. + * @param bitDepth [IN] bit depth. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescBitDepth(aclvdecChannelDesc *channelDesc, uint32_t bitDepth); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's channel id. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval decoding channel id: 0~15. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescChannelId(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's thread id. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval thread id. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint64_t aclvdecGetChannelDescThreadId(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's callback function. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval function callback.Function prototype: + * void (* aclvdecCallback) + * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata) + * @retval default null. + * + * @see aclvdecCallback + */ +ACL_FUNC_VISIBILITY aclvdecCallback aclvdecGetChannelDescCallback(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's video encoding type. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval video encoding type. + * @retval default H265_MAIN_LEVEL. + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat aclvdecGetChannelDescEnType(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture format. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture format. + * @retval default DVPP_OUTPUT_YUV420SP_UV. + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat aclvdecGetChannelDescOutPicFormat(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture width. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture width. + * @retval default 0. 
+ */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicWidth(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's out picture height. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval out picture height (for vdec malloc memory). + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicHeight(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's bit depth. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval bit depth. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescBitDepth(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get vdec channel description's reference frame num. + * + * @param channelDesc [IN] vdec channel description. + * + * @retval reference frame num. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescRefFrameNum(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief create vencChannelDesc. + * + * @retval null for failed, other success + */ +ACL_FUNC_VISIBILITY aclvencChannelDesc *aclvencCreateChannelDesc(); + +/** + * @ingroup AscendCL + * @brief destroy vencChannelDesc. + * + * @param channelDesc [IN] channel desc. + * + * @retval ACL_SUCCESS:success, other:failed + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyChannelDesc(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Set decoding thread id for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param threadId [IN] thread id + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescThreadId(aclvencChannelDesc *channelDesc, uint64_t threadId); + +/** + * @ingroup AscendCL + * @brief Set func callback for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param callback [IN] func callback + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescCallback(aclvencChannelDesc *channelDesc, aclvencCallback callback); + +/** + * @ingroup AscendCL + * @brief Set video encoding type for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param enType [IN] video encoding type + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescEnType(aclvencChannelDesc *channelDesc, acldvppStreamFormat enType); + +/** + * @ingroup AscendCL + * @brief Set pic format for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param picFormat [IN] pic format + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicFormat(aclvencChannelDesc *channelDesc, + acldvppPixelFormat picFormat); + +/** + * @ingroup AscendCL + * @brief Set out pic width for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param picWidth [IN] pic width + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicWidth(aclvencChannelDesc *channelDesc, uint32_t picWidth); + +/** + * @ingroup AscendCL + * @brief Set pic height for venc channel desc. 
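+ *
+ * A typical configuration sketch (illustrative only; myVencCallback and reportThreadId
+ * are placeholder names, and the H265/720p/CBR numbers are example settings):
+ * @code
+ *   aclvencChannelDesc *vencDesc = aclvencCreateChannelDesc();
+ *   aclvencSetChannelDescThreadId(vencDesc, reportThreadId);
+ *   aclvencSetChannelDescCallback(vencDesc, myVencCallback);
+ *   aclvencSetChannelDescEnType(vencDesc, H265_MAIN_LEVEL);
+ *   aclvencSetChannelDescPicFormat(vencDesc, PIXEL_FORMAT_YUV_SEMIPLANAR_420);
+ *   aclvencSetChannelDescPicWidth(vencDesc, 1280);
+ *   aclvencSetChannelDescPicHeight(vencDesc, 720);
+ *   aclvencSetChannelDescKeyFrameInterval(vencDesc, 16);
+ *   aclvencSetChannelDescRcMode(vencDesc, 2);        // CBR
+ *   aclvencSetChannelDescSrcRate(vencDesc, 30);
+ *   aclvencSetChannelDescMaxBitRate(vencDesc, 2000);
+ * @endcode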
+ * + * @param channelDesc [OUT] venc channel desc + * @param picHeight [IN] pic height + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicHeight(aclvencChannelDesc *channelDesc, uint32_t picHeight); + +/** + * @ingroup AscendCL + * @brief Set key frame interval for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param keyFrameInterval [IN] Interval of key frame + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescKeyFrameInterval(aclvencChannelDesc *channelDesc, + uint32_t keyFrameInterval); + +/** + * @ingroup AscendCL + * @brief Set output buffer address for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param bufAddr [IN] output buffer address + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufAddr(aclvencChannelDesc *channelDesc, void *bufAddr); + +/** + * @ingroup AscendCL + * @brief Set output buffer size for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param bufSize [IN] output buffer size + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufSize(aclvencChannelDesc *channelDesc, uint32_t bufSize); + +/** + * @ingroup AscendCL + * @brief Set rc model for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param rcMode [IN] venc rc mode(VBR=1, CBR=2) + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescRcMode(aclvencChannelDesc *channelDesc, uint32_t rcMode); + +/** + * @ingroup AscendCL + * @brief Set source rate for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param srcRate [IN] source rate + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescSrcRate(aclvencChannelDesc *channelDesc, uint32_t srcRate); + +/** + * @ingroup AscendCL + * @brief Set max bit rate for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param maxBitRate [IN] max bit rate + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc *channelDesc, uint32_t maxBitRate); + +/** + * @ingroup AscendCL + * @brief Set venc parameter for venc channel desc. + * + * @param channelDesc [OUT] venc channel desc + * @param paramType [IN] parameter type + * @param length [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, + aclvencChannelDescParamType paramType, size_t length, const void *param); + +/** + * @ingroup AscendCL + * @brief Get output buffer address for venc channel desc. + * + * @param channelDesc[IN] venc channel desc + * + * @retval output buffer address + */ +ACL_FUNC_VISIBILITY void *aclvencGetChannelDescBufAddr(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get output buffer size for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval output buffer size + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescBufSize(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get decoding channel id for venc channel desc. 
+ * + * @param channelDesc [IN] venc channel desc + * + * @retval decoding channel id: 0~15, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescChannelId(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get decoding thread id for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval thread id, default 0 + */ +ACL_FUNC_VISIBILITY uint64_t aclvencGetChannelDescThreadId(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get func callback for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval func callback, default null + */ +ACL_FUNC_VISIBILITY aclvencCallback aclvencGetChannelDescCallback(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get video encoding type for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval video encoding type, default H265_MAIN_LEVEL + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat aclvencGetChannelDescEnType(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic format for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic format + */ +ACL_FUNC_VISIBILITY acldvppPixelFormat aclvencGetChannelDescPicFormat(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic width for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic width, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicWidth(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get pic height for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval pic height, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicHeight(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Get interval of key frame for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval interval of key frame, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescKeyFrameInterval(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get rc mode for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval rc mode, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescRcMode(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get source rate for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval source rate, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescSrcRate(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get max bit rate for venc channel desc. + * + * @param channelDesc [IN] venc channel desc + * + * @retval max bit rate, default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * + * @brief Get venc parameter for venc channel desc. 
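+ *
+ * A minimal set/get sketch for the generic parameter interface (illustrative only;
+ * vencDesc is an already created aclvencChannelDesc and 2000 is an example bit rate):
+ * @code
+ *   uint32_t maxBitRate = 2000;
+ *   aclvencSetChannelDescParam(vencDesc, ACL_VENC_MAX_BITRATE_UINT32,
+ *                              sizeof(maxBitRate), &maxBitRate);
+ *   uint32_t readBack = 0;
+ *   size_t retSize = 0;
+ *   aclvencGetChannelDescParam(vencDesc, ACL_VENC_MAX_BITRATE_UINT32,
+ *                              sizeof(readBack), &retSize, &readBack);
+ * @endcode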
+ * + * @param channelDesc [IN] venc channel desc + * @param paramType [IN] parameter type + * @param length [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, + aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param); + +/** + * @ingroup AscendCL + * @brief get forced restart of I-frame interval from config + * + * @param config [IN] venc frame config + * + * @retval 0: Not forced; 1: Forced restart of I-frame -1: error + */ +ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigForceIFrame(const aclvencFrameConfig *config); + +/** + * @ingroup AscendCL + * @brief get forced restart of I-frame interval from config + * + * @param config [IN] venc frame config + * + * @retval Whether it is the end frame: 0: no; 1: end frame + */ +ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigEos(const aclvencFrameConfig *config); + +/** + * @ingroup AscendCL + * @brief set single frame encoding configuration parameters + * + * @param config [OUT] venc frame config + * @param forceFrame [IN] forced restart of I-frame interval: 0: Not forced; 1: Forced restart of I-frame + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigForceIFrame(aclvencFrameConfig *config, uint8_t forceIFrame); + +/** + * @ingroup AscendCL + * @brief set single frame encoding configuration parameters + * + * @param config [OUT] venc frame config + * @param eos [IN] Whether it is the end frame: 0: no; 1: end frame + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigEos(aclvencFrameConfig *config, uint8_t eos); + +/** + * @ingroup AscendCL + * @brief dvpp venc destroy frame config + * + * @param config [IN] venc frame config + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyFrameConfig(aclvencFrameConfig *config); + +/** + * @ingroup AscendCL + * @brief Create dvpp venc frame config. + * + * @retval null for failed, other aclvencFrameConfig ptr + */ +ACL_FUNC_VISIBILITY aclvencFrameConfig *aclvencCreateFrameConfig(); + +/** + * @ingroup AscendCL + * @brief Create dvpp venc channel. + * + * @param channelDesc [IN|OUT] venc channel desc + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencCreateChannel(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp venc channel. + * + * @param channelDesc [IN] venc channel desc + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencDestroyChannel(aclvencChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp venc launch send frame task. + * + * @param channelDesc [IN] venc channel desc + * @param input [IN] input picture desc + * @param reserve [IN] reserve parameter + * @param config [IN] dvpp frame config + * @param userdata [IN] user callback function + * + * @retval ACL_SUCCESS for ok, others for fail + */ +ACL_FUNC_VISIBILITY aclError aclvencSendFrame(aclvencChannelDesc *channelDesc, acldvppPicDesc *input, void *reserve, + aclvencFrameConfig *config, void *userdata); + +/** + * @ingroup AscendCL + * @brief Create dvpp stream description. + * + * @retval null for failed. + * @retval other success. 
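+ *
+ * A minimal sketch of filling a stream description for decoding (illustrative only;
+ * streamBuf/streamSize refer to one encoded frame already placed in memory allocated
+ * with acldvppMalloc):
+ * @code
+ *   acldvppStreamDesc *streamDesc = acldvppCreateStreamDesc();
+ *   acldvppSetStreamDescData(streamDesc, streamBuf);
+ *   acldvppSetStreamDescSize(streamDesc, streamSize);
+ *   acldvppSetStreamDescEos(streamDesc, 0);   // not the last frame of the sequence
+ * @endcode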
+ */ +ACL_FUNC_VISIBILITY acldvppStreamDesc *acldvppCreateStreamDesc(); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp stream description. + * + * @par Function + * Can only destroy acldvppStreamDesc type created through + * acldvppCreateStreamDesc interface. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateStreamDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyStreamDesc(acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Set stream description's data addr. + * + * @param streamDesc [OUT] dvpp stream description. + * @param dataDev [IN] data addr. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescData(acldvppStreamDesc *streamDesc, void *dataDev); + +/** + * @ingroup AscendCL + * @brief Set stream description's data size. + * + * @param streamDesc [OUT] dvpp stream description. + * @param size [IN] data size. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescSize(acldvppStreamDesc *streamDesc, uint32_t size); + +/** + * @ingroup AscendCL + * @brief Set stream description's format. + * + * @param streamDesc [OUT] dvpp stream description. + * @param format [IN] stream format. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescFormat(acldvppStreamDesc *streamDesc, acldvppStreamFormat format); + +/** + * @ingroup AscendCL + * @brief Set stream description's timestamp. + * + * @param streamDesc [OUT] dvpp stream description. + * @param timestamp [IN] current timestamp. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescTimestamp(acldvppStreamDesc *streamDesc, uint64_t timestamp); + +/** + * @ingroup AscendCL + * @brief Set stream description's ret code. + * + * @param streamDesc [OUT] dvpp stream description. + * @param retCode [IN] result code. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescRetCode(acldvppStreamDesc *streamDesc, uint32_t retCode); + +/** + * @ingroup AscendCL + * @brief Set stream description's eos. + * + * @param streamDesc [OUT] dvpp stream description. + * @param eos [IN] end flag of sequence. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescEos(acldvppStreamDesc *streamDesc, uint8_t eos); + +/** + * @ingroup AscendCL + * @brief Get stream description's data addr. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval data addr. + * @retval deault nullptr. + */ +ACL_FUNC_VISIBILITY void *acldvppGetStreamDescData(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's data size. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval data size. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescSize(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's format. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval stream format. 
+ * @retval default ACL_DVPP_STREAM_H264. + */ +ACL_FUNC_VISIBILITY acldvppStreamFormat acldvppGetStreamDescFormat(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's timestamp. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval current timestamp. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint64_t acldvppGetStreamDescTimestamp(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's retCode. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval result code. + * @retval default 0. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescRetCode(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Get stream description's eos. + * + * @param streamDesc [IN] dvpp stream description. + * + * @retval end flag of sequence. + * @retval default 0(false). + */ +ACL_FUNC_VISIBILITY uint8_t acldvppGetStreamDescEos(const acldvppStreamDesc *streamDesc); + +/** + * @ingroup AscendCL + * @brief Create vdec frame config. + * + * @retval null for failed. + * @retval other success. + */ +ACL_FUNC_VISIBILITY aclvdecFrameConfig *aclvdecCreateFrameConfig(); + +/** + * @ingroup AscendCL + * @brief Destroy vdec frame config. + * + * @par Function + * Can only destroy aclvdecFrameConfig type created through + * aclvdecCreateFrameConfig interface + * + * @param vdecFrameConfig [IN] vdec frame config. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateFrameConfig + */ +ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecFrameConfig); + +/** + * @ingroup AscendCL + * @brief Get image width and height of jpeg. + * + * @param data [IN] image data in host memory + * @param size [IN] the size of image data + * @param width [OUT] the width of image from image header + * @param height [OUT] the height of image from image header + * @param components [OUT] the components of image from image header + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, + uint32_t size, + uint32_t *width, + uint32_t *height, + int32_t *components); + +/** + * @ingroup AscendCL + * @brief Get image width and height of jpeg. + * + * @param data [IN] image data in host memory + * @param size [IN] the size of image data + * @param width [OUT] the width of image from image header + * @param height [OUT] the height of image from image header + * @param components [OUT] the components of image from image header + * @param format [OUT] the format of image from image header + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, + uint32_t size, + uint32_t *width, + uint32_t *height, + int32_t *components, + acldvppJpegFormat *format); + +/** + * @ingroup AscendCL + * @brief Predict encode size of jpeg image. + * + * @param inputDesc [IN] dvpp image desc + * @param config [IN] jpeg encode config + * @param size [OUT] the size predicted of image + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc, + const acldvppJpegeConfig *config, + uint32_t *size); + +/** + * @ingroup AscendCL + * @brief Predict decode size of jpeg image. + * + * @param data [IN] origin image data in host memory + * @param dataSize [IN] the size of origin image data + * @param outputPixelFormat [IN] the pixel format jpeg decode + * @param decSize [OUT] the size predicted for decode image + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, + uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, + uint32_t *decSize); + +/** + * @ingroup AscendCL + * @brief Get image width and height of png. + * + * @param data [IN] image data in host memory + * @param size [IN] the size of image data + * @param width [OUT] the width of image from image header + * @param height [OUT] the height of image from image header + * @param components [OUT] the components of image from image header + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, + uint32_t dataSize, + uint32_t *width, + uint32_t *height, + int32_t *components); + +/** + * @ingroup AscendCL + * @brief Predict decode size of png image. + * + * @param data [IN] origin image data in host memory + * @param dataSize [IN] the size of origin image data + * @param outputPixelFormat [IN] the pixel format jpeg decode + * @param decSize [OUT] the size predicted for decode image + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, + uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, + uint32_t *decSize); + +/** + * @ingroup AscendCL + * @brief Create dvpp channel, the same channel can be reused + * and is no longer available after destruction. + * + * @param channelDesc [IN|OUT] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannelDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppCreateChannel(acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp channel. + * + * @par Restriction + * Can only destroy channel created through the acldvppCreateChannel interface + * + * @param channelDesc [IN] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp vpc resize. 
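+ *
+ * A minimal call-sequence sketch (illustrative only; channelDesc, inputDesc, outputDesc
+ * and stream are assumed to have been created and configured beforehand):
+ * @code
+ *   acldvppResizeConfig *resizeConfig = acldvppCreateResizeConfig();
+ *   aclError ret = acldvppVpcResizeAsync(channelDesc, inputDesc, outputDesc,
+ *                                        resizeConfig, stream);
+ *   if (ret == ACL_SUCCESS) {
+ *     ret = aclrtSynchronizeStream(stream);   // wait for the asynchronous resize task
+ *   }
+ *   acldvppDestroyResizeConfig(resizeConfig);
+ * @endcode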
+ * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] resize input picture destruction + * @param outputDesc [IN|OUT] resize output picture destruction + * @param resizeConfig [IN] resize config + * @param stream [IN] resize task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + * | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppResizeConfig *resizeConfig, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop. + * + * @par Function + * crop the input picture according to the specified area, + * and then store the picture in the output memory as the output picture + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] crop input picture destruction + * @param outputDesc [IN|OUT] crop output picture destruction + * @param cropArea [IN] crop area config + * @param stream [IN] crop task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop. 
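+ *
+ * A minimal batch-crop sketch for one source picture and two crop areas (illustrative
+ * only; srcBatchPicDescs/dstBatchPicDescs are assumed to have been created with
+ * acldvppCreateBatchPicDesc and filled in, and the ROI coordinates are example values):
+ * @code
+ *   uint32_t roiNums[1] = {2};   // two crop areas are taken from the first source picture
+ *   acldvppRoiConfig *cropAreas[2];
+ *   cropAreas[0] = acldvppCreateRoiConfig(0, 223, 0, 223);    // left/top even, right/bottom odd
+ *   cropAreas[1] = acldvppCreateRoiConfig(16, 239, 16, 239);
+ *   aclError ret = acldvppVpcBatchCropAsync(channelDesc, srcBatchPicDescs, roiNums, 1,
+ *                                           dstBatchPicDescs, cropAreas, stream);
+ * @endcode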
+ * + * @par Function + * crop the input batch picture according to the specified area + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param stream [IN] crop batch task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop and paste. + * + * @par Function + * crop the input picture according to the specified area, + * and paste the picture to the specified position of the target picture + * as the output picture + * + * @param channelDesc [IN] thechannel destruction + * @param inputDesc [IN] crop and paste input picture destruction + * @param outputDesc [IN|OUT] crop and paste output picture destruction + * @param cropArea [IN] crop area config + * @param pasteArea [IN] paste area config + * @param stream [IN] crop and paste task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, + acldvppRoiConfig *pasteArea, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop and paste. + * + * @par Function + * crop the input batch picture according to the specified area, + * and paste the pictures to the specified position of the target pictures + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param pasteAreas [IN] paste area configs + * @param stream [IN] crop batch task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig + */ + ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppRoiConfig *pasteAreas[], + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc jpeg decode. 
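+ *
+ * A minimal decode sketch (illustrative only; jpegData/jpegSize refer to the encoded
+ * picture already placed in memory allocated with acldvppMalloc, and outputDesc,
+ * channelDesc and stream are prepared as described above):
+ * @code
+ *   uint32_t decSize = 0;
+ *   acldvppJpegPredictDecSize(jpegData, jpegSize, PIXEL_FORMAT_YUV_SEMIPLANAR_420, &decSize);
+ *   void *decBuf = nullptr;
+ *   acldvppMalloc(&decBuf, decSize);
+ *   acldvppSetPicDescData(outputDesc, decBuf);
+ *   acldvppSetPicDescSize(outputDesc, decSize);
+ *   aclError ret = acldvppJpegDecodeAsync(channelDesc, jpegData, jpegSize, outputDesc, stream);
+ *   if (ret == ACL_SUCCESS) {
+ *     ret = aclrtSynchronizeStream(stream);
+ *   }
+ * @endcode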
+ * + * @par Function + * For different source picture formats, after decoding, + * output pictures in the following format: + * @li jpeg(444) -> YUV444SP:V is front U is back, + * YUV420 SP V is front U is back, YUV420SP U is front V is back; + * @li jpeg(422) -> YUV422SP:V is in front U is behind, + * YUV420SP V is in front U is behind, YUV420SP U is in front V is behind; + * @li jpeg(420) -> YUV420SP: + * V is front U is back, YUV420SP U is front V is back; + * @li jpeg(400) -> YUV420SP:UV data is filled with 0 x 80. + * + * @param channelDesc [IN] the channel destruction + * @param data [IN] decode input picture destruction's data + * @param size [IN] decode input picture destruction's size + * @param outputDesc [IN|OUT] decode output picture destruction + * @param stream [IN] decode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, + const void *data, + uint32_t size, + acldvppPicDesc *outputDesc, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc jpeg encode. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] encode input picture destruction + * @param data [OUT] encode output picture destruction's data + * @param size [IN|OUT] encode output picture destruction's size + * @param config [IN] jpeg encode config + * @param stream [IN] encode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateJpegeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + const void *data, + uint32_t *size, + acldvppJpegeConfig *config, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc png decode. + * + * @param channelDesc [IN] the channel destruction + * @param data [IN] decode input picture destruction's data + * @param size [IN] decode input picture destruction's size + * @param outputDesc [IN|OUT] decode output picture destruction + * @param stream [IN] decode task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, + const void *data, + uint32_t size, + acldvppPicDesc *outputDesc, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Create vdec channel. + * + * @par Function + * Create a channel for video data processing, + * the same channel can be reused, + * and is no longer available after destruction + * + * @param channelDesc [IN|OUT] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannelDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecCreateChannel(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Destroy vdec channel. + * + * @par Function + * Can only destroy channels created by the aclvdecCreateChannel interface + * + * @param channelDesc [IN] the channel destruction + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + * + * @see aclvdecCreateChannel + */ +ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief dvpp vdec send frame. + * + * @par Function + * Pass the input memory to be decoded + * and the decoded output memory to the decoder for decoding + * + * @param channelDesc [IN] vdec channel destruction + * @param input [IN] input stream destruction + * @param output [IN|OUT] output picture destruction + * @param config [IN] vdec frame config + * @param userData [IN] user data for callback function + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, + acldvppStreamDesc *input, + acldvppPicDesc *output, + aclvdecFrameConfig *config, + void *userData); + +/** + * @ingroup AscendCL + * @brief dvpp vdec send skipped frame. + * + * @par Function + * Pass video frame to decoder + * + * @param channelDesc [IN] vdec channel destruction + * @param input [IN] input stream destruction + * @param config [IN] vdec frame config + * @param userData [IN] user data for callback function + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame + */ +ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, + acldvppStreamDesc *input, + aclvdecFrameConfig *config, + void *userData); + +/** + * @ingroup AscendCL + * @brief dvpp vpc convert color. + * + * @par Restriction + * @li outputDesc:Width height stride, No changes are allowed. Just configure 0 + * @par Function + * Convert color gamut + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] convert color input picture destruction + * @param outputDesc [IN|OUT] convert color output picture destruction + * @param stream [IN] convert color task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc pyramid down. + * + * @par Restriction + * @li outputDesc:format only supported YUV400 + * @par Function + * Image pyramid down + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] pyr down input picture destruction + * @param outputDesc [IN|OUT] pyr down output picture destruction + * @param reserve [IN] reserved param , must be nullptr + * @param stream [IN] pyr down task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + void *reserve, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Set dvpp channel mode. + * + * @param channelDesc [OUT] the channel destruction + * @param mode [IN] channel mode + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, + uint32_t mode); + +/** + * @ingroup AscendCL + * @brief Set resize config interpolation. + * + * @param resizeConfig [OUT] the resize config + * @param interpolation [IN] interpolation + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetResizeConfigInterpolation(acldvppResizeConfig *resizeConfig, + uint32_t interpolation); + +/** + * @ingroup AscendCL + * @brief Get resize config interpolation. + * + * @param resizeConfig [IN] the resize config + * + * @retval Interpolation of resize config. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppResizeConfig *resizeConfig); + +/** + * @ingroup AscendCL + * @brief Set vdec channel out mode. + * + * @param channelDesc [OUT] the channel destruction + * @param outMode [IN] channel out mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, + uint32_t outMode); + +/** + * @ingroup AscendCL + * @brief Get vdec channel out mode. + * + * @param channelDesc [IN] the channel destruction + * + * @retval Out mode of channel destruction + * @retval default 0 + */ +ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutMode(const aclvdecChannelDesc *channelDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp batch picture description. + * + * @param batchSize [IN] batch size + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppBatchPicDesc *acldvppCreateBatchPicDesc(uint32_t batchSize); + +/** + * @ingroup AscendCL + * @brief Get dvpp picture description. + * + * @param batchPicDesc [IN] dvpp batch picture description. + * @param index [IN] index of batch + * + * @retval null for failed. + * @retval OtherValues Failure + * + * @see acldvppCreateBatchPicDesc + */ +ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppGetPicDesc(acldvppBatchPicDesc *batchPicDesc, uint32_t index); + +/** + * @ingroup AscendCL + * @brief Destroy dvpp batch picture description. + * + * @par Function + * Can only destroy batch picture description information created + * through acldvppCreateBatchPicDesc interface. + * + * @param batchPicDesc [IN] dvpp batch picture description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateBatchPicDesc + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyBatchPicDesc(acldvppBatchPicDesc *batchPicDesc); + +/** + * @ingroup AscendCL + * @brief Create dvpp lut map. + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppLutMap *acldvppCreateLutMap(); + +/** + * @ingroup AscendCL + * @brief Destroy lut map. + * + * @param lutMap [IN] lut map + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyLutMap(acldvppLutMap *lutMap); + +/** + * @ingroup AscendCL + * @brief Get lut map dims. + * + * @param lutMap [IN] lut map + * + * @retval 0 for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap); + +/** + * @ingroup AscendCL + * @brief Get lut map data. 
+ * + * @param lutMap [IN] lut map + * @param dim [IN] input dim of map + * @param data [OUT] the dim of lut map's data + * @param len [OUT] the dim of lut map's length + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, + uint32_t dim, + uint8_t **data, + uint32_t *len); +/** + * @ingroup AscendCL + * @brief Vpc equalize hist. + * + * @param channelDesc [IN] channel desc + * @param inputDesc [IN] input desc + * @param outputDesc [IN|OUT] output desc + * @param lutMap [IN] lut map param + * @param stream [IN] runtime stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, + const acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + const acldvppLutMap *lutMap, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Create dvpp border config. + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig(); + +/** + * @ingroup AscendCL + * @brief Set value of border config. + * + * @param borderConfig [OUT] border config + * @param index [IN] index of value array + * @param value [IN] value + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, + uint32_t index, + double value); + +/** + * @ingroup AscendCL + * @brief Set border type of border config. + * + * @param borderConfig [OUT] border config + * @param borderType [IN] border type + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBorderType(acldvppBorderConfig *borderConfig, + acldvppBorderType borderType); + +/** + * @ingroup AscendCL + * @brief Set top of border config. + * + * @param borderConfig [OUT] border config + * @param top [IN] top of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigTop(acldvppBorderConfig *borderConfig, uint32_t top); + +/** + * @ingroup AscendCL + * @brief Set bottom of border config. + * + * @param borderConfig [OUT] border config + * @param bottom [IN] bottom of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBottom(acldvppBorderConfig *borderConfig, uint32_t bottom); + +/** + * @ingroup AscendCL + * @brief Set left of border config. + * + * @param borderConfig [OUT] border config + * @param left [IN] left of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigLeft(acldvppBorderConfig *borderConfig, uint32_t left); + +/** + * @ingroup AscendCL + * @brief Set right of border config. + * + * @param borderConfig [OUT] border config + * @param right [IN] right of border + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigRight(acldvppBorderConfig *borderConfig, uint32_t right); + +/** + * @ingroup AscendCL + * @brief Get value of border config. 
+ * + * @param borderConfig [IN] border config + * @param index[IN] index of value array + * + * @retval invalid value is < 0, normal Value is >= 0 + */ +ACL_FUNC_VISIBILITY double acldvppGetBorderConfigValue(const acldvppBorderConfig *borderConfig, uint32_t index); + +/** + * @ingroup AscendCL + * @brief Get border type of border config. + * + * @param borderConfig [IN] border config + * @retval border type of border config + */ +ACL_FUNC_VISIBILITY acldvppBorderType acldvppGetBorderConfigBorderType(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get right of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, top value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigTop(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get Bottom of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, top value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigBottom(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get left of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, top value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigLeft(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Get right of border config. + * + * @param borderConfig [IN] border config + * + * @retval default 0, right value of border config + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigRight(const acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Destroy border config. + * + * @param borderConfig [IN] border config + * + * @retval ACL_SUCCESS for success, other for failure + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *borderConfig); + +/** + * @ingroup AscendCL + * @brief Vpc make border. + * + * @param channelDesc [IN] channel desc + * @param inputDesc [IN] input desc + * @param outputDesc [IN|OUT] output desc + * @param borderConfig [IN] border config param + * @param stream [IN] runtime stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, + const acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + const acldvppBorderConfig *borderConfig, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Dvpp vpc calc hist. + * + * @param channelDesc [IN] the channel destruction + * @param srcPicDesc [IN] pyr down input picture destruction + * @param hist [IN|OUT] pyr down output picture destruction + * @param reserve [IN] reserved param, must be nullptr + * @param stream [IN] task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *srcPicDesc, + acldvppHist *hist, + void *reserve, + aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief Create vpc hist description. + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist(); + +/** + * @ingroup AscendCL + * @brief Destroy vpc hist description. 
+ * + * @par Function + * Can only destroy hist description information created + * through acldvppCreateHist interface. + * + * @param hist [IN] vpc hist description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateHist + */ +ACL_FUNC_VISIBILITY aclError acldvppDestroyHist(acldvppHist *hist); + +/** + * @ingroup AscendCL + * @brief Get dims of vpc hist description. + * + * @param hist [IN] vpc hist description. + * + * @retval dims of vpc hist description. + * + * @see acldvppCreateHist | acldvppVpcCalcHistAsync + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetHistDims(acldvppHist *hist); + +/** + * @ingroup AscendCL + * @brief Get data from vpc hist description by dim. + * + * @param hist [IN] vpc hist description. + * @param dim [IN] which dim to get data. + * @param data [OUT] address of output hist data. + * @param len [OUT] len of output hist data. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateHist | acldvppVpcCalcHistAsync + */ +ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, uint32_t **data, uint16_t *len); + +/** + * @ingroup AscendCL + * @brief Get dvpp calc hist process return code. + * + * @param hist [IN] vpc hist description. + * + * @retval Dvpp calc hist process return code. + * + * @see acldvppCreateHist | acldvppVpcCalcHistAsync + */ +ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist); + +/** + * @ingroup AscendCL + * @brief Set vpc hist description to 0. + * + * @par Function + * Can only clear hist description information created + * through acldvppCreateHist interface. + * + * @param hist [IN] vpc hist description. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateHist + */ +ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ diff --git a/inc/external/acl/acl/ops/acl_fv.h b/inc/external/acl/acl/ops/acl_fv.h new file mode 100644 index 00000000..40cd50cb --- /dev/null +++ b/inc/external/acl/acl/ops/acl_fv.h @@ -0,0 +1,351 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ +#define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ + +#include "acl/acl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct aclfvInitPara aclfvInitPara; +typedef struct aclfvFeatureInfo aclfvFeatureInfo; +typedef struct aclfvRepoRange aclfvRepoRange; +typedef struct aclfvQueryTable aclfvQueryTable; +typedef struct aclfvSearchInput aclfvSearchInput; +typedef struct aclfvSearchResult aclfvSearchResult; + +// search operation type +enum aclfvSearchType { + SEARCH_1_N, // 1:N operation type + SEARCH_N_M // N:M operation type +}; + +/** + * @ingroup AscendCL + * @brief Create fv init param. 
+ * + * @param fsNum [IN] The feature num + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY aclfvInitPara *aclfvCreateInitPara(uint64_t fsNum); + +/** + * @ingroup AscendCL + * @brief Destroy fv init param. + * + * @par Function + * Can only destroy fv init param information created + * through aclfvCreateInitPara interface. + * + * @param initPara [IN] fv init param. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateInitPara + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroyInitPara(aclfvInitPara *initPara); + +/** + * @ingroup AscendCL + * @brief set value for maxTopNumFor1N which in fv init param. + * + * @param initPara [IN|OUT] fv init param. + * @param maxTopNumFor1N [IN] maxTopNumFor1N value for init param. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclfvSet1NTopNum(aclfvInitPara *initPara, uint32_t maxTopNumFor1N); + +/** + * @ingroup AscendCL + * @brief set value for maxTopNumForNM which in fv init param. + * + * @param initPara [IN|OUT] fv init param. + * @param maxTopNumForNM [IN] maxTopNumForNM value for init param. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t maxTopNumForNM); + +/** + * @ingroup AscendCL + * @brief Create fv feature info. + * + * @param id0 [IN] The first level library id0 + * @param id1 [IN] Secondary library id1 + * @param offset [IN] The offset of the first feature in the library + * @param featureLen [IN] Single feature length + * @param featureCount [IN] Single feature count + * @param featureData [IN] Feature value list + * @param featureDataLen [IN] Feature value list length + * + * @retval null for failed. + * @retval OtherValues success. + */ +ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, + uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen); + +/** + * @ingroup AscendCL + * @brief Destroy fv feature info. + * + * @par Function + * Can only destroy fv feature info information created + * through aclfvCreateFeatureInfo interface. + * + * @param featureInfo [IN] fv feature info. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateFeatureInfo + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroyFeatureInfo(aclfvFeatureInfo *featureInfo); + +/** + * @ingroup AscendCL + * @brief Create fv repo range. + * + * @param id0Min [IN] id0 start value + * @param id0Min [IN] id0 max + * @param id1Min [IN] id0 start value + * @param id1Max [IN] id1 max + * + * @retval null for failed. OtherValues success + */ +ACL_FUNC_VISIBILITY aclfvRepoRange *aclfvCreateRepoRange(uint32_t id0Min, uint32_t id0Max, uint32_t id1Min, + uint32_t id1Max); + +/** + * @ingroup AscendCL + * @brief Destroy fv repo range. + * + * @par Function + * Can only destroy fv repo range information created + * through aclfvCreateRepoRange interface. + * + * @param repoRange [IN] fv repo range. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateRepoRange + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroyRepoRange(aclfvRepoRange *repoRange); + +/** + * @ingroup AscendCL + * @brief Create query table. 
+ * + * @param queryCnt [IN] Number of tables, the maximum number is 6 + * @param tableLen [IN] Single table length, table length is 32KB + * @param tableData [IN] Feature value list + * @param tableDataLen [IN] The length of memory requested by the featureData pointer + * + * @retval null for failed. OtherValues success + */ +ACL_FUNC_VISIBILITY aclfvQueryTable *aclfvCreateQueryTable(uint32_t queryCnt, uint32_t tableLen, uint8_t *tableData, + uint32_t tableDataLen); + +/** + * @ingroup AscendCL + * @brief Destroy query table. + * + * @par Function + * Can only destroy query table information created + * through aclfvCreateQueryTable interface. + * + * @param queryTable [IN] query table. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateQueryTable + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroyQueryTable(aclfvQueryTable *queryTable); + +/** + * @ingroup AscendCL + * @brief Create search input. + * + * @param queryTable [IN] query table + * @param repoRange [IN] query repo range + * @param topk [IN] query topk + * + * @retval null for failed. OtherValues success + */ +ACL_FUNC_VISIBILITY aclfvSearchInput *aclfvCreateSearchInput(aclfvQueryTable *queryTable, aclfvRepoRange *repoRange, + uint32_t topk); + +/** + * @ingroup AscendCL + * @brief Destroy search input. + * + * @par Function + * Can only destroy search input information created + * through aclfvCreateSearchInput interface. + * + * @param searchInput [IN] search input. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateSearchInput + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInput); + +/** + * @ingroup AscendCL + * @brief Create search result. + * + * @param queryCnt [IN] Retrieve the number of features + * @param resultNum [IN] The number of search results for each feature, the number is queryCnt + * @param resultNumDataLen [IN] resultNum memory length + * @param id0 [IN] Level 1 library id0 + * @param id1 [IN] Secondary library id1 + * @param resultOffset [IN] The offset of the bottom library corresponding + * to each feature retrieval result, total length topK * queryCnt + * @param resultDistance [IN] Distance, total length topK * queryCnt + * @param dataLen [IN] The memory size requested by + * id0\id1\reslutOffset\resultDistance + * + * @retval null for failed. OtherValues success + */ +ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, + uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance, + uint32_t dataLen); + +/** + * @ingroup AscendCL + * @brief Destroy search result. + * + * @par Function + * Can only destroy search result information created + * through aclfvCreateSearchResult interface. + * + * @param searchResult [IN] search result. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see aclfvCreateSearchResult + */ +ACL_FUNC_VISIBILITY aclError aclfvDestroySearchResult(aclfvSearchResult *searchResult); + +/** + * @ingroup AscendCL + * @brief fv IP initialize. + * + * @param initPara [IN] fv init param. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvInit(aclfvInitPara *initPara); + +/** + * @ingroup AscendCL + * @brief release fv resources. 
+ * + * @par Function + * Can only release fv resources created + * through aclfvInit interface. + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + * + * @see aclfvInit + */ +ACL_FUNC_VISIBILITY aclError aclfvRelease(); + +/** + * @ingroup AscendCL + * @brief fv repo add. + * + * @param type [IN] repo add type + * @param featureInfo [IN] add feature information + * @param stream [IN] stream of task execute + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief fv repo del. + * + * @param type [IN] repo delete type + * @param repoRange [IN] repo range information + * @param stream [IN] stream of task execute + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief fv accurate del. + * + * @param featureInfo [IN] accurate delete feature information + * @param stream [IN] stream of task execute + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief fv accurate modify. + * + * @param featureInfo [IN] accurate modify feature information + * @param stream [IN] stream of task execute + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. + */ +ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief fv search. + * + * @param type [IN] search type + * @param searchInput [IN] search input + * @param searchRst [OUT] search result + * @param stream [IN] stream of task execute + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure. 
+ */ +ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput *searchInput, + aclfvSearchResult *searchRst, aclrtStream stream); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h index b3111860..0b520002 100644 --- a/inc/external/acl/acl_base.h +++ b/inc/external/acl/acl_base.h @@ -111,6 +111,7 @@ static const int ACL_ERROR_DUMP_NOT_RUN = 100045; static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046; static const int ACL_ERROR_PROF_API_CONFLICT = 148047; static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048; +static const int ACL_ERROR_INVALID_OPP_PATH = 148049; static const int ACL_ERROR_BAD_ALLOC = 200000; static const int ACL_ERROR_API_NOT_SUPPORT = 200001; @@ -134,42 +135,42 @@ static const int ACL_ERROR_PROFILING_FAILURE = 500005; #define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE typedef enum { - ACL_DT_UNDEFINED = -1, - ACL_FLOAT = 0, - ACL_FLOAT16 = 1, - ACL_INT8 = 2, - ACL_INT32 = 3, - ACL_UINT8 = 4, - ACL_INT16 = 6, - ACL_UINT16 = 7, - ACL_UINT32 = 8, - ACL_INT64 = 9, - ACL_UINT64 = 10, - ACL_DOUBLE = 11, - ACL_BOOL = 12, - ACL_STRING = 13, + ACL_DT_UNDEFINED = -1, + ACL_FLOAT = 0, + ACL_FLOAT16 = 1, + ACL_INT8 = 2, + ACL_INT32 = 3, + ACL_UINT8 = 4, + ACL_INT16 = 6, + ACL_UINT16 = 7, + ACL_UINT32 = 8, + ACL_INT64 = 9, + ACL_UINT64 = 10, + ACL_DOUBLE = 11, + ACL_BOOL = 12, + ACL_STRING = 13, } aclDataType; typedef enum { - ACL_FORMAT_UNDEFINED = -1, - ACL_FORMAT_NCHW = 0, - ACL_FORMAT_NHWC = 1, - ACL_FORMAT_ND = 2, - ACL_FORMAT_NC1HWC0 = 3, - ACL_FORMAT_FRACTAL_Z = 4, - ACL_FORMAT_NC1HWC0_C04 = 12, - ACL_FORMAT_NDHWC = 27, - ACL_FORMAT_FRACTAL_NZ = 29, - ACL_FORMAT_NCDHW = 30, - ACL_FORMAT_NDC1HWC0 = 32, - ACL_FRACTAL_Z_3D = 33 + ACL_FORMAT_UNDEFINED = -1, + ACL_FORMAT_NCHW = 0, + ACL_FORMAT_NHWC = 1, + ACL_FORMAT_ND = 2, + ACL_FORMAT_NC1HWC0 = 3, + ACL_FORMAT_FRACTAL_Z = 4, + ACL_FORMAT_NC1HWC0_C04 = 12, + ACL_FORMAT_NDHWC = 27, + ACL_FORMAT_FRACTAL_NZ = 29, + ACL_FORMAT_NCDHW = 30, + ACL_FORMAT_NDC1HWC0 = 32, + ACL_FRACTAL_Z_3D = 33 } aclFormat; typedef enum { - ACL_DEBUG = 0, - ACL_INFO = 1, - ACL_WARNING = 2, - ACL_ERROR = 3, + ACL_DEBUG = 0, + ACL_INFO = 1, + ACL_WARNING = 2, + ACL_ERROR = 3, } aclLogLevel; /** @@ -304,7 +305,9 @@ ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType); * @retval aclTensorDesc pointer. * @retval nullptr if param is invalid or run out of memory */ -ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, +ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, + int numDims, + const int64_t *dims, aclFormat format); /** @@ -326,7 +329,8 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc); * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount, +ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc, + size_t dimsCount, int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); /** @@ -423,7 +427,9 @@ ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, si * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum, +ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, + size_t index, + size_t dimRangeNum, int64_t *dimRange); /** @@ -460,7 +466,7 @@ ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc); * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat, - aclTensorDesc **dstDesc); + aclTensorDesc **dstDesc); /** * @ingroup AscendCL @@ -548,7 +554,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int nu * * @retval null for failed. * @retval OtherValues success. - */ +*/ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index); /** @@ -559,7 +565,7 @@ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, * * @retval null for failed * @retval OtherValues success - */ +*/ ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); /** @@ -599,12 +605,13 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBu * @param ... [IN] the value of current log */ ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, - const char *fmt, ...); + const char *fmt, ...); -#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) +#define ACL_APP_LOG(level, fmt, ...) \ + aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ +#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h index 5886d857..4f3e257f 100644 --- a/inc/external/acl/acl_mdl.h +++ b/inc/external/acl/acl_mdl.h @@ -27,19 +27,19 @@ extern "C" { #endif -#define ACL_MAX_DIM_CNT 128 -#define ACL_MAX_TENSOR_NAME_LEN 128 -#define ACL_MAX_BATCH_NUM 128 -#define ACL_MAX_HW_NUM 128 -#define ACL_MAX_SHAPE_COUNT 128 -#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF - -#define ACL_MDL_LOAD_FROM_FILE 1 -#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 -#define ACL_MDL_LOAD_FROM_MEM 3 -#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 -#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 -#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 +#define ACL_MAX_DIM_CNT 128 +#define ACL_MAX_TENSOR_NAME_LEN 128 +#define ACL_MAX_BATCH_NUM 128 +#define ACL_MAX_HW_NUM 128 +#define ACL_MAX_SHAPE_COUNT 128 +#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF + +#define ACL_MDL_LOAD_FROM_FILE 1 +#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 +#define ACL_MDL_LOAD_FROM_MEM 3 +#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 +#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 +#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 #define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" #define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" @@ -51,123 +51,123 @@ typedef struct aclAippExtendInfo aclAippExtendInfo; typedef struct aclmdlConfigHandle aclmdlConfigHandle; typedef enum { - ACL_YUV420SP_U8 = 1, - ACL_XRGB8888_U8, - ACL_RGB888_U8, - ACL_YUV400_U8, - ACL_NC1HWC0DI_FP16, - ACL_NC1HWC0DI_S8, - ACL_ARGB8888_U8, - ACL_YUYV_U8, - ACL_YUV422SP_U8, - ACL_AYUV444_U8, - ACL_RAW10, - ACL_RAW12, - ACL_RAW16, - ACL_RAW24, - ACL_AIPP_RESERVED = 0xffff, + ACL_YUV420SP_U8 = 1, + ACL_XRGB8888_U8, + ACL_RGB888_U8, + ACL_YUV400_U8, + ACL_NC1HWC0DI_FP16, + ACL_NC1HWC0DI_S8, + ACL_ARGB8888_U8, + ACL_YUYV_U8, + ACL_YUV422SP_U8, + 
ACL_AYUV444_U8, + ACL_RAW10, + ACL_RAW12, + ACL_RAW16, + ACL_RAW24, + ACL_AIPP_RESERVED = 0xffff, } aclAippInputFormat; typedef enum { - ACL_MDL_PRIORITY_INT32 = 0, - ACL_MDL_LOAD_TYPE_SIZET, - ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ - ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ - ACL_MDL_MEM_SIZET, - ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ - ACL_MDL_WEIGHT_SIZET, - ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */ - ACL_MDL_WORKSPACE_SIZET, - ACL_MDL_INPUTQ_NUM_SIZET, - ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ - ACL_MDL_OUTPUTQ_NUM_SIZET, - ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ + ACL_MDL_PRIORITY_INT32 = 0, + ACL_MDL_LOAD_TYPE_SIZET, + ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ + ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ + ACL_MDL_MEM_SIZET, + ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ + ACL_MDL_WEIGHT_SIZET, + ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */ + ACL_MDL_WORKSPACE_SIZET, + ACL_MDL_INPUTQ_NUM_SIZET, + ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ + ACL_MDL_OUTPUTQ_NUM_SIZET, + ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ } aclmdlConfigAttr; typedef enum { - ACL_DATA_WITHOUT_AIPP = 0, - ACL_DATA_WITH_STATIC_AIPP, - ACL_DATA_WITH_DYNAMIC_AIPP, - ACL_DYNAMIC_AIPP_NODE + ACL_DATA_WITHOUT_AIPP = 0, + ACL_DATA_WITH_STATIC_AIPP, + ACL_DATA_WITH_DYNAMIC_AIPP, + ACL_DYNAMIC_AIPP_NODE } aclmdlInputAippType; typedef struct aclmdlIODims { - char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ - size_t dimCount; /**< dim array count */ - int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ + char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ + size_t dimCount; /**< dim array count */ + int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ } aclmdlIODims; typedef struct aclAippDims { - aclmdlIODims srcDims; /**< input dims before model transform */ - size_t srcSize; /**< input size before model transform */ - aclmdlIODims aippOutdims; /**< aipp output dims */ - size_t aippOutSize; /**< aipp output size */ + aclmdlIODims srcDims; /**< input dims before model transform */ + size_t srcSize; /**< input size before model transform */ + aclmdlIODims aippOutdims; /**< aipp output dims */ + size_t aippOutSize; /**< aipp output size */ } aclAippDims; typedef struct aclmdlBatch { - size_t batchCount; /**< batch array count */ - uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ + size_t batchCount; /**< batch array count */ + uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ } aclmdlBatch; typedef struct aclmdlHW { - size_t hwCount; /**< height&width array count */ - uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ + size_t hwCount; /**< height&width array count */ + uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ } aclmdlHW; typedef struct aclAippInfo { - aclAippInputFormat inputFormat; - int32_t srcImageSizeW; - int32_t srcImageSizeH; - int8_t cropSwitch; - int32_t loadStartPosW; - int32_t loadStartPosH; - int32_t cropSizeW; - int32_t cropSizeH; - int8_t resizeSwitch; - int32_t resizeOutputW; - int32_t resizeOutputH; - int8_t paddingSwitch; - int32_t leftPaddingSize; - int32_t rightPaddingSize; - int32_t topPaddingSize; - int32_t bottomPaddingSize; - int8_t 
cscSwitch; - int8_t rbuvSwapSwitch; - int8_t axSwapSwitch; - int8_t singleLineMode; - int32_t matrixR0C0; - int32_t matrixR0C1; - int32_t matrixR0C2; - int32_t matrixR1C0; - int32_t matrixR1C1; - int32_t matrixR1C2; - int32_t matrixR2C0; - int32_t matrixR2C1; - int32_t matrixR2C2; - int32_t outputBias0; - int32_t outputBias1; - int32_t outputBias2; - int32_t inputBias0; - int32_t inputBias1; - int32_t inputBias2; - int32_t meanChn0; - int32_t meanChn1; - int32_t meanChn2; - int32_t meanChn3; - float minChn0; - float minChn1; - float minChn2; - float minChn3; - float varReciChn0; - float varReciChn1; - float varReciChn2; - float varReciChn3; - aclFormat srcFormat; - aclDataType srcDatatype; - size_t srcDimNum; - size_t shapeCount; - aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; - aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ + aclAippInputFormat inputFormat; + int32_t srcImageSizeW; + int32_t srcImageSizeH; + int8_t cropSwitch; + int32_t loadStartPosW; + int32_t loadStartPosH; + int32_t cropSizeW; + int32_t cropSizeH; + int8_t resizeSwitch; + int32_t resizeOutputW; + int32_t resizeOutputH; + int8_t paddingSwitch; + int32_t leftPaddingSize; + int32_t rightPaddingSize; + int32_t topPaddingSize; + int32_t bottomPaddingSize; + int8_t cscSwitch; + int8_t rbuvSwapSwitch; + int8_t axSwapSwitch; + int8_t singleLineMode; + int32_t matrixR0C0; + int32_t matrixR0C1; + int32_t matrixR0C2; + int32_t matrixR1C0; + int32_t matrixR1C1; + int32_t matrixR1C2; + int32_t matrixR2C0; + int32_t matrixR2C1; + int32_t matrixR2C2; + int32_t outputBias0; + int32_t outputBias1; + int32_t outputBias2; + int32_t inputBias0; + int32_t inputBias1; + int32_t inputBias2; + int32_t meanChn0; + int32_t meanChn1; + int32_t meanChn2; + int32_t meanChn3; + float minChn0; + float minChn1; + float minChn2; + float minChn3; + float varReciChn0; + float varReciChn1; + float varReciChn2; + float varReciChn3; + aclFormat srcFormat; + aclDataType srcDatatype; + size_t srcDimNum; + size_t shapeCount; + aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; + aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ } aclAippInfo; /** @@ -339,7 +339,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId); +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, + uint32_t *modelId); /** * @ingroup AscendCL @@ -361,8 +362,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSi * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr, - size_t workSize, void *weightPtr, size_t weightSize); +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, + uint32_t *modelId, void *workPtr, size_t workSize, + void *weightPtr, size_t weightSize); /** * @ingroup AscendCL @@ -385,9 +387,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, ui * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId, - void *workPtr, size_t workSize, void *weightPtr, - size_t weightSize); +ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, + uint32_t *modelId, void *workPtr, size_t workSize, + void *weightPtr, size_t weightSize); /** * @ingroup AscendCL @@ -422,8 +424,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, - const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ, - size_t outputQNum); + const uint32_t *inputQ, size_t inputQNum, + const uint32_t *outputQ, size_t outputQNum); /** * @ingroup AscendCL @@ -453,8 +455,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem */ -ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output, - aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, + aclmdlDataset *output, aclrtStream stream); /** * @ingroup AscendCL @@ -829,11 +831,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, - int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, - int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, - int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, + int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, + int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, + int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); @@ -849,7 +851,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, in * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); /** @@ -863,7 +865,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSe * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); /** @@ -878,7 +880,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, int32_t srcImageSizeH); @@ -898,10 +900,14 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW, - int32_t scfInputSizeH, int32_t scfOutputSizeW, - int32_t scfOutputSizeH, uint64_t batchIndex); +*/ 
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, + int8_t scfSwitch, + int32_t scfInputSizeW, + int32_t scfInputSizeH, + int32_t scfOutputSizeW, + int32_t scfOutputSizeH, + uint64_t batchIndex); /** * @ingroup AscendCL @@ -919,9 +925,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, in * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW, - int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH, +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, + int8_t cropSwitch, + int32_t cropStartPosW, + int32_t cropStartPosH, + int32_t cropSizeW, + int32_t cropSizeH, uint64_t batchIndex); /** @@ -940,7 +950,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, i * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, int32_t paddingSizeTop, int32_t paddingSizeBottom, int32_t paddingSizeLeft, int32_t paddingSizeRight, @@ -961,10 +971,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0, - int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2, - int16_t dtcPixelMeanChn3, uint64_t batchIndex); +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, + int16_t dtcPixelMeanChn0, + int16_t dtcPixelMeanChn1, + int16_t dtcPixelMeanChn2, + int16_t dtcPixelMeanChn3, + uint64_t batchIndex); /** * @ingroup AscendCL @@ -981,10 +994,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0, - float dtcPixelMinChn1, float dtcPixelMinChn2, - float dtcPixelMinChn3, uint64_t batchIndex); +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, + float dtcPixelMinChn0, + float dtcPixelMinChn1, + float dtcPixelMinChn2, + float dtcPixelMinChn3, + uint64_t batchIndex); /** * @ingroup AscendCL @@ -1001,10 +1017,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, * @retval OtherValues Failure * * @see aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0, - float dtcPixelVarReciChn1, float dtcPixelVarReciChn2, - float dtcPixelVarReciChn3, uint64_t batchIndex); +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, + float dtcPixelVarReciChn0, + float dtcPixelVarReciChn1, + float dtcPixelVarReciChn2, + float dtcPixelVarReciChn3, + uint64_t batchIndex); /** * @ingroup AscendCL @@ -1020,8 +1039,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, * * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index, +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, + aclmdlDataset *dataset, + size_t index, const aclmdlAIPP *aippParmsSet); 
/** @@ -1038,8 +1059,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset * * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index, +*/ +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, + aclmdlDataset *dataset, + size_t index, const aclmdlAIPP *aippParmsSet); /** @@ -1057,8 +1080,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlD * * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP - */ -ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type, +*/ +ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, + size_t index, + aclmdlInputAippType *type, size_t *dynamicAttachedDataIndex); /** @@ -1075,7 +1100,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a * * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); /** @@ -1094,11 +1119,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t ind * * @retval ACL_SUCCESS The function is successfully executed * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId, - char *opName, size_t opNameLen, aclTensorDesc **inputDesc, - size_t *numInputs, aclTensorDesc **outputDesc, - size_t *numOutputs); +*/ +ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, + uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs, + aclTensorDesc **outputDesc, size_t *numOutputs); /** * @ingroup AscendCL @@ -1106,7 +1130,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_ * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); /** @@ -1117,7 +1141,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); /** @@ -1126,7 +1150,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); /** @@ -1138,7 +1162,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); * * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure - */ +*/ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId); /** @@ -1148,7 +1172,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *hand * @retval the aclmdlConfigHandle pointer * * @see aclmdlDestroyConfigHandle - */ +*/ ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle(); /** @@ -1177,10 +1201,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr, - const void *attrValue, size_t valueSize); + const void *attrValue, size_t valueSize); #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ +#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ diff --git a/inc/external/acl/acl_op.h b/inc/external/acl/acl_op.h index d2e59bfb..b1be0d6e 100644 --- a/inc/external/acl/acl_op.h +++ b/inc/external/acl/acl_op.h @@ -33,9 +33,9 @@ typedef void (*aclDataDeallocator)(void *data, size_t length); static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1; typedef enum aclEngineType { - ACL_ENGINE_SYS, - ACL_ENGINE_AICORE, - ACL_ENGINE_VECTOR, + ACL_ENGINE_SYS, + ACL_ENGINE_AICORE, + ACL_ENGINE_VECTOR, } aclopEngineType; /** @@ -148,7 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues, - const uint8_t *values); + const uint8_t *values); /** * @ingroup AscendCL @@ -163,7 +163,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *a * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues, - const int64_t *values); + const int64_t *values); /** * @ingroup AscendCL @@ -178,7 +178,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *at * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues, - const float *values); + const float *values); /** * @ingroup AscendCL @@ -193,7 +193,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char * * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues, - const char **values); + const char **values); /** * @ingroup AscendCL @@ -208,8 +208,11 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists, - const int *numValues, const int64_t *const values[]); +ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, + const char *attrName, + int numLists, + const int *numValues, + const int64_t *const values[]); /** * @ingroup AscendCL @@ -239,10 +242,15 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char * @retval OtherValues Failure */ ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") -ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], - const aclDataBuffer *const inputs[], int numOutputs, - const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], - const aclopAttr *attr, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + const aclDataBuffer *const inputs[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + aclDataBuffer *const outputs[], + const aclopAttr *attr, + aclrtStream stream); /** * @ingroup AscendCL @@ -272,9 +280,15 @@ ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, con * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[], - aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], - aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, + int numInputs, + aclTensorDesc *inputDesc[], + aclDataBuffer *inputs[], + int numOutputs, + aclTensorDesc *outputDesc[], + aclDataBuffer *outputs[], + aclopAttr *attr, + aclrtStream stream); /** * @ingroup AscendCL @@ -292,9 +306,12 @@ ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, a * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs, - const aclTensorDesc *const inputDesc[], int numOutputs, - const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, +ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *opAttr, aclopHandle **handle); /** @@ -326,9 +343,12 @@ ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle); * * @see aclopCreateHandle | aclCreateDataBuffer */ -ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs, - const aclDataBuffer *const inputs[], int numOutputs, - aclDataBuffer *const outputs[], aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, + int numInputs, + const aclDataBuffer *const inputs[], + int numOutputs, + aclDataBuffer *const outputs[], + aclrtStream stream); /** * @ingroup AscendCL @@ -344,8 +364,11 @@ ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInp * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer, - const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate, +ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, + const aclDataBuffer *srcBuffer, + const aclTensorDesc *dstDesc, + aclDataBuffer *dstBuffer, + uint8_t truncate, aclrtStream stream); /** @@ -360,9 +383,12 @@ ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDa * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate, +ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, + aclTensorDesc *dstDesc, + uint8_t truncate, aclopHandle **handle); + /** * @ingroup AscendCL * @brief create kernel @@ -381,10 +407,15 @@ ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, ac * * @see aclopCompile */ -ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName, - void *binData, int binSize, aclopEngineType enginetype, +ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, + const char *kernelId, + const char *kernelName, + void *binData, + int binSize, + aclopEngineType enginetype, aclDataDeallocator deallocator); + /** * @ingroup AscendCL * @brief create kernel @@ -399,8 +430,11 @@ ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *k * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs, - const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, +typedef aclError (*aclopCompileFunc)(int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *opAttr, aclopKernelDesc *aclopKernelDesc); /** @@ -441,8 +475,11 @@ ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType); * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim, - const void *args, uint32_t argSize); +ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, + const char *kernelId, + uint32_t blockDim, + const void *args, + uint32_t argSize); /** * @ingroup AscendCL @@ -473,9 +510,12 @@ ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kerne * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs, - const aclTensorDesc *const inputDesc[], int numOutputs, - const aclTensorDesc *const outputDesc[], const aclopAttr *attr); +ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *attr); /** * @ingroup AscendCL @@ -493,12 +533,17 @@ ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[], - aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], +ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, + int numInputs, + aclTensorDesc *inputDesc[], + aclDataBuffer *inputs[], + int numOutputs, + aclTensorDesc *outputDesc[], aclopAttr *attr); + #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_ACL_OP_H_ +#endif // INC_EXTERNAL_ACL_ACL_OP_H_ diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h index adae90c7..6bbb855c 100644 --- a/inc/external/acl/acl_op_compiler.h +++ b/inc/external/acl/acl_op_compiler.h @@ -24,18 +24,21 @@ extern "C" { #endif -typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType; +typedef enum aclCompileType { + ACL_COMPILE_SYS, + ACL_COMPILE_UNREGISTERED +} aclopCompileType; typedef enum { - ACL_PRECISION_MODE, - ACL_AICORE_NUM, - ACL_AUTO_TUNE_MODE, - ACL_OP_SELECT_IMPL_MODE, - ACL_OPTYPELIST_FOR_IMPLMODE, - ACL_OP_DEBUG_LEVEL, - ACL_DEBUG_DIR, - ACL_OP_COMPILER_CACHE_MODE, - ACL_OP_COMPILER_CACHE_DIR + ACL_PRECISION_MODE, + ACL_AICORE_NUM, + ACL_AUTO_TUNE_MODE, + ACL_OP_SELECT_IMPL_MODE, + ACL_OPTYPELIST_FOR_IMPLMODE, + ACL_OP_DEBUG_LEVEL, + ACL_DEBUG_DIR, + ACL_OP_COMPILER_CACHE_MODE, + ACL_OP_COMPILER_CACHE_DIR } aclCompileOpt; /** @@ -56,10 +59,15 @@ typedef enum { * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], - int numOutputs, const aclTensorDesc *const outputDesc[], - const aclopAttr *attr, aclopEngineType engineType, - aclopCompileType compileFlag, const char *opPath); +ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, + int numInputs, + const aclTensorDesc *const inputDesc[], + int numOutputs, + const aclTensorDesc *const outputDesc[], + const aclopAttr *attr, + aclopEngineType engineType, + aclopCompileType compileFlag, + const char *opPath); /** * @ingroup AscendCL @@ -82,10 +90,11 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, con * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( - const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], - int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, - aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType, + int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], + int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], + const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag, + const char *opPath, aclrtStream stream); /** * @ingroup AscendCL @@ -103,4 +112,4 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val } #endif -#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ +#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h index 990c70cf..d2675124 100644 --- a/inc/external/acl/acl_prof.h +++ b/inc/external/acl/acl_prof.h @@ -23,21 +23,21 @@ extern "C" { #endif -#define ACL_PROF_ACL_API 0x0001 -#define ACL_PROF_TASK_TIME 0x0002 -#define ACL_PROF_AICORE_METRICS 0x0004 -#define ACL_PROF_AICPU 0x0008 +#define ACL_PROF_ACL_API 0x0001 +#define ACL_PROF_TASK_TIME 0x0002 +#define ACL_PROF_AICORE_METRICS 0x0004 +#define ACL_PROF_AICPU 0x0008 -#define ACL_PROF_MAX_OP_NAME_LEN 257 -#define ACL_PROF_MAX_OP_TYPE_LEN 65 +#define ACL_PROF_MAX_OP_NAME_LEN 257 +#define ACL_PROF_MAX_OP_TYPE_LEN 65 typedef enum { - ACL_AICORE_ARITHMETIC_UTILIZATION = 0, - ACL_AICORE_PIPE_UTILIZATION = 1, - ACL_AICORE_MEMORY_BANDWIDTH = 2, - ACL_AICORE_L0B_AND_WIDTH = 3, - ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, - ACL_AICORE_NONE = 0xFF + ACL_AICORE_ARITHMETIC_UTILIZATION = 0, + ACL_AICORE_PIPE_UTILIZATION = 1, + ACL_AICORE_MEMORY_BANDWIDTH = 2, + ACL_AICORE_L0B_AND_WIDTH = 3, + ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, + ACL_AICORE_NONE = 0xFF } aclprofAicoreMetrics; typedef struct aclprofConfig aclprofConfig; @@ -98,8 +98,7 @@ ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); * @see aclprofDestroyConfig */ ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, - aclprofAicoreMetrics aicoreMetrics, - aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); + aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); /** * @ingroup AscendCL @@ -139,7 +138,8 @@ ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); * * @see aclprofModelUnSubscribe */ -ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig); +ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, + const aclprofSubscribeConfig *profSubscribeConfig); /** * @ingroup AscendCL @@ -167,7 +167,7 @@ ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); * @see aclprofDestroySubscribeConfig */ ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, - aclprofAicoreMetrics aicoreMetrics, void *fd); + aclprofAicoreMetrics aicoreMetrics, void *fd); /** * @ingroup AscendCL @@ -219,8 +219,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLe * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType, - size_t opTypeLen); +ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, + char *opType, size_t opTypeLen); /** * @ingroup AscendCL @@ -235,8 +235,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoL * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName, - size_t opNameLen); +ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, + char *opName, size_t opNameLen); /** * @ingroup AscendCL @@ -293,4 +293,4 @@ ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLe } #endif -#endif // INC_EXTERNAL_ACL_PROF_H_ +#endif // INC_EXTERNAL_ACL_PROF_H_ diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h index eb6b4240..6fd2da6e 100644 --- a/inc/external/acl/acl_rt.h +++ b/inc/external/acl/acl_rt.h @@ -26,62 +26,62 @@ extern "C" { #endif typedef enum aclrtRunMode { - ACL_DEVICE, - ACL_HOST, + ACL_DEVICE, + ACL_HOST, } aclrtRunMode; typedef enum aclrtTsId { - ACL_TS_ID_AICORE = 0, - ACL_TS_ID_AIVECTOR = 1, - ACL_TS_ID_RESERVED = 2, + ACL_TS_ID_AICORE = 0, + ACL_TS_ID_AIVECTOR = 1, + ACL_TS_ID_RESERVED = 2, } aclrtTsId; typedef enum aclrtEventStatus { - ACL_EVENT_STATUS_COMPLETE = 0, - ACL_EVENT_STATUS_NOT_READY = 1, - ACL_EVENT_STATUS_RESERVED = 2, + ACL_EVENT_STATUS_COMPLETE = 0, + ACL_EVENT_STATUS_NOT_READY = 1, + ACL_EVENT_STATUS_RESERVED = 2, } aclrtEventStatus; typedef enum aclrtCallbackBlockType { - ACL_CALLBACK_NO_BLOCK, - ACL_CALLBACK_BLOCK, + ACL_CALLBACK_NO_BLOCK, + ACL_CALLBACK_BLOCK, } aclrtCallbackBlockType; typedef enum aclrtMemcpyKind { - ACL_MEMCPY_HOST_TO_HOST, - ACL_MEMCPY_HOST_TO_DEVICE, - ACL_MEMCPY_DEVICE_TO_HOST, - ACL_MEMCPY_DEVICE_TO_DEVICE, + ACL_MEMCPY_HOST_TO_HOST, + ACL_MEMCPY_HOST_TO_DEVICE, + ACL_MEMCPY_DEVICE_TO_HOST, + ACL_MEMCPY_DEVICE_TO_DEVICE, } aclrtMemcpyKind; typedef enum aclrtMemMallocPolicy { - ACL_MEM_MALLOC_HUGE_FIRST, - ACL_MEM_MALLOC_HUGE_ONLY, - ACL_MEM_MALLOC_NORMAL_ONLY, - ACL_MEM_MALLOC_HUGE_FIRST_P2P, - ACL_MEM_MALLOC_HUGE_ONLY_P2P, - ACL_MEM_MALLOC_NORMAL_ONLY_P2P, + ACL_MEM_MALLOC_HUGE_FIRST, + ACL_MEM_MALLOC_HUGE_ONLY, + ACL_MEM_MALLOC_NORMAL_ONLY, + ACL_MEM_MALLOC_HUGE_FIRST_P2P, + ACL_MEM_MALLOC_HUGE_ONLY_P2P, + ACL_MEM_MALLOC_NORMAL_ONLY_P2P, } aclrtMemMallocPolicy; typedef enum aclrtMemAttr { - ACL_DDR_MEM, - ACL_HBM_MEM, - ACL_DDR_MEM_HUGE, - ACL_DDR_MEM_NORMAL, - ACL_HBM_MEM_HUGE, - ACL_HBM_MEM_NORMAL, - ACL_DDR_MEM_P2P_HUGE, - ACL_DDR_MEM_P2P_NORMAL, - ACL_HBM_MEM_P2P_HUGE, - ACL_HBM_MEM_P2P_NORMAL, + ACL_DDR_MEM, + ACL_HBM_MEM, + ACL_DDR_MEM_HUGE, + ACL_DDR_MEM_NORMAL, + ACL_HBM_MEM_HUGE, + ACL_HBM_MEM_NORMAL, + ACL_DDR_MEM_P2P_HUGE, + ACL_DDR_MEM_P2P_NORMAL, + ACL_HBM_MEM_P2P_HUGE, + ACL_HBM_MEM_P2P_NORMAL, } aclrtMemAttr; typedef enum aclrtGroupAttr { - ACL_GROUP_AICORE_INT, - ACL_GROUP_AIV_INT, - ACL_GROUP_AIC_INT, - ACL_GROUP_SDMANUM_INT, - ACL_GROUP_ASQNUM_INT + ACL_GROUP_AICORE_INT, + ACL_GROUP_AIV_INT, + ACL_GROUP_AIC_INT, + ACL_GROUP_SDMANUM_INT, + ACL_GROUP_ASQNUM_INT } aclrtGroupAttr; typedef struct tagRtGroupInfo aclrtGroupInfo; @@ -472,7 +472,7 @@ ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stre */ ACL_FUNC_VISIBILITY aclError 
aclrtResetEvent(aclrtEvent event, aclrtStream stream); -/** + /** * @ingroup AscendCL * @brief Queries an event's status * * @@ -534,7 +534,9 @@ ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, * * @see aclrtFree | acldvppMalloc | aclrtMallocCached */ -ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy); +ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, + size_t size, + aclrtMemMallocPolicy policy); /** * @ingroup AscendCL @@ -557,7 +559,9 @@ ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMal * * @see aclrtFree | aclrtMalloc */ -ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); +ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, + size_t size, + aclrtMemMallocPolicy policy); /** * @ingroup AscendCL @@ -648,7 +652,10 @@ ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, +ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, + size_t destMax, + const void *src, + size_t count, aclrtMemcpyKind kind); /** @@ -695,31 +702,38 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t * * @see aclrtSynchronizeStream */ -ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, - aclrtMemcpyKind kind, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, + size_t destMax, + const void *src, + size_t count, + aclrtMemcpyKind kind, + aclrtStream stream); /** - * @ingroup AscendCL - * @brief Asynchronous initialize memory - * and set contents of memory to specified value async - * - * @par Function +* @ingroup AscendCL +* @brief Asynchronously initialize memory +* and set contents of memory to a specified value +* +* @par Function * The memory to be initialized is on the Host or device side, * and the system determines whether * it is host or device according to the address * - * @param devPtr [IN] destination address pointer - * @param maxCount [IN] Max length of destination address memory - * @param value [IN] set value - * @param count [IN] the number of byte to set - * @param stream [IN] asynchronized task stream - * - * @retval ACL_SUCCESS The function is successfully executed. - * @retval OtherValues Failure - * - * @see aclrtSynchronizeStream - */ -ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, +* @param devPtr [IN] destination address pointer +* @param maxCount [IN] Max length of destination address memory +* @param value [IN] set value +* @param count [IN] the number of bytes to set +* @param stream [IN] asynchronous task stream +* +* @retval ACL_SUCCESS The function is successfully executed.
+* @retval OtherValues Failure +* +* @see aclrtSynchronizeStream +*/ +ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, + size_t maxCount, + int32_t value, + size_t count, aclrtStream stream); /** @@ -865,8 +879,11 @@ ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); * * @see aclrtGetGroupCount | aclrtGetAllGroupInfo */ -ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupId, - aclrtGroupAttr attr, void *attrValue, size_t valueLen, +ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, + int32_t groupId, + aclrtGroupAttr attr, + void *attrValue, + size_t valueLen, size_t *paramRetSize); /** @@ -929,4 +946,5 @@ ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, si } #endif -#endif // INC_EXTERNAL_ACL_ACL_RT_H_ +#endif // INC_EXTERNAL_ACL_ACL_RT_H_ + diff --git a/inc/external/acl/acl_tdt.h b/inc/external/acl/acl_tdt.h index c357518d..61995121 100644 --- a/inc/external/acl/acl_tdt.h +++ b/inc/external/acl/acl_tdt.h @@ -24,10 +24,10 @@ extern "C" { #endif enum acltdtTensorType { - ACL_TENSOR_DATA_UNDEFINED = -1, - ACL_TENSOR_DATA_TENSOR, - ACL_TENSOR_DATA_END_OF_SEQUENCE, - ACL_TENSOR_DATA_ABNORMAL + ACL_TENSOR_DATA_UNDEFINED = -1, + ACL_TENSOR_DATA_TENSOR, + ACL_TENSOR_DATA_END_OF_SEQUENCE, + ACL_TENSOR_DATA_ABNORMAL }; typedef struct acltdtDataItem acltdtDataItem; @@ -64,7 +64,7 @@ ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem * * * @retval null for failed * @retval OtherValues success - */ +*/ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); /** @@ -75,7 +75,7 @@ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataIt * * @retval 0 for failed * @retval OtherValues success - */ +*/ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); /** @@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataI * * @retval 0 for failed * @retval OtherValues success - */ +*/ ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); /** @@ -118,8 +118,12 @@ ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataIte * * @see acltdtDestroyDataItem */ -ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum, - aclDataType dataType, void *data, size_t size); +ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, + const int64_t *dims, + size_t dimNum, + aclDataType dataType, + void *data, + size_t size); /** * @ingroup AscendCL @@ -250,7 +254,8 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); * * @see acltdtReceiveTensor */ -ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, +ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, + const acltdtDataset *dataset, int32_t timeout); /** @@ -266,11 +271,13 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, * * @see acltdtSendTensor */ -ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, +ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, + acltdtDataset *dataset, int32_t timeout); #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_ACL_TDT_H_ +#endif //INC_EXTERNAL_ACL_ACL_TDT_H_ + 
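A minimal host-side sketch of how the aclrtMalloc / aclrtMemcpy / aclrtFree declarations reformatted above fit together; it is an illustration only, not part of the header changes. It assumes the headers install under an "acl/" include path, that aclInit and aclrtSetDevice have already been called, and that aclrtFree has the signature aclError aclrtFree(void *devPtr) implied by the @see references.

#include <stddef.h>
#include "acl/acl_rt.h"  /* assumed include path for the patched header */

/* Hypothetical helper: allocate device memory and copy a host buffer into it.
 * Uses the aclrtMalloc and aclrtMemcpy signatures shown in this patch. */
static aclError CopyHostBufferToDevice(const void *hostBuf, size_t size, void **devPtrOut)
{
    void *devPtr = NULL;
    aclError ret = aclrtMalloc(&devPtr, size, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    /* destMax is the destination capacity, count the number of bytes to copy */
    ret = aclrtMemcpy(devPtr, size, hostBuf, size, ACL_MEMCPY_HOST_TO_DEVICE);
    if (ret != ACL_SUCCESS) {
        (void)aclrtFree(devPtr);  /* assumed signature; referenced via @see in the header */
        return ret;
    }
    *devPtrOut = devPtr;
    return ACL_SUCCESS;
}

On failure the helper releases the partially initialized allocation and returns the runtime error code, following the ACL_SUCCESS / OtherValues convention documented in the header comments.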
diff --git a/inc/external/acl/error_codes/ge_error_codes.h b/inc/external/acl/error_codes/ge_error_codes.h index 041fc7ae..b477a18c 100644 --- a/inc/external/acl/error_codes/ge_error_codes.h +++ b/inc/external/acl/error_codes/ge_error_codes.h @@ -17,6 +17,20 @@ #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ +#if defined(_MSC_VER) +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY _declspec(dllexport) +#else +#define GE_FUNC_VISIBILITY +#endif +#else +#ifdef FUNC_VISIBILITY +#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define GE_FUNC_VISIBILITY +#endif +#endif + #include #ifdef __cplusplus diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index d2373525..47f16d9f 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -23,79 +23,80 @@ extern "C" { #endif -static const int32_t ACL_RT_SUCCESS = 0; // success +static const int32_t ACL_RT_SUCCESS = 0; // success -static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid -static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id -static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null -static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context -static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context -static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal -static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned -static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed -static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed -static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream -static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread -static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set -static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create -static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream -static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type -static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle -static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const 
int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type -static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support -static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error -static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error -static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow -static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device -static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail -static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission -static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource -static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource -static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource -static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource -static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 
507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // 
aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error -static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error #ifdef __cplusplus } #endif -#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/external/acl/ops/acl_cblas.h b/inc/external/acl/ops/acl_cblas.h index 3d81eb2b..a2bd8c61 100644 --- a/inc/external/acl/ops/acl_cblas.h +++ b/inc/external/acl/ops/acl_cblas.h @@ -23,9 +23,17 @@ extern "C" { #endif -typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; +typedef enum aclTransType { + ACL_TRANS_N, + ACL_TRANS_T, + ACL_TRANS_NZ, + ACL_TRANS_NZ_T +} aclTransType; -typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; +typedef enum aclComputeType { + ACL_COMPUTE_HIGH_PRECISION, + ACL_COMPUTE_LOW_PRECISION +} aclComputeType; /** * @ingroup AscendCL @@ -53,11 +61,12 @@ typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECIS * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, - aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, - const void *beta, void *y, int incy, aclDataType dataTypeY, - aclComputeType type, aclrtStream stream); +*/ +ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, + const void *alpha, const void *a, int lda, aclDataType dataTypeA, + const void *x, int incx, aclDataType dataTypeX, + const void *beta, void *y, int incy, aclDataType dataTypeY, + aclComputeType type, aclrtStream stream); /** * @ingroup AscendCL @@ -74,10 +83,15 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, co * * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA, - aclDataType dataTypeX, aclDataType dataTypeY, - aclComputeType type, aclopHandle **handle); +*/ +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, + int m, + int n, + aclDataType dataTypeA, + aclDataType dataTypeX, + aclDataType dataTypeY, + aclComputeType type, + aclopHandle **handle); /** * @ingroup AscendCL @@ -101,9 +115,18 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, i * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha, - const aclFloat16 *a, int lda, const aclFloat16 *x, int incx, - const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type, +ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, + int m, + int n, + const aclFloat16 *alpha, + const aclFloat16 *a, + int lda, + const aclFloat16 *x, + int incx, + const aclFloat16 *beta, + aclFloat16 *y, + int incy, + aclComputeType type, aclrtStream stream); /** @@ -119,7 +142,10 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, con * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type, +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, + int m, + int n, + aclComputeType type, aclopHandle **handle); /** @@ -145,9 +171,19 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, in * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a, - int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y, - int incy, aclComputeType type, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, + int m, + int n, + const int32_t *alpha, + const int8_t *a, + int lda, + const int8_t *x, + int incx, + const int32_t *beta, + int32_t *y, + int incy, + aclComputeType type, + aclrtStream stream); /** * @ingroup AscendCL @@ -162,7 +198,10 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, co * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type, +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, + int m, + int n, + aclComputeType type, aclopHandle **handle); /** @@ -194,11 +233,26 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, i * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, - int k, const void *alpha, const void *matrixA, int lda, - aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB, - const void *beta, void *matrixC, int ldc, aclDataType dataTypeC, - aclComputeType type, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + const void *alpha, + const void *matrixA, + int lda, + aclDataType dataTypeA, + const void *matrixB, + int ldb, + aclDataType dataTypeB, + const void *beta, + void *matrixC, + int ldc, + aclDataType dataTypeC, + aclComputeType type, + aclrtStream stream); + /** * @ingroup AscendCL @@ -220,10 +274,18 @@ ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType tra * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, - int m, int n, int k, aclDataType dataTypeA, - aclDataType dataTypeB, aclDataType dataTypeC, - aclComputeType type, aclopHandle **handle); +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + aclDataType dataTypeA, + aclDataType dataTypeB, + aclDataType dataTypeC, + aclComputeType type, + aclopHandle **handle); + /** * @ingroup AscendCL @@ -251,10 +313,22 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, a * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, - int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda, - const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta, - aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + const aclFloat16 *alpha, + const aclFloat16 *matrixA, + int lda, + const aclFloat16 *matrixB, + int ldb, + const aclFloat16 *beta, + aclFloat16 *matrixC, + int ldc, + aclComputeType type, + aclrtStream stream); /** * @ingroup AscendCL @@ -272,8 +346,13 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType tran * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC, - int m, int n, int k, aclComputeType type, +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + aclComputeType type, aclopHandle **handle); /** @@ -302,10 +381,23 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, ac * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, - int k, const int32_t *alpha, const int8_t *matrixA, int lda, - const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC, - int ldc, aclComputeType type, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + const int32_t *alpha, + const int8_t *matrixA, + int lda, + const int8_t *matrixB, + int ldb, + const int32_t *beta, + int32_t *matrixC, + int ldc, + aclComputeType type, + aclrtStream stream); + /** * @ingroup AscendCL @@ -323,12 +415,17 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType tra * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, - int m, int n, int k, aclComputeType type, +ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, + aclTransType transB, + aclTransType transC, + int m, + int n, + int k, + aclComputeType type, aclopHandle **handle); #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ +#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h index 8f5d3904..42ec4a8d 100644 --- a/inc/external/acl/ops/acl_dvpp.h +++ b/inc/external/acl/ops/acl_dvpp.h @@ -53,109 +53,123 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output // Supported Pixel Format enum acldvppPixelFormat { - PIXEL_FORMAT_YUV_400 = 0, // 0 - PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 - PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 - PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 - PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 - PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 - PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 - PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 - PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 - PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 - PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 - PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 - PIXEL_FORMAT_RGB_888 = 12, // 12 - PIXEL_FORMAT_BGR_888 = 13, // 13 - PIXEL_FORMAT_ARGB_8888 = 14, // 14 - PIXEL_FORMAT_ABGR_8888 = 15, // 15 - PIXEL_FORMAT_RGBA_8888 = 16, // 16 - PIXEL_FORMAT_BGRA_8888 = 17, // 17 - PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 - PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 - PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 - PIXEL_FORMAT_YVU_PLANAR_422, - PIXEL_FORMAT_YVU_PLANAR_444, - PIXEL_FORMAT_RGB_444 = 23, - PIXEL_FORMAT_BGR_444, - PIXEL_FORMAT_ARGB_4444, - PIXEL_FORMAT_ABGR_4444, - PIXEL_FORMAT_RGBA_4444, - PIXEL_FORMAT_BGRA_4444, - PIXEL_FORMAT_RGB_555, - PIXEL_FORMAT_BGR_555, - PIXEL_FORMAT_RGB_565, - PIXEL_FORMAT_BGR_565, - PIXEL_FORMAT_ARGB_1555, - PIXEL_FORMAT_ABGR_1555, - PIXEL_FORMAT_RGBA_1555, - PIXEL_FORMAT_BGRA_1555, - PIXEL_FORMAT_ARGB_8565, - PIXEL_FORMAT_ABGR_8565, - PIXEL_FORMAT_RGBA_8565, - PIXEL_FORMAT_BGRA_8565, - PIXEL_FORMAT_RGB_BAYER_8BPP = 50, - PIXEL_FORMAT_RGB_BAYER_10BPP, - PIXEL_FORMAT_RGB_BAYER_12BPP, - PIXEL_FORMAT_RGB_BAYER_14BPP, - PIXEL_FORMAT_RGB_BAYER_16BPP, - PIXEL_FORMAT_BGR_888_PLANAR = 70, - PIXEL_FORMAT_HSV_888_PACKAGE, - PIXEL_FORMAT_HSV_888_PLANAR, - PIXEL_FORMAT_LAB_888_PACKAGE, - PIXEL_FORMAT_LAB_888_PLANAR, - PIXEL_FORMAT_S8C1, - PIXEL_FORMAT_S8C2_PACKAGE, - PIXEL_FORMAT_S8C2_PLANAR, - PIXEL_FORMAT_S16C1, - 
PIXEL_FORMAT_U8C1, - PIXEL_FORMAT_U16C1, - PIXEL_FORMAT_S32C1, - PIXEL_FORMAT_U32C1, - PIXEL_FORMAT_U64C1, - PIXEL_FORMAT_S64C1, - PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, - PIXEL_FORMAT_YVU_SEMIPLANAR_440, - PIXEL_FORMAT_FLOAT32, - PIXEL_FORMAT_BUTT, - PIXEL_FORMAT_UNKNOWN = 10000 + PIXEL_FORMAT_YUV_400 = 0, // 0 + PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 + PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 + PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 + PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 + PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 + PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 + PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 + PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 + PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 + PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 + PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 + PIXEL_FORMAT_RGB_888 = 12, // 12 + PIXEL_FORMAT_BGR_888 = 13, // 13 + PIXEL_FORMAT_ARGB_8888 = 14, // 14 + PIXEL_FORMAT_ABGR_8888 = 15, // 15 + PIXEL_FORMAT_RGBA_8888 = 16, // 16 + PIXEL_FORMAT_BGRA_8888 = 17, // 17 + PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 + PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 + PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 + PIXEL_FORMAT_YVU_PLANAR_422, + PIXEL_FORMAT_YVU_PLANAR_444, + PIXEL_FORMAT_RGB_444 = 23, + PIXEL_FORMAT_BGR_444, + PIXEL_FORMAT_ARGB_4444, + PIXEL_FORMAT_ABGR_4444, + PIXEL_FORMAT_RGBA_4444, + PIXEL_FORMAT_BGRA_4444, + PIXEL_FORMAT_RGB_555, + PIXEL_FORMAT_BGR_555, + PIXEL_FORMAT_RGB_565, + PIXEL_FORMAT_BGR_565, + PIXEL_FORMAT_ARGB_1555, + PIXEL_FORMAT_ABGR_1555, + PIXEL_FORMAT_RGBA_1555, + PIXEL_FORMAT_BGRA_1555, + PIXEL_FORMAT_ARGB_8565, + PIXEL_FORMAT_ABGR_8565, + PIXEL_FORMAT_RGBA_8565, + PIXEL_FORMAT_BGRA_8565, + PIXEL_FORMAT_RGB_BAYER_8BPP = 50, + PIXEL_FORMAT_RGB_BAYER_10BPP, + PIXEL_FORMAT_RGB_BAYER_12BPP, + PIXEL_FORMAT_RGB_BAYER_14BPP, + PIXEL_FORMAT_RGB_BAYER_16BPP, + PIXEL_FORMAT_BGR_888_PLANAR = 70, + PIXEL_FORMAT_HSV_888_PACKAGE, + PIXEL_FORMAT_HSV_888_PLANAR, + PIXEL_FORMAT_LAB_888_PACKAGE, + PIXEL_FORMAT_LAB_888_PLANAR, + PIXEL_FORMAT_S8C1, + PIXEL_FORMAT_S8C2_PACKAGE, + PIXEL_FORMAT_S8C2_PLANAR, + PIXEL_FORMAT_S16C1, + PIXEL_FORMAT_U8C1, + PIXEL_FORMAT_U16C1, + PIXEL_FORMAT_S32C1, + PIXEL_FORMAT_U32C1, + PIXEL_FORMAT_U64C1, + PIXEL_FORMAT_S64C1, + PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, + PIXEL_FORMAT_YVU_SEMIPLANAR_440, + PIXEL_FORMAT_FLOAT32, + PIXEL_FORMAT_BUTT, + PIXEL_FORMAT_UNKNOWN = 10000 }; // Stream Format -enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; +enum acldvppStreamFormat { + H265_MAIN_LEVEL = 0, + H264_BASELINE_LEVEL, + H264_MAIN_LEVEL, + H264_HIGH_LEVEL +}; // Supported Channel Mode -enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; +enum acldvppChannelMode { + DVPP_CHNMODE_VPC = 1, + DVPP_CHNMODE_JPEGD = 2, + DVPP_CHNMODE_JPEGE = 4 +}; // Supported Border Type -enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; +enum acldvppBorderType { + BORDER_CONSTANT = 0, + BORDER_REPLICATE, + BORDER_REFLECT, + BORDER_REFLECT_101 +}; // Venc parameter type enum aclvencChannelDescParamType { - ACL_VENC_THREAD_ID_UINT64 = 0, - ACL_VENC_CALLBACK_PTR, - ACL_VENC_PIXEL_FORMAT_UINT32, - ACL_VENC_ENCODE_TYPE_UINT32, - ACL_VENC_PIC_WIDTH_UINT32, - ACL_VENC_PIC_HEIGHT_UINT32, - ACL_VENC_KEY_FRAME_INTERVAL_UINT32, - ACL_VENC_BUF_ADDR_PTR, - ACL_VENC_BUF_SIZE_UINT32, - ACL_VENC_RC_MODE_UINT32, - ACL_VENC_SRC_RATE_UINT32, - ACL_VENC_MAX_BITRATE_UINT32, - ACL_VENC_MAX_IP_PROP_UINT32 + 
ACL_VENC_THREAD_ID_UINT64 = 0, + ACL_VENC_CALLBACK_PTR, + ACL_VENC_PIXEL_FORMAT_UINT32, + ACL_VENC_ENCODE_TYPE_UINT32, + ACL_VENC_PIC_WIDTH_UINT32, + ACL_VENC_PIC_HEIGHT_UINT32, + ACL_VENC_KEY_FRAME_INTERVAL_UINT32, + ACL_VENC_BUF_ADDR_PTR, + ACL_VENC_BUF_SIZE_UINT32, + ACL_VENC_RC_MODE_UINT32, + ACL_VENC_SRC_RATE_UINT32, + ACL_VENC_MAX_BITRATE_UINT32, + ACL_VENC_MAX_IP_PROP_UINT32 }; // Jpeg picture format enum acldvppJpegFormat { - ACL_JPEG_CSS_444 = 0, - ACL_JPEG_CSS_422, - ACL_JPEG_CSS_420, - ACL_JPEG_CSS_GRAY, - ACL_JPEG_CSS_440, - ACL_JPEG_CSS_411, - ACL_JPEG_CSS_UNKNOWN = 1000 + ACL_JPEG_CSS_444 = 0, + ACL_JPEG_CSS_422, + ACL_JPEG_CSS_420, + ACL_JPEG_CSS_GRAY, + ACL_JPEG_CSS_440, + ACL_JPEG_CSS_411, + ACL_JPEG_CSS_UNKNOWN = 1000 }; /** @@ -509,7 +523,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD * @retval null for failed. * @retval other success */ -ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top, +ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, + uint32_t right, + uint32_t top, uint32_t bottom); /** @@ -588,7 +604,10 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top, +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, + uint32_t left, + uint32_t right, + uint32_t top, uint32_t bottom); /** @@ -1077,8 +1096,7 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc * @retval ACL_SUCCESS for success, other for failure */ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, - aclvencChannelDescParamType paramType, size_t length, - const void *param); + aclvencChannelDescParamType paramType, size_t length, const void *param); /** * @ingroup AscendCL @@ -1227,8 +1245,7 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne * @retval ACL_SUCCESS for success, other for failure */ ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, - aclvencChannelDescParamType paramType, size_t length, - size_t *paramRetSize, void *param); + aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param); /** * @ingroup AscendCL @@ -1528,7 +1545,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height, +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, + uint32_t size, + uint32_t *width, + uint32_t *height, int32_t *components); /** @@ -1545,8 +1565,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width, - uint32_t *height, int32_t *components, +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, + uint32_t size, + uint32_t *width, + uint32_t *height, + int32_t *components, acldvppJpegFormat *format); /** @@ -1561,7 +1584,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_ * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc, - const acldvppJpegeConfig *config, uint32_t *size); + const acldvppJpegeConfig *config, + uint32_t *size); /** * @ingroup AscendCL @@ -1575,8 +1599,10 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize, - acldvppPixelFormat outputPixelFormat, uint32_t *decSize); +ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, + uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, + uint32_t *decSize); /** * @ingroup AscendCL @@ -1591,8 +1617,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_ * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width, - uint32_t *height, int32_t *components); +ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, + uint32_t dataSize, + uint32_t *width, + uint32_t *height, + int32_t *components); /** * @ingroup AscendCL @@ -1606,8 +1635,10 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t d * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize, - acldvppPixelFormat outputPixelFormat, uint32_t *decSize); +ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, + uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, + uint32_t *decSize); /** * @ingroup AscendCL @@ -1671,8 +1702,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDe * @see acldvppCreateChannel | acldvppCreatePicDesc * | acldvppCreateResizeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig, +ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppResizeConfig *resizeConfig, aclrtStream stream); /** @@ -1708,8 +1741,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, aclrtStream stream); /** @@ -1734,9 +1769,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig */ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, - uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, - acldvppRoiConfig *cropAreas[], aclrtStream stream); + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + aclrtStream stream); /** * @ingroup AscendCL @@ -1759,9 +1797,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe * * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, - acldvppRoiConfig *pasteArea, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + acldvppRoiConfig *cropArea, + acldvppRoiConfig *pasteArea, + aclrtStream stream); /** * @ingroup AscendCL @@ -1786,11 +1827,14 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha * * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, - uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, - acldvppRoiConfig *cropAreas[], - acldvppRoiConfig *pasteAreas[], aclrtStream stream); + ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, + uint32_t *roiNums, + uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppRoiConfig *pasteAreas[], + aclrtStream stream); /** * @ingroup AscendCL @@ -1818,8 +1862,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, - acldvppPicDesc *outputDesc, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, + const void *data, + uint32_t size, + acldvppPicDesc *outputDesc, + aclrtStream stream); /** * @ingroup AscendCL @@ -1837,8 +1884,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelD * * @see acldvppCreateChannel | acldvppCreateJpegeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - const void *data, uint32_t *size, acldvppJpegeConfig *config, +ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + const void *data, + uint32_t 
*size, + acldvppJpegeConfig *config, aclrtStream stream); /** @@ -1856,8 +1906,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelD * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, - acldvppPicDesc *outputDesc, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, + const void *data, + uint32_t size, + acldvppPicDesc *outputDesc, + aclrtStream stream); /** * @ingroup AscendCL @@ -1912,8 +1965,11 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe * * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, - acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData); +ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, + acldvppStreamDesc *input, + acldvppPicDesc *output, + aclvdecFrameConfig *config, + void *userData); /** * @ingroup AscendCL @@ -1932,8 +1988,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, a * * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame */ -ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, - aclvdecFrameConfig *config, void *userData); +ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, + acldvppStreamDesc *input, + aclvdecFrameConfig *config, + void *userData); /** * @ingroup AscendCL @@ -1954,8 +2012,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + aclrtStream stream); /** * @ingroup AscendCL @@ -1977,8 +2037,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + void *reserve, + aclrtStream stream); /** * @ingroup AscendCL @@ -1990,7 +2053,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode); +ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, + uint32_t mode); /** * @ingroup AscendCL @@ -2025,7 +2089,8 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe * @retval ACL_SUCCESS The function is successfully executed. 
* @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode); +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, + uint32_t outMode); /** * @ingroup AscendCL @@ -2122,7 +2187,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap); * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data, +ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, + uint32_t dim, + uint8_t **data, uint32_t *len); /** * @ingroup AscendCL @@ -2140,8 +2207,10 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, u * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap */ ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, - const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, - const acldvppLutMap *lutMap, aclrtStream stream); + const acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + const acldvppLutMap *lutMap, + aclrtStream stream); /** * @ingroup AscendCL @@ -2162,7 +2231,8 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig(); * * @retval ACL_SUCCESS for success, other for failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index, +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, + uint32_t index, double value); /** @@ -2307,8 +2377,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig */ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, - const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, - const acldvppBorderConfig *borderConfig, aclrtStream stream); + const acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, + const acldvppBorderConfig *borderConfig, + aclrtStream stream); /** * @ingroup AscendCL @@ -2325,8 +2397,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc * * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc, - acldvppHist *hist, void *reserve, aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, + acldvppPicDesc *srcPicDesc, + acldvppHist *hist, + void *reserve, + aclrtStream stream); /** * @ingroup AscendCL @@ -2335,7 +2410,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel * @retval null for failed. * @retval OtherValues success. 
*/ -ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist(); +ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist(); /** * @ingroup AscendCL @@ -2392,7 +2467,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, * * @see acldvppCreateHist | acldvppVpcCalcHistAsync */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); +ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist); /** * @ingroup AscendCL @@ -2415,4 +2490,4 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); } #endif -#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ +#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ diff --git a/inc/external/acl/ops/acl_fv.h b/inc/external/acl/ops/acl_fv.h index 27dc367a..40cd50cb 100644 --- a/inc/external/acl/ops/acl_fv.h +++ b/inc/external/acl/ops/acl_fv.h @@ -32,8 +32,8 @@ typedef struct aclfvSearchResult aclfvSearchResult; // search operation type enum aclfvSearchType { - SEARCH_1_N, // 1:N operation type - SEARCH_N_M // N:M operation type + SEARCH_1_N, // 1:N operation type + SEARCH_N_M // N:M operation type }; /** @@ -104,8 +104,7 @@ ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t * @retval OtherValues success. */ ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, - uint32_t featureLen, uint32_t featureCount, - uint8_t *featureData, uint32_t featureDataLen); + uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen); /** * @ingroup AscendCL @@ -234,9 +233,8 @@ ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInp * @retval null for failed. OtherValues success */ ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, - uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, - uint32_t *resultOffset, float *resultDistance, - uint32_t dataLen); + uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance, + uint32_t dataLen); /** * @ingroup AscendCL @@ -350,4 +348,4 @@ ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput } #endif -#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ +#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ diff --git a/inc/external/hccl/hccl/hccl.h b/inc/external/hccl/hccl/hccl.h new file mode 100644 index 00000000..311e78f2 --- /dev/null +++ b/inc/external/hccl/hccl/hccl.h @@ -0,0 +1,133 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl.h + * @brief HCCL API + */ + +#ifndef HCCL_H_ +#define HCCL_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief Initialize HCCL. + * + * @param clusterInfo A string identifying the cluster info file path, include file name. + * @param rank A integer identifying the identify for the rank. + * @param comm A pointer identifying the initialized communication resource. 
+ * @return HcclResult + * @see HcclCommDestroy() + */ +extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); + +/** + * @brief Get hccl root info. + * + * @param rootInfo A pointer identifying the hccl root info. + * @return HcclResult + */ +extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); + +/** + * @brief Initialize HCCL with root info. + * + * @param nRanks A integer identifying the rank size of the cluster. + * @param rootInfo A struct identifying the hccl root info. + * @param rank A integer identifying the identify for the rank. + * @param comm A pointer identifying the initialized communication resource. + * @return HcclResult + * @see HcclCommDestroy() + */ +extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); + +/** + * @brief AllReduce operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, +HcclReduceOp op, HcclComm comm, aclrtStream stream); + +/** + * @brief Broadcast operator. + * + * @param buf A pointer identifying the data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param root An integer(u32) identifying the the root rank in the operator. + * @param comm A pointer identifying the communication resource based on + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, +aclrtStream stream); + +/** + * @brief ReduceScatter operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param recvCount An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, +HcclReduceOp op, HcclComm comm, aclrtStream stream); + +/** + * @brief AllGather operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param sendCount An integer(u64) identifying the number of the input data. 
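Taken together, the interfaces above form a short lifecycle: initialize a communicator, launch collectives on a stream, then destroy the communicator. A minimal sketch, not part of this header; rankTablePath, rankId, the device buffers, count and stream are placeholders assumed to be prepared by the caller, and in real code the stream would be synchronized before HcclCommDestroy.

    HcclResult hcclAllReduceSketch(const char *rankTablePath, uint32_t rankId,
                                   void *sendBuf, void *recvBuf, uint64_t count, aclrtStream stream)
    {
        HcclComm comm = NULL;
        HcclResult ret = HcclCommInitClusterInfo(rankTablePath, rankId, &comm);
        if (ret != HCCL_SUCCESS) {
            return ret;
        }
        /* Sum-reduce count float32 elements across all ranks on the given stream. */
        ret = HcclAllReduce(sendBuf, recvBuf, count, HCCL_DATA_TYPE_FP32, HCCL_REDUCE_SUM, comm, stream);
        (void)HcclCommDestroy(comm);   /* release the communicator once the stream has drained */
        return ret;
    }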
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, +HcclComm comm, aclrtStream stream); + +/** + * @brief Destroy HCCL comm + * + * @param comm A pointer identifying the communication resource targetting + * @return HcclResult + * @see HcclCommInitClusterInfo() + */ +extern HcclResult HcclCommDestroy(HcclComm comm); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_H_ diff --git a/inc/external/hccl/hccl/hccl_types.h b/inc/external/hccl/hccl/hccl_types.h new file mode 100644 index 00000000..50a64795 --- /dev/null +++ b/inc/external/hccl/hccl/hccl_types.h @@ -0,0 +1,101 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl_types.h + * @brief HCCL data type definition + * + */ + +#ifndef HCCL_TYPES_H_ +#define HCCL_TYPES_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief HCCL functions return value definition + */ +typedef enum { + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ +} HcclResult; + +/** + * @brief handle to HCCL communicator + */ +typedef void *HcclComm; + +/** + * @brief HCCL Reduction opperation + */ +typedef enum { + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN = 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ +} HcclReduceOp; + +/** + * @brief HCCL data type + */ +typedef enum { + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ + HCCL_DATA_TYPE_UINT64 = 6, /**< 
uint64 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ +} HcclDataType; + +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length + +/** + * @brief HCCL root info + */ +typedef struct HcclRootInfoDef { + char internal[HCCL_ROOT_INFO_BYTES]; +} HcclRootInfo; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/inc/external/runtime/runtime/rt_error_codes.h b/inc/external/runtime/runtime/rt_error_codes.h new file mode 100644 index 00000000..47f16d9f --- /dev/null +++ b/inc/external/runtime/runtime/rt_error_codes.h @@ -0,0 +1,102 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ +#define __INC_EXTERNEL_RT_ERROR_CODES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static const int32_t ACL_RT_SUCCESS = 0; // success + +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type + +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t 
ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource + +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error + +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error + +#ifdef __cplusplus +} +#endif + +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff 
--git a/third_party/fwkacllib/inc/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h new file mode 100644 index 00000000..703225e8 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -0,0 +1,60 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AICPU_OP_TYPE_LIST_H_ +#define AICPU_OP_TYPE_LIST_H_ + +enum OpKernelType { + TF_KERNEL, + CPU_KERNEL +}; + +enum ReturnCode { + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT +}; + +#pragma pack(push, 1) +//One byte alignment +struct SysOpInfo { + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; +}; + +struct OpParamInfo { + uint64_t num; + uint64_t dtypeList; + uint64_t formatList; +}; + +struct SysOpCheckInfo { + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; + +struct SysOpCheckResp { + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; +#pragma pack(pop) +#endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/inc/aicpu/common/aicpu_task_struct.h new file mode 100644 index 00000000..72e21f6f --- /dev/null +++ b/third_party/fwkacllib/inc/inc/aicpu/common/aicpu_task_struct.h @@ -0,0 +1,37 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AICPU_TASK_STRUCT_H_ +#define AICPU_TASK_STRUCT_H_ + +#include + +namespace aicpu { + +#pragma pack(push, 1) +struct AicpuParamHead +{ + uint32_t length; // Total length: include cunstom message + uint32_t ioAddrNum; // Input and output address number + uint32_t extInfoLength; // extInfo struct Length + uint64_t extInfoAddr; // extInfo address +}; +#pragma pack(pop) + +} // namespace aicpu + +#endif // AICPU_TASK_STRUCT_H_ + diff --git a/third_party/fwkacllib/inc/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/inc/cce/aicpu_engine.h new file mode 100644 index 00000000..042d952b --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/aicpu_engine.h @@ -0,0 +1,62 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef AICPU_ENGINE_H__ +#define AICPU_ENGINE_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + AE_STATUS_SUCCESS = 0, + AE_STATUS_BAD_PARAM = 1, + AE_STATUS_OPEN_SO_FAILED = 2, + AE_STATUS_GET_KERNEL_NAME_FAILED = 3, + AE_STATUS_INNER_ERROR = 4, + AE_STATUS_KERNEL_API_INNER_ERROR = 5, + AE_STATUS_END_OF_SEQUENCE = 6, + AE_STATUS_DUMP_FAILED = 7, + AE_STATUS_TASK_WAIT = 101, + AE_STATUS_RESERVED +} aeStatus_t; + +/** + * @ingroup aicpu engine + * @brief aeCallInterface: + * a interface to call a function in a op kernfel lib + * @param [in] addr void *, should be STR_KERNEL * format + * @return aeStatus_t + */ +aeStatus_t aeCallInterface(void *addr); + +/** + * @ingroup aicpu engine + * @brief aeBatchLoadKernelSo: + * a interface to load kernel so + * @param [in] loadSoNum load so number + * @param [in] soPaths load so paths + * @param [in] soNames load so names + * @return aeStatus_t + */ +aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]); + +#ifdef __cplusplus +} +#endif + +#endif // AICPU_ENGINE_H__ diff --git a/third_party/fwkacllib/inc/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/inc/cce/aicpu_engine_struct.h new file mode 100644 index 00000000..8c0c1847 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/aicpu_engine_struct.h @@ -0,0 +1,56 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AICPU_ENGINE_STRUCT_H__ +#define AICPU_ENGINE_STRUCT_H__ + +#include "fwk_adpt_struct.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + The different framwork we adapted for. 
+*/ +typedef enum { + FMK_KERNEL_TYPE_TF = 0, + FMK_KERNEL_TYPE_CF = 10, + FMK_KERNEL_TYPE_PT = 20, + FMK_KERNEL_TYPE_RESERVED +} FwkkernelType_t; + +#pragma pack(push, 1) +typedef struct { + uint32_t fwkKernelType; // FwkkernelType_t + union { + ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; + } fwkKernelBase; +} STR_FWK_OP_KERNEL; +#pragma pack(pop) + +#pragma pack(push, 1) +struct SessionInfo { + uint64_t sessionId; + uint64_t kernelId; + bool sessFlag; +}; +#pragma pack(pop) + +#ifdef __cplusplus +} +#endif +#endif // AICPU_ENGINE_STRUCT_H__ diff --git a/third_party/fwkacllib/inc/inc/cce/blas_struct.h b/third_party/fwkacllib/inc/inc/cce/blas_struct.h new file mode 100644 index 00000000..e0bcee4c --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/blas_struct.h @@ -0,0 +1,31 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CC_BLAS_STRUCT_API__ +#define CC_BLAS_STRUCT_API__ + +#include + +typedef enum { CCBLAS_FILL_MODE_LOWER = 0, CCBLAS_FILL_MODE_UPPER = 1 } ccblasFillMode_t; + +typedef enum { + CCBLAS_OP_N = 0, + CCBLAS_OP_T = 1, +} ccblasOperation_t; + +typedef enum { CCBLAS_DIAG_NON_UNIT = 0, CCBLAS_DIAG_UNIT = 1 } ccblasDiagType_t; + +#endif // CC_BLAS_STRUCT_API__ diff --git a/third_party/fwkacllib/inc/inc/cce/cce.h b/third_party/fwkacllib/inc/inc/cce/cce.h new file mode 100644 index 00000000..0cd9613a --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/cce.h @@ -0,0 +1,101 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CCE_H__ +#define CCE_H__ + +#include +#include "cce_def.hpp" + +namespace cce { + +/** + * @ingroup cce + * @brief create cc handler + * @param [in|out] handle point of cc handler + * @return ccStatus_t + */ +ccStatus_t ccCreate(ccHandle_t *handle); + +/** + * @ingroup cce + * @brief destroy cc handler + * @param [in] *handle cc handler + * @return ccStatus_t + */ +ccStatus_t ccDestroy(ccHandle_t *handle); + +/** + * @ingroup cce + * @brief bind stream with specified cc handler + * @param [in] handle cc handler + * @param [in] streamId stream + * @return ccStatus_t + */ +ccStatus_t ccSetStream(ccHandle_t handle, rtStream_t streamId); + +/** + * @ingroup cce + * @brief get the stream from cc handler + * @param [in] handle cc handler + * @param [in|out] streamId point of stream + * @return ccStatus_t + */ +ccStatus_t ccGetStream(ccHandle_t handle, rtStream_t *streamId); + +/** + * @ingroup cce + * @brief get the stream from cc handler + * @param [in] dataTypeTransMode mode of data type transform + * @param [in] inputData input data point + * @param [in] inputDataSize input data size + * @param [in|out] outputData output data point + * @param [in] outputDataSize output data size + * @return ccStatus_t + */ +ccStatus_t ccTransDataType(ccDataTypeTransMode_t dataTypeTransMode, const void *inputData, uint32_t inputDataSize, + void *outputData, const uint32_t outputDataSize); +/** + * @ingroup cce + * @brief cce sys init func + */ +void cceSysInit(); + +/** + * @ingroup cce + * @brief cce Log Start up func + */ +void cceLogStartup(); + +/** + * @ingroup cce + * @brief cce Log Shut down func + */ +void cceLogShutdown(); + +/** + * @ingroup cce + * @brief set the profiling on or off + * @param [in] const unsigned char* target: The engine gets it from ENV. Don't need care about it. + * @param const char* job_ctx: identifies profiling job + * @param [in] uint32_t flag: value: 0, on ; 1, off. + * @return ccStatus_t value: 0, success; 1, fail. + */ +ccStatus_t CceProfilingConfig(const char *target, const char *job_ctx, uint32_t flag); + +}; // namespace cce + +#endif // CCE_H__ diff --git a/third_party/fwkacllib/inc/inc/cce/cce_def.hpp b/third_party/fwkacllib/inc/inc/cce/cce_def.hpp new file mode 100644 index 00000000..7b1a1b8a --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/cce_def.hpp @@ -0,0 +1,152 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CCE_DEF_H__ +#define CCE_DEF_H__ + +#include "runtime/rt.h" + +namespace cce { + +/** + * @ingroup cce + * @brief memory configure for fusion + */ +typedef struct TagCceFusionMemCfg { + uint64_t memAddr; /**< memAddr */ + uint32_t memSize; /**< memSize */ + uint32_t addrChangeFlag; /**< op data addr change flag. 
value:0,valid;1,not valid */ + uint32_t poolFlag; /**< mempool flag : value:0,is valid; value: 1, not valid */ + TagCceFusionMemCfg() { + memAddr = 0; + memSize = 0; + addrChangeFlag = 0; + poolFlag = 0; + } +} CceFusionMemCfg_t; +/** + * @ingroup cce + * @brief return value + */ +typedef enum tagCcStatus { + CC_STATUS_SUCCESS = 0, /**< succ */ + CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ + CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ + CC_STATUS_BAD_PARAM = 3, /**< para check failed */ + CC_STATUS_INTERNAL_ERROR = 4, /**< internal error */ + CC_STATUS_KERNEL_ERROR = 5, /**< kernel error */ + CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */ + CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ + CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ + CC_STATUS_RESERVED /**< just for check */ +} ccStatus_t; + +/** + * @ingroup cce + * @brief original data type + */ +typedef enum tagCcDataType { + CC_DATA_FLOAT = 0, /**< float type */ + CC_DATA_HALF, /**< fp16 type */ + CC_DATA_INT8, /**< int8 type */ + CC_DATA_INT32, /**< int32 type */ + CC_DATA_UINT8, /**< uint8 type */ + CC_DATA_HALF_UINT16_PROPOSAL, /** +#include + +#define ERROR_CODE() __catch_error_code +#define ERROR_LINE_NO() __catch_error_line_no +#define ERROR_PROC() __catch_error_line_no = __LINE__; + +#define PROC \ + uint32_t __catch_error_code = 0x7FFFFFCC; \ + uint32_t __catch_error_line_no = 0xFFFFFFFF; \ + { +#define END_PROC \ + } \ + __tabErrorCode: +#define THROW(errcode) \ + { \ + __catch_error_code = (errcode); \ + ERROR_PROC(); \ + goto __tabErrorCode; \ + } +#define EXEC(func) \ + { \ + if (0 != (__catch_error_code = (func))) THROW(__catch_error_code) \ + } +#define EXEC_EX1(func, error_code) \ + { \ + if (0 != (func)) THROW(error_code) \ + } +#define EXEC_EX(func, succRet, error_code) \ + { \ + if (succRet != (__catch_error_code = (func))) THROW(error_code) \ + } +#define ASSERT_EXEC(func, succRet) \ + { \ + if (succRet != (__catch_error_code = (func))) /*GO_ASSERT_FALSE();*/ \ + THROW(__catch_error_code) \ + } \ + } +#define NEW_ERROR_EXEC(errcode, func, succRet) \ + { \ + if (succRet != (func)) { \ + THROW(errcode) \ + } \ + } +#define JUDGE(errcode, expr) \ + { \ + if (!(expr)) { \ + THROW(errcode) \ + } \ + } +#define ASSERT_JUDGE(errcode, expr) \ + { \ + if (!(expr)) { /*GO_ASSERT_FALSE();*/ \ + THROW(errcode) \ + } \ + } +#define JUDGE_FALSE(errcode, expr) \ + { \ + if (expr) { \ + THROW(errcode) \ + } \ + } +#define JUDGE_CONTINUE(expr) \ + { \ + if (expr) { \ + continue; \ + } \ + } +#define CATCH_ERROR(errcode) if (__catch_error_code == (errcode)) { // ERROR_LOG(); +#define CATCH_ALL_ERROR { +#define END_CATCH_ERROR } +#define FINAL \ + __tabFinal: +#define END_FINAL /*GO_ASSERT_FALSE()*/ ; +#define GOTO_FINAL() goto __tabFinal; +#endif // CATCH_HPP_ diff --git a/third_party/fwkacllib/inc/inc/cce/compiler_stub.h b/third_party/fwkacllib/inc/inc/cce/compiler_stub.h new file mode 100644 index 00000000..00ea467e --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/compiler_stub.h @@ -0,0 +1,36 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
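The PROC/EXEC/THROW macros above implement a goto-based error-handling pattern; a small sketch makes the control flow explicit. LoadConfig, InitDevice and SKETCH_E_LOAD are hypothetical names used only for illustration; each step returns 0 on success.

    #define SKETCH_E_LOAD 1u                                 /* hypothetical error code */
    static uint32_t LoadConfig(void) { return 0u; }          /* stand-in step: 0 means success */
    static uint32_t InitDevice(void) { return 0u; }          /* stand-in step: 0 means success */

    static uint32_t SetupSketch(void)
    {
        PROC
            EXEC(LoadConfig());                              /* non-zero return jumps to the error label */
            EXEC(InitDevice());
        END_PROC
        CATCH_ERROR(SKETCH_E_LOAD)
            /* reached only when LoadConfig() returned SKETCH_E_LOAD */
        END_CATCH_ERROR
        return (ERROR_CODE() == 0x7FFFFFCC) ? 0 : ERROR_CODE();   /* 0x7FFFFFCC is the "nothing thrown" value */
    }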
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef COMPILER_STUB_H__ +#define COMPILER_STUB_H__ + +namespace cce { + +/** + * @ingroup cce + * @brief compiler stub init func + */ +bool compilerStubInit(); + +/** + * @ingroup cce + * @brief compiler stub free func + */ +bool compilerStubFree(); + +}; // namespace cce + +#endif // COMPILER_STUB_H__ diff --git a/third_party/fwkacllib/inc/inc/cce/customize.h b/third_party/fwkacllib/inc/inc/cce/customize.h new file mode 100644 index 00000000..7dd97af1 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/customize.h @@ -0,0 +1,60 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CC_CUSTOMIZE_API__ +#define CC_CUSTOMIZE_API__ + +#include + +#define CC_DEVICE_DIM_MAX 8 +typedef enum tagOpTensorFormat +{ + OP_TENSOR_FORMAT_NC1HWC0 = 0, + OP_TENSOR_FORMAT_ND, + OP_TENSOR_FORMAT_RESERVED, + +} opTensorFormat_t; + + +typedef enum tagOpDataType +{ + OP_DATA_FLOAT = 0, /**< float type */ + OP_DATA_HALF, /**< fp16 type */ + OP_DATA_INT8, /**< int8 type */ + OP_DATA_INT32, /**< int32 type */ + OP_DATA_UINT8, /**< uint8 type */ + OP_DATA_HALF_UINT16_PROPOSAL, /**dimCnt, xDesc->dimCnt) + * @param [in] num the number of outputs + * @param [in] beta scaling factors + * @param [in] yDescArr descriptors of output tensors + * @param [in|out] yArr output data array in device memory + * @return ccStatus_t + */ +ccStatus_t ccSplitForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + int32_t axis, uint32_t num, const void *beta, const ccTensorDescriptor_t yDescArr[], + void *yArr[]); + +/** + * @ingroup dnn + * @brief get the output dimensions info of split + * @param [in] xDesc descriptor of input tensor + * @param [in] axis the dimension along which to split. Must be in the range [-xDesc->dimCnt, xDesc->dimCnt) + * @param [in] num the number of outputs + * @param [in] sizes Optional, used to specify the sizes of each output tensor along split dim. 
The tensor x would + * be split evenly along split dim if sizes is NULL + * @param [in|out] nArr point to the first element of batch sizes + * @param [in|out] cArr point to the first element of channels + * @param [in|out] hArr point to the first element of heights of feature map + * @param [in|out] wArr point to the first element of widths of feature map + * @return ccStatus_t + */ +ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num, + const uint32_t sizes[], uint32_t nArr[], uint32_t cArr[], uint32_t hArr[], + uint32_t wArr[]); + +/** + * @ingroup dnn + * @brief Get split output shape(s). + * @param [in] xDesc input tensor, support ND and NC1HWC0 + * @param [in] axis split axis, negtive axis will increased by dimCnt once time. + * @param [in] num splited nums. + * @param [in] sizes splited dim size on axis. if NULL was set, The input will be divided into num equally. + * @param [output] dimCnt splited dimCnt array. One to one correspondence with the splited output. + * @param [output] dim array of splited dim array. One to one correspondence with the splited output. + * @param [in| dimlen length of dim(Pass in the length of the entire space pointed to by dim, + not just the length of the dim array, because dim is a level 2 array + dimlen = lengthof dim[][], not just lengthof dim[]) + * @return ccStatus_t + */ +ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num, + const uint32_t sizes[], int32_t *dimCnt, int32_t *dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief create weight compress info + * @param [in|out] compressInfo point to CompressInfo + * @return ccStatus_t + */ +ccStatus_t ccCreateWeightCompressInfo(ccWeightCompressInfo_t **compressInfo); + +/** + * @ingroup dnn + * @brief destory weight compress info + * @param [in] *compressInfo point to CompressInfo + * @return ccStatus_t + */ +ccStatus_t ccDestroyWeightCompressInfo(ccWeightCompressInfo_t **compressInfo); + +/** + * @ingroup dnn + * @brief create compress table + * @param [in|out] compressTab point to weight compress table + * @return ccStatus_t + */ +ccStatus_t ccCreateWeightCompressTab(ccWeightCompressTab_t **compressTab); + +/** + * @ingroup dnn + * @brief destory compress table + * @param [in] compressTab point to weight compress table + * @return ccStatus_t + */ +ccStatus_t ccDestroyWeightCompressTab(ccWeightCompressTab_t **compressTab); + +/** + * @ingroup dnn + * @brief get fc compress info + * @param [in] xDesc descriptor of input tensor + * @param [in] wDesc descriptor of weight tensor + * @param [in] biasDesc descriptor of bias tensor + * @param [in] dataTypeTransmode mode of data type transform + * @param [in] weightCompressInfo compress info, compute based on tiling method + * @param [in|out] outputSize output data size in byte + * @param [in|out] infoTabSize compress info table + * @return ccStatus_t + */ +ccStatus_t ccGetCompressedFcWeightInfo(const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc, + const ccTensorDescriptor_t biasDesc, ccDataTypeTransMode_t dataTypeTransmode, + ccWeightCompressInfo_t *weightCompressInfo, uint32_t *outputSize, + uint32_t *infoTabSize); +/** + * @ingroup dnn + * @brief compress fc + * @param [in] wDesc descriptor of weight tensor + * @param [in] w filter data in device memory + * @param [in] weightCompressInfo compress info, compute based on tiling method + * @param [in] dataTypeTransmode mode of data type transform + * @param [in|out] y output data in device 
memory + * @param [in] ySize transformed data size in byte + * @param [in|out] yCompressedSize compressed output data size in byte + * @param [in|out] infoTab compressed info table + * @param [in] infoTabSize compressed info table size in byte + * @return ccStatus_t + */ +ccStatus_t ccCompressWeight(const ccFilterDescriptor_t wDesc, const void *w, + const ccWeightCompressInfo_t *weightCompressInfo, ccDataTypeTransMode_t dataTypeTransmode, + ccFilterDescriptor_t yDesc, void *y, uint32_t ySize, uint32_t *yCompressedSize, + void *infoTab, uint32_t infoTabSize); + +/** + * @ingroup dnn + * @brief restore compressed fc data + * @param [in] x input data in device memory + * @param [in] xSizeInBytes input compressed weight data size in byte + * @param [in|out] y output data in device memory + * @param [in] ySizeInBytes output data size in byte + * @return ccStatus_t + */ +ccStatus_t ccRestoreCompressedWeight(const void *x, uint32_t xSizeInBytes, void *y, uint32_t ySizeInBytes, + rtMemcpyKind_t kind); + +/** + * @ingroup dnn + * @brief create quantize parameters struct + * @param [in|out] quantizeInfo descriptor of quantize parameters + * @return ccStatus_t + */ +ccStatus_t ccCreateQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo); + +/** + * @ingroup dnn + * @brief destroy quantize parameters struct + * @param [in] quantizeInfo descriptor of quantize parameters + * @return ccStatus_t + */ +ccStatus_t ccDestoryQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo); + +/** + * @ingroup dnn + * @brief set quantize parameters + * @param [in] quantizeInfo descriptor of quantize parameters + * @param [in] scaleValMode enmu type for quantize scale value type (normal or sqrt) + * @param [in] scale quantize scale value + * @param [in] offset quantize offset(when quantize algorithm is half offset or full offset,this should be + * configed) + * @param [in] offsetPad padding value for load3d (only for half offset or full offset) + * @return ccStatus_t + */ +ccStatus_t ccSetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode, + const uint16_t *scale, const uint16_t *offset, const uint8_t *offsetPad); + +/** + * @ingroup dnn + * @brief set Requantize parameters + * @param [in] quantizeInfo descriptor of quantize parameters + * @param [in] scaleValMode enmu type for requantize scale value type (normal or sqrt) + * @param [in] scale quantize scale value + * @param [in] offset quantize offset(when quantize algorithm is half offset or full offset,this should be + * configed) + * @param [in] offsetw offset for filter (only config for full offset quantize) + * @return ccStatus_t + */ +ccStatus_t ccSetReQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode, + const uint16_t *scaleRq, const uint16_t *nextLayerOffset, const int32_t *offsetw); + +/** + * @ingroup dnn + * @brief set Dequantize parameters + * @param [in] quantizeInfo descriptor of quantize parameters + * @param [in] scaleValMode enmu type for dequantize scale value type (normal or sqrt) + * @param [in] scaleDq quantize scale value + * @param [in] offsetw offset for filter (only config for full offset quantize) + * @return ccStatus_t + */ +ccStatus_t ccSetDeQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode, + const uint16_t *scaleDq, const int32_t *offsetw); + +/** + * @ingroup dnn + * @brief set convolution desciptor's quantize parameters + * @param [in] convDesc convolution descriptor + * @param [in] quantizeInfo descriptor of quantize parameters + * 
@return ccStatus_t + */ +ccStatus_t ccSetConvolutionQuantizeInfo(ccConvolutionDescriptor_t convDesc, const ccQuantizeDescriptor_t QuantizeInfo); + +/** + * @ingroup dnn + * @brief set convolution desciptor's all offset quantize parameters + * @param [in] convDesc convolution descriptor + * @param [in] offsetw descriptor of quantize parameters + * @param [in] scaleReq descriptor of quantize parameters + * @param [in] offset_d_next descriptor of quantize parameters + * @return ccStatus_t + */ +ccStatus_t ccSetAllOffsetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, const uint8_t *offsetW, + const uint8_t *offsetD, const uint16_t *scaleReq, const uint16_t *offsetDNext); + +/** + * @ingroup dnn + * @brief set full connection desciptor's quantize parameters + * @param [in] fcDesc full connection descriptor + * @param [in] quantizeInfo descriptor of quantize parameters + * @return ccStatus_t + */ +ccStatus_t ccSetFullConnectionQuantizeInfo(ccFullConnectionDescriptor_t fcDesc, + const ccQuantizeDescriptor_t QuantizeInfo); + +/** + * @ingroup dnn + * @brief set pooling desciptor's quantize parameters + * @param [in] poolingDesc pooling descriptor + * @param [in] quantizeInfo descriptor of quantize parameters + * @return ccStatus_t + */ +ccStatus_t ccSetPoolingQuantizeInfo(ccPoolingDescriptor_t poolingDesc, const ccQuantizeDescriptor_t QuantizeInfo); + +/** + * @ingroup dnn + * @brief set full connection desciptor's info table + * @param [in] fcDesc full connection descriptor + * @param [in] infoTabSize table size + * @param [in] infoTab pointer to info table + * @return ccStatus_t + */ +ccStatus_t ccSetFullConnectionDescriptor(ccFullConnectionDescriptor_t fcDesc, uint32_t infoTabSize, const void *infoTab, + ccFullConnectFwdAlgo_t algo = CC_FULLCONNECT_FWD_ALGO_HALF); + +/** + * @ingroup dnn + * @brief set full connection desciptor's relu flag + * @param [in] fcDesc full connection descriptor + * @param [in] opType operation type for append at convolution operation + * @param [in] opDesc operation descritpor for the opType + * @return ccStatus_t + */ +ccStatus_t ccFullConnectionAppendOp(ccFullConnectionDescriptor_t fcDesc, tagCcOpType opType, const void *opDesc); + +/** + * @ingroup dnn + * @brief check aipp basic info + * @param [in] inputFormat format of input image + * @param [in] loadStartPosH vertical start position in source image + * @param [in] loadStartPosW horizontal start position in source image + * @param [in] srcImageSizeH vertical size of source image + * @param [in] srcImageSizeW horizontal size of source image + * @param [in] cpaddingValue C direction padding value + * @param [in] cscSwitch csc enable or not + * @param [in] rbuvSwapSwitch swap R/U and B/V position of the image + * @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV + * @param [in] singleLineMode when set this bit to 1, only read 1 line. Under this case, vertical size configuration is + * not useful. 
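Attaching quantization parameters to a convolution descriptor follows a create/set/attach sequence with the interfaces above. A minimal sketch, not part of this header: attachQuantizeInfoSketch is a hypothetical helper, the scale/offset buffers are assumed to be prepared by the caller, and since the ownership of the table after ccSetConvolutionQuantizeInfo is not specified here, it is only destroyed on the failure path.

    ccStatus_t attachQuantizeInfoSketch(ccConvolutionDescriptor_t convDesc, ccScaleValueMode_t scaleValMode,
                                        const uint16_t *scale, const uint16_t *offset, const uint8_t *offsetPad)
    {
        ccQuantizeDescriptor_t quantizeInfo;
        ccStatus_t ret = ccCreateQuantizeInfoTab(&quantizeInfo);
        if (ret != CC_STATUS_SUCCESS) {
            return ret;
        }
        ret = ccSetQuantizeFactors(quantizeInfo, scaleValMode, scale, offset, offsetPad);
        if (ret == CC_STATUS_SUCCESS) {
            ret = ccSetConvolutionQuantizeInfo(convDesc, quantizeInfo);
        }
        if (ret != CC_STATUS_SUCCESS) {
            (void)ccDestoryQuantizeInfoTab(&quantizeInfo);   /* roll back; note the spelling used by this header */
        }
        return ret;
    }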
+ * @return ccStatus_t + */ +ccStatus_t ccCheckConvolutionAippCommInfo(ccAippInputFormat_t inputFormat, int32_t loadStartPosW, int32_t loadStartPosH, + int32_t srcImageSizeW, int32_t srcImageSizeH, float cpaddingValue, + bool cscSwitch, bool rbuvSwapSwitch, bool axSwapSwitch, bool singleLineMode); + +/** + * @ingroup dnn + * @brief check aipp dtc info + * @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x + * @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x + * @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x + * @return ccStatus_t + */ +ccStatus_t ccCheckConvolutionAippDtcInfo(int32_t dtcPixelMeanChn0, int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2, + float dtcPixelMinChn0, float dtcPixelMinChn1, float dtcPixelMinChn2, + float dtcPixelVarReciChn0, float dtcPixelVarReciChn1, + float dtcPixelVarReciChn2); + +/** + * @ingroup dnn + * @brief check aipp pad info + * @param [in] paddingMode padding mode + * @param [in] leftPaddingSize left hblank/padding size + * @param [in] rightPaddingSize right hblank/padding size + * @param [in] topPaddingSize top padding size + * @param [in] bottomPaddingSize bottom padding size + * @return ccStatus_t + */ +ccStatus_t ccCheckConvolutionAippPadInfo(ccAippPaddingMode_t paddingMode, int32_t leftPaddingSize, + int32_t rightPaddingSize, int32_t topPaddingSize, int32_t bottomPaddingSize); + +/** + * @ingroup dnn + * @brief check aipp csc info + * @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n + * @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m + * @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m + * @return ccStatus_t + */ +ccStatus_t ccCheckConvolutionAippCscInfo(int32_t cscMatrixR0C0, int32_t cscMatrixR0C1, int32_t cscMatrixR0C2, + int32_t cscMatrixR1C0, int32_t cscMatrixR1C1, int32_t cscMatrixR1C2, + int32_t cscMatrixR2C0, int32_t cscMatrixR2C1, int32_t cscMatrixR2C2, + int32_t cscOutputBias0, int32_t cscOutputBias1, int32_t cscOutputBias2, + int32_t cscInputBias0, int32_t cscInputBias1, int32_t cscInputBias2); + +/** + * @ingroup dnn + * @brief check aipp scf info + * @param [in] scfSwitch scaling enable or not + * @param [in] scfInputW input width of scaling + * @param [in] scfInputH input height of scaling + * @param [in] scfOutputW output width of scaling + * @param [in] scfOutputH output height of scaling + * @return ccStatus_t + */ +ccStatus_t ccCheckConvolutionAippScfInfo(bool scfSwitch, int32_t scfInputW, int32_t scfInputH, int32_t scfOutputW, + int32_t scfOutputH); + +/** + * @ingroup dnn + * @brief check aipp param + * @param [in] convDesc descriptor of conv operator + * @param [in] xDesc input tensor info + * @param [in] yDesc output tensor info + * @return ccStatus_t + */ +ccStatus_t ccCheckConvFwdAippParam(const ccConvolutionDescriptor_t convDesc, const ccTensorDescriptor_t xDesc, + const ccTensorDescriptor_t yDesc); + +/** + * @ingroup dnn + * @brief init aipp basic info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] inputFormat format of input image + * @param [in] loadStartPosH vertical start position in source image + * @param [in] loadStartPosW horizontal start position in source image + * @param [in] srcImageSizeH vertical size of source image + * @param [in] srcImageSizeW horizontal size of source image + * @param [in] cpaddingValue C direction padding value + * @param [in] cscSwitch csc enable or not + * @param [in] 
rbuvSwapSwitch swap R/U and B/V position of the image + * @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV + * @param [in] singleLineMode when set this bit to 1, only read 1 line. Under this case, vertical size configuration is + * not useful. + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippCommInfo(ccConvolutionDescriptor_t convDesc, ccAippInputFormat_t inputFormat, + int32_t loadStartPosW, int32_t loadStartPosH, int32_t srcImageSizeW, + int32_t srcImageSizeH, float cpaddingValue, bool cscSwitch, bool rbuvSwapSwitch, + bool axSwapSwitch, bool singleLineMode); +/** + * @ingroup dnn + * @brief init aipp dtc info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x + * @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x + * @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippDtcInfo(ccConvolutionDescriptor_t convDesc, int32_t dtcPixelMeanChn0, + int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2, float dtcPixelMinChn0, + float dtcPixelMinChn1, float dtcPixelMinChn2, float dtcPixelVarReciChn0, + float dtcPixelVarReciChn1, float dtcPixelVarReciChn2); +/** + * @ingroup dnn + * @brief init aipp pad info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] paddingMode padding mode + * @param [in] leftPaddingSize left hblank/padding size + * @param [in] rightPaddingSize right hblank/padding size + * @param [in] topPaddingSize top padding size + * @param [in] bottomPaddingSize bottom padding size + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippPadInfo(ccConvolutionDescriptor_t convDesc, ccAippPaddingMode_t paddingMode, + int32_t leftPaddingSize, int32_t rightPaddingSize, int32_t topPaddingSize, + int32_t bottomPaddingSize); + +/** + * @ingroup dnn + * @brief init aipp csc info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n + * @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m + * @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippCscInfo(ccConvolutionDescriptor_t convDesc, int32_t cscMatrixR0C0, int32_t cscMatrixR0C1, + int32_t cscMatrixR0C2, int32_t cscMatrixR1C0, int32_t cscMatrixR1C1, + int32_t cscMatrixR1C2, int32_t cscMatrixR2C0, int32_t cscMatrixR2C1, + int32_t cscMatrixR2C2, int32_t cscOutputBias0, int32_t cscOutputBias1, + int32_t cscOutputBias2, int32_t cscInputBias0, int32_t cscInputBias1, + int32_t cscInputBias2); + +/** + * @ingroup dnn + * @brief init aipp scf info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] scfSwitch scaling enable or not + * @param [in] scfInputW input width of scaling + * @param [in] scfInputH input height of scaling + * @param [in] scfOutputW output width of scaling + * @param [in] scfOutputH output height of scaling + * @return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippScfInfo(ccConvolutionDescriptor_t convDesc, bool scfSwitch, int32_t scfInputW, + int32_t scfInputH, int32_t scfOutputW, int32_t scfOutputH); + +/** + * @ingroup dnn + * @brief set dynamic aipp parameter address and enflag info + * @param [in|out] convDesc descriptor of conv operator + * @param [in] dyncParaAddr aipp parameter address + * @param [in] dyncAippFlag flag to show whether to use dynamic aipp + * 
@return ccStatus_t + */ +ccStatus_t ccSetConvolutionAippDyncParaAddr(ccConvolutionDescriptor_t convDesc, const void *dyncParaAddr, + bool dyncAippFlag, bool rotationFlag = false); + +/** + * @ingroup dnn + * @brief check dynamic aipp parameter + * @param [in] dyncParaAddr aipp parameter address + * @param [in] dataLength parameter lenght + * @param [in] convolutionDimW convDimW + * @param [in] convolutionDimH convDimH + * @return ccStatus_t + */ +ccStatus_t ccCheckDynamicAippParam(const void *dynamicParamAddr, uint32_t dataLength, int64_t convolutionDimW, + int64_t convolutionDimH); + +/*** @ingroup dnn + * @brief trans mean and var + * @param [in|out] mean' = bnScale/sqrt(var) + * @param [in|out] var' = -bnScale * mean / sqrt(var) + bnBias + * @return ccStatus_t + */ + +ccStatus_t ccTransBatchnormMeanAndVar(void *mean, void *var, const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, + const void *alpha, const void *beta, void *bnScale, void *bnBias, double epsilon); + +/** + * @ingroup dnn + * @brief init deconvolution adj or targetShape info. + * @param [in] convDesc conv descriptor. + * @param [in] adjH, adjust H output. + * @param [in] adjW, adjust W output. + * @param [in] targetShape, values of output shape, if this pointer was set, ignore adj. + * @return ccStatus_t + */ +ccStatus_t ccSetDeconvolutionOutShapeInfo(ccConvolutionDescriptor_t convDesc, uint32_t adjSize, const uint32_t *adj, + uint32_t targetShapeSize, const uint32_t *targetShape); + +/** + * @ingroup dnn + * @brief gather elements according to the indices. + * @param [in] alpha reserved. + * @param [in] xDesc description of the tensor from which to gather elements. + * @param [in] x data point of the tensor from which to gather elements. + * @param [in] indicesDesc description of the tensor of indices. + * @param [in] indices data point of the tensor of indices. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccGatherNdForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t indicesDesc, const void *indices, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of gather_nd. + * @param [in] xDesc description of the tensor from which to gather elements. + * @param [in] indicesDesc description of the tensor of indices. + * @param [output] n dim-size of n-dim. + * @param [output] c dim-size of c-dim. + * @param [output] h dim-size of h-dim. + * @param [output] w dim-size of w-dim. + * @param [output] realDimCnt real dim. + * @return ccStatus_t + */ +ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc, int32_t *n, + int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc, + int32_t *dimCnt, int32_t *dim, int32_t dimLen); +/** + * @ingroup dnn + * @brief tile tensor by multiples. + * @param [in] alpha reserved. 
+ * @param [in] xDesc description of the tensor which to be tiled. + * @param [in] x data point of the tensor which to be tiled. + * @param [in] multiples tile coefficient of each dim. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccTileForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccIntArray_t *multiples, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output); + +/** + * @ingroup dnn + * @brief get output shape of tile. + * @param [in] xDesc description of the dividend tensor. + * @param [in] multiples multiples of each dim. + * @param [in|out] dimCnt [point to the output dimCnt] + * @param [in|out] dim [arrays to save dims] + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *multiples, int32_t *dimCnt, + int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief get output shape of tile. + * @param [in] xDesc description of the dividend tensor. + * @param [in] multiples multiples of each dim. + * @param [output] n dim-size of n-dim. + * @param [output] c dim-size of c-dim. + * @param [output] h dim-size of h-dim. + * @param [output] w dim-size of w-dim. + * @param [output] realDimCnt real dim. + * @return ccStatus_t + */ +ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc, + // const ccIntArrayDescriptor_t multiples, + const ccIntArray_t *multiples, int32_t *n, int32_t *c, int32_t *h, int32_t *w, + int32_t *realDimCnt); +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief realdiv between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the dividend tensor. + * @param [in] x data point of the dividend tensor. + * @param [in] yDesc description of the divisor tensor. + * @param [in] y data point of the divisor tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccRealdivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the dividend tensor. + * @param [in] yDesc description of the divisor tensor. + * @param [output] n dim-size of n-dim. + * @param [output] c dim-size of c-dim. + * @param [output] h dim-size of h-dim. + * @param [output] w dim-size of w-dim. + * @param [output] realDimCnt real dim. + * @return ccStatus_t + */ +ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *n, + int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); + +/** + * @ingroup dnn + * @brief realdiv between two tensors. 
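The element-wise interfaces here pair an output-shape query with a forward call; a minimal sketch of that pattern follows. realdivSketch and the fixed dim capacity of 8 are illustrative choices, the float alpha/beta convention is an assumption (both are documented as reserved), and outputDesc is assumed to already describe the queried shape.

    ccStatus_t realdivSketch(ccHandle_t handle,
                             const ccTensorDescriptor_t xDesc, const void *x,
                             const ccTensorDescriptor_t yDesc, const void *y,
                             const ccTensorDescriptor_t outputDesc, void *output)
    {
        int32_t dimCnt = 0;
        int32_t dim[8] = {0};                                    /* illustrative capacity, passed as dimLen */
        ccStatus_t ret = ccGetRealdivOutputDim(xDesc, yDesc, &dimCnt, dim, 8);
        if (ret != CC_STATUS_SUCCESS) {
            return ret;
        }
        /* In a full flow the output descriptor would be configured from dimCnt/dim at this point. */
        const float alpha = 1.0f;
        const float beta = 0.0f;                                 /* alpha/beta are documented as reserved */
        return ccRealdivForward(handle, &alpha, xDesc, x, yDesc, y, &beta, outputDesc, output);
    }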
+ * @param [in] alpha reserved.
+ * @param [in] xDesc description of the left operator tensor.
+ * @param [in] x data point of the left operator tensor.
+ * @param [in] yDesc description of the right operator tensor.
+ * @param [in] y data point of the right operator tensor.
+ * @param [in] beta reserved.
+ * @param [in] outputDesc description of the output tensor.
+ * @param [output] output data point of the output tensor.
+ * @return ccStatus_t
+ */
+ccStatus_t ccFloordivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                             const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
+                             const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief get output shape of floordiv.
+ * @param [in] xDesc description of the left operator tensor.
+ * @param [in] yDesc description of the right operator tensor.
+ * @param [output] dimCnt dim nums.
+ * @param [output] dim dim size.
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetFloordivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
+                                  int32_t *dim, int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief greater comparison between two tensors.
+ * @param [in] alpha reserved.
+ * @param [in] xDesc description of the left operator tensor.
+ * @param [in] x data point of the left operator tensor.
+ * @param [in] yDesc description of the right operator tensor.
+ * @param [in] y data point of the right operator tensor.
+ * @param [in] beta reserved.
+ * @param [in] outputDesc description of the output tensor.
+ * @param [output] output data point of the output tensor.
+ * @return ccStatus_t
+ */
+ccStatus_t ccGreaterForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                            const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
+                            const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief get output shape of greater.
+ * @param [in] xDesc description of the left operator tensor.
+ * @param [in] yDesc description of the right operator tensor.
+ * @param [output] dimCnt dim nums.
+ * @param [output] dim dim size.
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetGreaterOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
+                                 int32_t *dim, int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief less comparison between two tensors.
+ * @param [in] alpha reserved.
+ * @param [in] xDesc description of the left operator tensor.
+ * @param [in] x data point of the left operator tensor.
+ * @param [in] yDesc description of the right operator tensor.
+ * @param [in] y data point of the right operator tensor.
+ * @param [in] beta reserved.
+ * @param [in] outputDesc description of the output tensor.
+ * @param [output] output data point of the output tensor.
+ * @return ccStatus_t
+ */
+ccStatus_t ccLessForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                         const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
+                         const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief get output shape of less.
+ * @param [in] xDesc description of the left operator tensor.
+ * @param [in] yDesc description of the right operator tensor.
+ * @param [output] dimCnt dim nums.
+ * @param [output] dim dim size.
+ * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetLessOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief get output shape of LogicalOr. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetLogicalOrOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief get output shape of LogicalXor. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in] dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetLogicalXorOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief sqrt forward: + * data type only support bool + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccLogicalNotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief equal between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ + +ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief dump data during inference, only for eng ver. + * @param [in] handle cce handle + * @return ccStatus_t + */ +ccStatus_t ccDataDumpForward(ccHandle_t handle, const void *buffer, const uint64_t bufLen, const uint32_t taskIndex); + +/** + * @ingroup dnn + * @brief logicaland between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. 
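+ *
+ * Hypothetical sketch (not part of the original header): the boolean input/output
+ * buffers and their descriptors are assumed to be prepared elsewhere; alpha and beta
+ * follow the reserved-pointer convention of the other element-wise routines.
+ * @code
+ *   float alpha = 1.0f, beta = 0.0f;
+ *   ccStatus_t rc = ccLogicalAndForward(handle, &alpha, xDesc, x, yDesc, y, &beta,
+ *                                       outputDesc, output);
+ * @endcode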
+ * @return ccStatus_t + */ +ccStatus_t ccLogicalAndForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief logical or between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccLogicalOrForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); +/** + * @ingroup dnn + * @brief logical Xor between two tensors(x ^ y = (x | y) & ~(x & y). + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccLogicalXorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of equal. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); +/** + * @ingroup dnn + * @brief get output shape of logicaland. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetLogicalAndOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); +/** + * @ingroup dnn + * @brief realdiv between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. 
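+ *        (this entry documents ccFloormodForward: element-wise floor modulo of x by y)
+ *
+ * Hypothetical sketch (not part of the original header) of the usual two-step pattern:
+ * query the output shape with ccGetFloormodOutputDim, size the output buffer from it,
+ * then run the forward call. Handle, descriptors and buffers are assumed to exist.
+ * @code
+ *   int32_t dim[8] = {0};
+ *   int32_t dimCnt = 0;
+ *   ccStatus_t rc = ccGetFloormodOutputDim(xDesc, yDesc, &dimCnt, dim, 8);
+ *   // ... allocate "output" according to dimCnt/dim, then:
+ *   float alpha = 1.0f, beta = 0.0f;
+ *   rc = ccFloormodForward(handle, &alpha, xDesc, x, yDesc, y, &beta, outputDesc, output);
+ * @endcode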
+ * @return ccStatus_t + */ +ccStatus_t ccFloormodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetFloormodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief compare between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. + * @return ccStatus_t + */ +ccStatus_t ccCompareForward(ccHandle_t handle, ccCompareType_t compareType, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, + const void *y, const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [output] dimCnt dim nums. + * @param [output] dim dim size. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetCompareOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief create descriptor of FillParam + * @param [in|out] fillParamDesc point to descriptor of fill param + * @return ccStatus_t + */ +ccStatus_t ccCreateFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of FillParam + * @param [in] *fillParamDesc point to descriptor of fill param + * @return ccStatus_t + */ +ccStatus_t ccDestroyFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc); + +/** + * @ingroup dnn + * @brief get output shape of broadcat operations. + * @param [in] inputNum input number of the operation tensors. + * @param [in] xDesc[] description of the input operation tensors list. + * @param [output] dimCnt dim-size of output tensor. + * @param [output] dim dim of output tensor. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetMultiNdBroadcastOpOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief get output shape of maximultitensor. + * @param [in] inputNum the num of input operator tensors. + * @param [in] xDesc[] description of the input operator tensors list. + * @param [output] dimCnt dim count of output tensor. + * @param [output] dim array of output tensor. 
+ * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetMaxMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief get output shape of minmultitensor. + * @param [in] inputNum the num of input operator tensors. + * @param [in] xDesc[] description of the input operator tensors list. + * @param [output] dimCnt dim count of output tensor. + * @param [output] dim array of output tensor. + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetMinMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief MaxMultitensor forward: + * data type only support float float16 and int32 + * data format only support ND + * @param [in] handle cce handle + * @param [in] inputNum input tensor number + * @param [in] alpha common scale factor + * @param [in] xDesc[] descriptor of input tensors list + * @param [in] x[] input data in device memory list + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccMaxMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha, + const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief MinMultitensor forward: + * data type only support float float16 and int32 + * data format only support ND + * @param [in] handle cce handle + * @param [in] inputNum input tensor number + * @param [in] alpha common scale factor + * @param [in] xDesc[] descriptor of input data list + * @param [in] x[] input data in device memory list + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccMinMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha, + const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief create descriptor of StridedSlice + * @param [in|out] stridedSliceDesc point to descriptor of StridedSlice param + * @return ccStatus_t + */ +ccStatus_t ccCreateStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of StridedSlice + * @param [in] *stridedSliceDesc point to descriptor of StridedSlice param + * @return ccStatus_t + */ +ccStatus_t ccDestroyStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc); + +/** + * @ingroup dnn + * @brief init stridedSlice descriptor_t. 
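+ *
+ * Hypothetical sketch (not part of the original header) of the descriptor life cycle:
+ * create the descriptor, fill it with begin/end/strides, use it, then destroy it.
+ * Error handling and the later output-dim/forward calls are omitted for brevity.
+ * @code
+ *   ccStridedSliceDescriptor_t ssDesc;
+ *   int32_t begin[4]   = {0, 0, 0, 0};
+ *   int32_t end[4]     = {1, 3, 224, 224};
+ *   int32_t strides[4] = {1, 1, 2, 2};
+ *   ccCreateStridedSliceDescriptor(&ssDesc);
+ *   ccSetStridedSliceDescriptor(ssDesc, 4, begin, end, strides);
+ *   // ... ccGetStridedSliceOutputDim / ccStridedSliceForward ...
+ *   ccDestroyStridedSliceDescriptor(&ssDesc);
+ * @endcode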
+ * @param [out] stridedSliceDesc struct of stridedslice param
+ * @param [in] dimCnt dimension of the input tensor
+ * @param [in] begin slice begin index (inclusive)
+ * @param [in] end slice end index (exclusive)
+ * @param [in] strides slice stride
+ * @return ccStatus_t
+ */
+ccStatus_t ccSetStridedSliceDescriptor(ccStridedSliceDescriptor_t stridedSliceDesc, int32_t dimCnt, int32_t begin[],
+                                       int32_t end[], int32_t strides[]);
+
+/**
+ * @ingroup dnn
+ * @brief create descriptor of StridedSlice attributes
+ * @param [in|out] attrDesc point to descriptor of StridedSlice attr
+ * @return ccStatus_t
+ */
+ccStatus_t ccCreateStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc);
+
+/**
+ * @ingroup dnn
+ * @brief destroy descriptor of StridedSlice attributes
+ * @param [in] *attrDesc point to descriptor of StridedSlice attr
+ * @return ccStatus_t
+ */
+ccStatus_t ccDestroyStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc);
+
+/**
+ * @ingroup dnn
+ * @brief init stridedSlice mask attrs descriptor.
+ * @param [out] attrDesc struct of stridedslice mask attrs
+ * @param [in] beginMask begin mask
+ * @param [in] endMask end mask
+ * @param [in] ellipsisMask ellipsis mask
+ * @param [in] newAxisMask new axis mask
+ * @param [in] shrinkAxisMask shrink axis mask
+ * @return ccStatus_t
+ */
+ccStatus_t ccSetStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t attrDesc, int32_t beginMask,
+                                            int32_t endMask, int32_t ellipsisMask, int32_t newAxisMask,
+                                            int32_t shrinkAxisMask);
+
+/**
+ * @ingroup dnn
+ * @brief get output shape of a strided slice.
+ * @param [in] xDesc descriptor of input data
+ * @param [in] stridedSliceDesc specifies the begin, end, strides of slice
+ * @param [in] attrDesc reserve for optional attributes.
+ * @param [out] n point to n size
+ * @param [out] c point to c size
+ * @param [out] h point to h size
+ * @param [out] w point to w size
+ * @param [out] realDimCnt real dim count
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetStridedSliceOutputDim(const ccTensorDescriptor_t xDesc,
+                                      const ccStridedSliceDescriptor_t stridedSliceDesc,
+                                      const ccStridedSliceAttrsDescriptor_t attrDesc, int32_t *n, int32_t *c,
+                                      int32_t *h, int32_t *w, int32_t *realDimCnt);
+
+/**
+ * @ingroup dnn
+ * @brief Extracts a strided slice of a tensor.
+ * @param [in] handle cce handle
+ * @param [in] stridedSliceDesc specifies the begin, end, strides of slice
+ * @param [in] attrDesc reserve for optional attributes.
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data
+ * @param [in] x input data in device memory
+ * @param [in] beta common scale factor
+ * @param [in] yDesc descriptor of output data
+ * @param [in|out] y output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccStridedSliceForward(ccHandle_t handle, const ccStridedSliceDescriptor_t stridedSliceDesc,
+                                 const ccStridedSliceAttrsDescriptor_t attrDesc, const void *alpha,
+                                 const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
+                                 const ccTensorDescriptor_t yDesc, void *y);
+
+/**
+ * @ingroup dnn
+ * @brief get output dimension info of a sliced tensor.
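+ *
+ * Hypothetical sketch (not part of the original header): the ccIntArray_t instances
+ * holding begin/size are assumed to be filled according to their definition in the
+ * base header; the returned NCHW dims would then be used to size the output tensor.
+ * @code
+ *   int32_t n = 0, c = 0, h = 0, w = 0, realDimCnt = 0;
+ *   ccStatus_t rc = ccGetSliceOutputDim(xDesc, begin, size, &n, &c, &h, &w, &realDimCnt);
+ *   // ... create yDesc from (n, c, h, w), then call ccSliceForward(...)
+ * @endcode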
+ * @param [in] xDesc descriptor of input data + * @param [in] begin begin position of tensor + * @param [in] size size to slice + * @param [out] n point to n size + * @param [out] c point to c size + * @param [out] h point to h size + * @param [out] w point to w size + * @param [out] realDimCnt realdim count + * @return ccStatus_t + */ +ccStatus_t ccGetSliceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *begin, const ccIntArray_t *size, + int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); + +/** + * @ingroup dnn + * @brief slice of a tensor. + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] begin begin position of tensor + * @param [in] size size to slice + * @param [in] beta common scale factor + * @param [in] yDesc descriptor of output data + * @param [in|out] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccSliceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccIntArray_t *begin, const ccIntArray_t *size, const void *beta, + const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief gather forward computation + * @param [in] handle cce handle + * @param [in] paramsDesc descriptor of params tensor + * @param [in] params input data in device memory + * @param [in] indicesDesc descriptor of indices tensor + * @param [in] indices indices data in device memory + * @param [in] axis descriptor of roi tensor + * @param [in] alpha reserved + * @param [in] beta reserved + * @param [in] outputDesc descriptor of output tensor + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccGatherForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc, + const void *params, const ccTensorDescriptor_t indicesDesc, const void *indices, + const int32_t axis, const void *beta, ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief gather output dim computation, for NC1HWC0 + * @param [in] paramsDesc descriptor of params tensor + * @param [in] indicesDesc descriptor of indices tensor + * @param [in] axis descriptor of roi tensor + * @param [out] n dim of n + * @param [out] c dim of c + * @param [out] h dim of h + * @param [out] w dim of w + * @param [out] realDimCnt real dim count + * @return ccStatus_t + */ +ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc, + int32_t axis, int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt); + +/** + * @ingroup dnn + * @brief gather output dim computation + * @param [in] paramsDesc descriptor of params tensor + * @param [in] indicesDesc descriptor of indices tensor + * @param [in] axis descriptor of roi tensor + * @param [out] dimCnt dimcnt of output + * @param [out] dim dim of output + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc, + int32_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief exp forward computation + * @param [in] handle cce handle + * @param [in] expDesc descriptor of expParam + * @param [in] expParam a ternary array + * @param [in] alpha reserved parameter + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param 
[in] beta reserved parameter + * @param [in] yDesc descriptor of output tensor + * @param [out] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccExpForward(ccHandle_t handle, const ccExpDescriptor_t expDesc, const void *expParam, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief expm1 forward: + * data type only support float float16 and double + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccExpm1Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief log1p forward: + * data type only support float float16 and double + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccLog1pForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief init descriptor for parameter of exp function + * @param [in|out] powDesc descriptor of tensor + * @param [in] dataType data type in device + * @param [in] paramCnt number of parameters + * @return ccStatus_t + */ +ccStatus_t ccSetExpDescriptor(ccExpDescriptor_t expDesc, ccDataType_t dataType, uint32_t paramCnt); + +/** + * @ingroup dnn + * @brief exp forward computation + * @param [in] handle cce handle + * @param [in] logDesc descriptor of logParam + * @param [in] logParam a ternary array + * @param [in] alpha reserved parameter + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta reserved parameter + * @param [in] yDesc descriptor of output tensor + * @param [in] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccLogForward(ccHandle_t handle, const ccLogDescriptor_t logDesc, const void *logParam, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief init descriptor for parameter of log function + * @param [in|out] logDesc descriptor of tensor + * @param [in] dataType data type in device + * @param [in] paramCnt number of parameters + * @return ccStatus_t + */ +ccStatus_t ccSetLogDescriptor(ccLogDescriptor_t logDesc, ccDataType_t dataType, uint32_t paramCnt); + +/** + * @ingroup dnn + * @brief pow forward computation + * @param [in] handle cce handle + * @param [in] powDesc descriptor of logParam + * @param [in] powParam a ternary array + * @param [in] alpha reserved parameter + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta reserved parameter + * @param [in] yDesc descriptor of input tensor + * 
@param [in] y input data in device memory + * @param [in] zDesc descriptor of output tensor + * @param [out] z output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccPowForward(ccHandle_t handle, const ccPowDescriptor_t powDesc, const void *powParam, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, + const void *y, const void *beta, const ccTensorDescriptor_t zDesc, void *z); + +/** + * @brief init descriptor for parameter of pow function + * @param [in|out] powDesc descriptor of tensor + * @param [in] dataType data type in device + * @param [in] paramCnt number of parameters + * @return ccStatus_t + */ +ccStatus_t ccSetPowDescriptor(ccPowDescriptor_t powDesc, ccDataType_t dataType, uint32_t paramCnt); + +/** + * @ingroup dnn + * @brief non max suppression forward. + * @param [in] handle cce handle + * @param [in] nonmaxParaDesc descriptor of para + * @param [in] nonmaxPara input para in host memory + * @param [in] maxoutputsizex input para in host memory + * @param [in] alpha common scale factor + * @param [in] boxesDesc descriptor of input data boxesDesc + * @param [in] boxes input data boxes in device memory + * @param [in] scoresDesc descriptor of input data boxesDesc + * @param [in] scores input data scores in device memory + * @param [in] workSpaceSizeInBytes workspace size + * @param [in] workSpace input workspace in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccNonMaxSuppressionForward(ccHandle_t handle, const ccNonMaxSuppressionDescriptor_t nonmaxParaDesc, + const void *nonmaxPara, const int *maxoutputsize, const void *alpha, + const ccTensorDescriptor_t boxesDesc, const void *boxes, + const ccTensorDescriptor_t scoresDesc, const void *scores, + const uint32_t workSpaceSizeInBytes, void *workSpace, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); +/** + * @brief init descriptor for parameter of NonMaxSuppression function + * @param [in|out] powDesc descriptor of tensor + * @param [in] dataType data type in device + * @param [in] paramCnt number of parameters + * @return ccStatus_t + */ +ccStatus_t ccSetNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t nonMaxSuppressionDesc, + ccDataType_t dataType, uint32_t paramCnt); + +/** + * @ingroup dnn + * @brief get the output dimension info of resizeBilinear op. + * @param [in] xDesc descriptor of input data + * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator + * @param [out] dimCnt + * @param [out] dim[] dim of output + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetResizeBilinearOutputDim(const ccTensorDescriptor_t xDesc, + const ccResizeBilinearDescriptor_t resizeBilinearDesc, int32_t *dimCnt, + int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief get the output dimension info of interp op. + * @param [in] xDesc descriptor of input data + * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator + * @param [out] dimCnt + * @param [out] dim[] dim of output + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetInterpOutputDim(const ccTensorDescriptor_t xDesc, const ccResizeBilinearDescriptor_t resizeBilinearDesc, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); +/** + * @ingroup dnn + * @brief resize bilinear forward for t network. 
+ * @param [in] handle cce handle + * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] yDesc descriptor of output data + * @param [in|out] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccResizeBilinearForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc, + const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief resize bilinear forward for c network. + * @param [in] handle cce handle + * @param [in] resizeBilinearDesc descriptor of resize_bilinear operator + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] yDesc descriptor of output data + * @param [in|out] y output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccInterpForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief create descriptor of ResizeBilinear + * @param [in|out] resizeBilinearDesc point to descriptor of resizeBilinear attr + * @return ccStatus_t + */ +ccStatus_t ccCreateResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of Interp + * @param [in|out] resizeBilinearDesc point to descriptor of resizeBilinear attr + * @return ccStatus_t + */ +ccStatus_t ccDestroyResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc); + +/** + * @ingroup dnn + * @brief set descriptor of resizeBilinear. 
+ * @param [in|out] resizeBilinearDesc descriptor of resize_bilinear operator + * @param [in] resizeOutputDimMode way to decide output dimensions + * @param [in] alignCorners whether the centers of input and output are aligned + * @param [in] zoom_factor zoom factor + * @param [in] shrink_factor shrink factor + * @param [in] height height of output + * @param [in] width width of output + * @param [in] pad_begin padding at begin of input + * @param [in] pad_end padding at end of input + * @return ccStatus_t + */ +ccStatus_t ccSetResizeBilinearDescriptor(ccResizeBilinearDescriptor_t resizeBilinearDesc, + ccResizeOutputDimMode_t resizeOutputDimMode, bool alignCorners, + int32_t zoom_factor, int32_t shrink_factor, int32_t height, int32_t width, + int32_t pad_begin, int32_t pad_end); + +/** + * @ingroup dnn + * @brief fill forward computation + * @param [in] handle cce handle + * @param [in] fillParamDesc descriptor of fill parameter + * @param [in] alpha reserved + * @param [in] givenDesc descriptor of given tensor + * @param [in] givenData given data in device memory + * @param [in] workspace space for fill algorithm + * @param [in] workSpaceSizeInBytes space size in byte + * @param [in] beta reserved + * @param [in] outputDesc descriptor of output tensor + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccFillForward(ccHandle_t handle, const ccFillParamDescriptor_t fillParamDesc, const void *alpha, + const ccTensorDescriptor_t givenDesc, const void *givenData, const void *workspace, + const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output); + +/** + * @ingroup dnn + *[ccGetFillWorkspaceSize] + *@param fillType [fill type] + *@param givenDesc [given tensor descriptor] + *@param xDesc [input tensor descriptor] + *@param sizeInBytes [output size] + *@return ccStatus_t [status] + */ +ccStatus_t ccGetFillWorkspaceSize(const ccFillOpType_t fillType, const ccTensorDescriptor_t xDesc, + uint32_t *sizeInBytes); + +/** + *[ccCast] + *@param handle [cce handler] + *@param alpha [alpha] + *@param xDesc [tensor Description of tensor x] + *@param x [input tensor x] + *@param beta [beta + *@param yDesc [tensor Description of tensor y] + *@param y [output tensor y] + *@return ccStatus_t [status] + */ +ccStatus_t ccCast(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief round forward: + * data type only support float float16 and int32 + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccRoundForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief rint forward: + * data type only support float float16 + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param 
[in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccRintForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief sqrt forward: + * data type only support float float16 + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccSqrtForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + *[ccCast] + *@param filterSrcInfo [cce filtersrc descriptor] + *@param filterSrc [filterSrc address] + *@param filterDstInfo [cce filterdst descriptor] + *@param filterDst [filterdst address] + *@param group [group] + *@param ySizeInBytes [fraczfilter size] + *@param outputDataType [datatype] + *@return ccStatus_t [status] + */ +ccStatus_t ccTransGroupConvFilterInt8(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc, + ccFilterDescriptor_t filterDstInfo, void *filterDst, uint32_t group, + uint32_t ySizeInBytes, ccDataType_t outputDataType); + +/** + *[ccGetConcatOutputDim] + *@param xDesc[] [input tensor descriptor] + *@param axis [concat axis] + *@param inputNum [input tensor numbers] + *@param dim[] [output dim] + *@param [in| dimlen length of dim + *@return ccStatus_t [status] + */ +ccStatus_t ccGetConcatOutputDim(const ccTensorDescriptor_t xDesc[], int32_t axis, int32_t inputNum, int32_t *dimCnt, + int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief get the output dimension info of reduce. + * @param [in] xDesc descriptor of input tensor + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetReduceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *axis, bool keepDims, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief reduce sum forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceSumForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce max forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. 
+ * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceMaxForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce min forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceMinForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce mean forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceMeanForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce prod forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceProdForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce all forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. 
+ * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceAllForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + *@brief print times stats + *@return ccStatus_t [status] + */ +ccStatus_t ccPrintTimeStat(); + +/** + * @ingroup dnn + * @brief reduce abs sum forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceAbsSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reduce square sum forward computation + * @param [in] handle cce handle + * @param [in] axis The dimensions to reduce + * @param [in] keepDims If true, retains reduced dimensions with length 1. + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReduceSquareSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get the output dimension info of crop and resize + * @param [in] imageDesc descriptor of images + * @param [in] boxesDesc descriptor of boxes + * @param [in] boxidxDesc descriptor of boxidx + * @param [in] resizeHeight resize height + * @param [in] resizeWidth resize width + * @param [out] dimCnt dimcnt of output + * @param [out] dim dim of output + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetCropAndResizeOutputDim(const ccTensorDescriptor_t imageDesc, const ccTensorDescriptor_t boxesDesc, + const ccTensorDescriptor_t boxidxDesc, const int32_t resizeHeight, + const int32_t resizeWidth, int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief crop and resize forward. 
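+ *
+ * Hypothetical sketch (not part of the original header): the descriptors for the image,
+ * the boxes and the box indices plus their device buffers are assumed to be set up
+ * elsewhere; the resize method comes from the ccResizeMethod_t enum in the base header.
+ * @code
+ *   ccResizeMethod_t method;  // set to an enumerator from the base header
+ *   float alpha = 1.0f, beta = 0.0f;
+ *   ccStatus_t rc = ccCropAndResizeForward(handle, method, 0.0f, &alpha,
+ *                                          imageDesc, image, boxesDesc, boxes,
+ *                                          boxidxDesc, boxidx, &beta,
+ *                                          outputDesc, output);
+ * @endcode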
+ * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] imageDesc descriptor of images + * @param [in] image input data in device memory + * @param [in] boxesDesc descriptor of boxes + * @param [in] boxes input data in device memory + * @param [in] boxidxDesc descriptor of boxidx + * @param [in] boxidx input data in device memory + * @param [in] method enum of resize method + * @param [in] extrapolationValue Value used for extrapolation, when applicable + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccCropAndResizeForward(ccHandle_t handle, const ccResizeMethod_t method, const float extrapolationValue, + const void *alpha, const ccTensorDescriptor_t imageDesc, const void *image, + const ccTensorDescriptor_t boxesDesc, const void *boxes, + const ccTensorDescriptor_t boxidxDesc, const void *boxidx, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief select forward computation + * @param [in] handle cce handle + * @param [in] alpha reserved + * @param [in] condDesc descriptor of cond tensor + * @param [in] cond cond data in device memory + * @param [in] xDesc descriptor of x tensor + * @param [in] x x data in device memory + * @param [in] yDesc descriptor of y tensor + * @param [in] y y data in device memory + * @param [in] beta reserved + * @param [in] outputDesc descriptor of output tensor + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccSelect(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t condDesc, const void *cond, + const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, const void *y, + const void *beta, const ccTensorDescriptor_t outDesc, void *out); + +/** + * @ingroup dnn + * @brief get the output dimension info of where + * @param [in] xDesc descriptor of input tensor + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @return ccStatus_t + */ +ccStatus_t ccGetWhereOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief where forward computation + * @param [in] handle cce handle + * @param [in] alpha reserved + * @param [in] condDesc descriptor of cond tensor + * @param [in] cond cond data in device memory + * @param [in] xDesc descriptor of x tensor + * @param [in] x x data in device memory + * @param [in] yDesc descriptor of y tensor + * @param [out] y y data in device memory + * @return ccStatus_t + */ +ccStatus_t ccWhere(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t yDesc, void *y); + +/** + * @ingroup dnn + * @brief reverse forward. 
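+ *
+ * Hypothetical sketch (not part of the original header): the ccIntArray_t holding the
+ * axes to reverse is assumed to be filled according to its definition in the base
+ * header; handle, descriptors and device buffers come from elsewhere.
+ * @code
+ *   float alpha = 1.0f, beta = 0.0f;
+ *   ccStatus_t rc = ccReverseForward(handle, axis, &alpha, xDesc, x, &beta,
+ *                                    outputDesc, output);
+ * @endcode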
+ * @param [in] handle cce handle + * @param [in] axis dim that need reverse + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReverseForward(ccHandle_t handle, const ccIntArray_t *axis, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief floor forward: + * data type only support float float16 + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccFloorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief ceil forward: + * data type only support float float16 + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccCeilForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get the output dimension info of truncate mod + * @param [in] xDesc descriptor of input tensor + * @param [in] yDesc descriptor of input tensor + * @param [out] dimCnt [dim count of the output tensor] + * @param [out] dim[] [shape of the output tensor] + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetTruncatemodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief truncate mod forward computation + * @param [in] handle cce handle + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] yDesc descriptor of input tensor + * @param [in] y input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccTruncatemodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); +/** + * @ingroup dnn + * @brief Spatial Pyramid Pooling + * @param [in] handle cce handle + * @param [in] alpha reserved + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] workspace temp workspace + * @param [in] workspaceSizeInBytes temp workspace size + * @param [in] pyramidHeight pyramid height + * @param [in] 
poolingMode pooling mode + * @param [in] beta reserved + * @param [in] outputDesc descriptor of output tensor + * @param [out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccSPPForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + void *workspace, const uint32_t workspaceSizeInBytes, const uint32_t pyramidHeight, + const ccPoolingMode_t poolingMode, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output); +/** + * @ingroup dnn + * @brief Get Spatial Pyramid Pooling output dim + * @param [in] xDesc descriptor of input tensor + * @param [in] pyramidHeight pyramid height + * @param [in] dimLen length of dim + * @param [out] dimCnt output tensor dim cnt + * @param [out] dim output tensor dim + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetSPPOutputDim(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight, int32_t *dimCnt, + int32_t dim[], const int32_t dimLen); +/** + * @ingroup dnn + * @brief Get Spatial Pyramid Pooling workspace size + * @param [in] xDesc descriptor of input tensor + * @param [in] pyramidHeight pyramid height + * @param [out] workspaceSizeInBytes workspace size + * @return ccStatus_t + */ +ccStatus_t ccGetSPPWorkspaceSize(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight, + uint32_t *workspaceSizeInBytes); + +/** + * @ingroup dnn + * @brief BNLL forward computation + * @param [in] handle cce handle + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccBNLLForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief bias forward. + * @param [in] handle cce handle + * @param [in] axis axis + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data x + * @param [in] x input data x in device memory + * @param [in] biasDesc descriptor of input data bias + * @param [in] bias input data bias in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccBiasForward(ccHandle_t handle, const int axis, const void *alpha, const ccTensorDescriptor_t xDesc, + const void *x, const ccTensorDescriptor_t biasDesc, const void *bias, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief threshold forward computation + * @param [in] handle cce handle + * @param [in] threshold threshold + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccThresholdForward(ccHandle_t handle, const void *threshold, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief shufflechannel forward. 
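+ *
+ * Hypothetical sketch (not part of the original header): shuffles the channels of x in
+ * "group" groups; handle, descriptors and device buffers are assumed to exist.
+ * @code
+ *   uint32_t group = 2;  // number of channel groups
+ *   float alpha = 1.0f, beta = 0.0f;
+ *   ccStatus_t rc = ccShuffleChannelForward(handle, &alpha, group, xDesc, x, &beta,
+ *                                           outputDesc, output);
+ * @endcode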
+ * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] group number of groups + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +// TODO AICPU: please add shufflechannel custom params and comment +ccStatus_t ccShuffleChannelForward(ccHandle_t handle, const void *alpha, uint32_t group, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief mvn forward. + * @param [in] handle cce handle + * @param [in] acrossChannel across channel. true: across, false: not + * @param [in] normalizeVariance normalizeVariance. true: normalizeVariance, false: not + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccMVNForward(ccHandle_t handle, bool acrossChannel, bool normalizeVariance, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, void *workSpace, uint32_t workSpaceSizeInBytes, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get the workspace size of mvn + * @param [in] xDesc descriptor of input data + * @param [in] acrossChannel across channel. true: across, false: not + * @param [in|out] sizeInBytes Workspace size need for whole computation + */ +ccStatus_t ccGetMVNWorkspaceSize(const ccTensorDescriptor_t xDesc, bool acrossChannel, uint32_t *sizeInBytes); + +/** + * @ingroup dnn + * @brief heatmap2coord forward output is hotspot value and corresponding coordinates + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] coordh calibration high + * @param [in] coordw calibration wide + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccHeatmap2coordForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + int32_t coordh, int32_t coordw, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); +/** + * @ingroup dnn + * @brief get the output dimension info of heatmap2coord + * @param [in] xDesc descriptor of input tensor + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetHeatmap2coordOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief swish forward. 
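+ *
+ * Hypothetical sketch (not part of the original header): scale is the factor applied
+ * inside the activation (swish is commonly written y = x / (1 + exp(-scale * x)));
+ * handle, descriptors and device buffers are assumed to exist.
+ * @code
+ *   float alpha = 1.0f, beta = 0.0f;
+ *   ccStatus_t rc = ccSwishForward(handle, 1.0f, &alpha, xDesc, x, &beta,
+ *                                  outputDesc, output);
+ * @endcode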
+ * @param [in] handle cce handle
+ * @param [in] scale param of swish function, y = x / (1 + exp(-scale * x)), i.e. y = x * sigmoid(scale * x)
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data
+ * @param [in] x input data in device memory
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccSwishForward(ccHandle_t handle, const float scale, const void *alpha, const ccTensorDescriptor_t xDesc,
+                          const void *x, const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
+
+ccStatus_t ccTeForward(ccHandle_t handle, const void *stubFunc, uint32_t coreDim, const void *args, uint32_t argsSize,
+                       const rtL2Ctrl_t *l2ctrl, int32_t inputNum, const ccTensorDescriptor_t xDesc[], const void *x[],
+                       int32_t outputNum, const ccTensorDescriptor_t yDesc[], void *y[], bool isAiCore);
+
+#ifndef DAVINCI_LITE
+ccStatus_t ccAiCpuCustomizeForward(ccHandle_t handle, aicpu_run_func stubFunc, opTensor_t *xOpDesc[], void *x[],
+                                   int32_t inputNum, opTensor_t *yOpDesc[], void *y[], void *op_attr_handle,
+                                   int32_t outputNum, const ccTensorDescriptor_t xDesc[],
+                                   const ccTensorDescriptor_t yDesc[], const void *op_attr_str, uint32_t op_attr_size);
+#endif
+/**
+ * @ingroup dnn
+ * @brief embedding lookup forward.
+ * @param [in] handle cce handle
+ * @param [in] alpha common scale factor
+ * @param [in] xDesc descriptor of input data x
+ * @param [in] x input data x in device memory
+ * @param [in] idxDesc descriptor of input data idx
+ * @param [in] idx input data idx in device memory
+ * @param [in] beta common scale factor
+ * @param [in] outputDesc descriptor of output data
+ * @param [in|out] output output data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
+                                    const void *x, const ccTensorDescriptor_t idxDesc, const void *idx,
+                                    const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief embedding lookup forward (multi-input overload).
+ * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] inputNum inputNum + * @param [in] xDesc[] descriptor array of input data x + * @param [in] x[] input data x array in device memory + * @param [in] workSpace workSpace addr + * @param [in] workSpaceSizeInBytes workSpace size + * @param [in] idxDesc descriptor of input data idx + * @param [in] idx input data idx in device memory + * @param [in] partitionStrategy partitionStrategy + * @param [in] maxNorm addr of maxNorm + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const int32_t inputNum, + const ccTensorDescriptor_t xDesc[], const void *x[], void *workSpace, + const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t idxDesc, + const void *idx, ccPartitionStrategy_t partitionStrategy, const void *maxNorm, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + *[ccGetEmbeddingLookupOutputDim] + *@param inputNum [input tensor numbers] + *@param xDesc[] [input tensor descriptor] + *@param idxDesc [idx tensor descriptor] + *@param dimCnt [output dim count] + *@param dim[] [output dim] + *@param [in| dimlen length of dim + *@return ccStatus_t [status] + */ +ccStatus_t ccGetEmbeddingLookupOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], + const ccTensorDescriptor_t idxDesc, int32_t *dimCnt, int32_t dim[], + int32_t dimLen); + +/** + * @ingroup dnn + *[ccGetEmbeddingLookupWorkspaceSize] + *@param inputNum [input tensor numbers] + *@param idxDesc [input tensor descriptor] + *@param isMaxNormExist [isMaxNormExist] + *@param sizeInBytes [output size] + *@return ccStatus_t [status] + */ +ccStatus_t ccGetEmbeddingLookupWorkspaceSize(const int32_t inputNum, const ccTensorDescriptor_t idxDesc, + const bool isMaxNormExist, uint32_t *sizeInBytes); + +/** + * @ingroup dnn + * @brief check if it is the first layer of resnet50 and semecefc + * @param [in] tensorDesc descriptor of input tensor. + * @param [in] convDesc conv descriptor. + * @param [in] filterDesc descriptor of weight tensor. 
+ * @return ccStatus_t
+ */
+ccStatus_t c04DescParamCheck(const ccTensorDescriptor_t tensorDesc, const ccConvolutionDescriptor_t convDesc,
+                             const ccFilterDescriptor_t filterDesc);
+
+#ifndef DAVINCI_LITE
+/**
+ * @ingroup dnn
+ * @brief convolution backward filter computation
+ * @param [in] handle cce handle
+ * @param [in] convDesc descriptor of convolution operator
+ * @param [in] alpha scaling factors
+ * @param [in] beta scaling factors
+ * @param [in] xDesc descriptor of input tensor x
+ * @param [in] x x data in device memory
+ * @param [in] dyDesc descriptor of dy
+ * @param [in] dy dy data in device memory
+ * @param [in] dwDesc descriptor of filter gradient dw
+ * @param [out] dw dw data in device memory
+ * @param [in] algo algorithm of convolution backward filter
+ * @param [in] workSpace temp space, may be NULL if no temp space is needed
+ * @param [in] workSpaceSizeInBytes size of workspace in bytes
+ * @return ccStatus_t
+ */
+ccStatus_t ccConvolutionBackwardFilter(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, void *alpha,
+                                       void *beta, const ccTensorDescriptor_t xDesc, const void *x,
+                                       const ccTensorDescriptor_t dyDesc, const void *dy,
+                                       const ccFilterDescriptor_t dwDesc, void *dw, ccConvolutionBwdAlgo_t algo,
+                                       void *workSpace, uint32_t workSpaceSizeInBytes);
+#endif
+
+/**
+ * @ingroup dnn
+ * @brief get the temp space size of convolution backward filter computation; some algorithms need no temp space
+ * @param [in] handle cce handle
+ * @param [in] dyDesc descriptor of input tensor dy
+ * @param [in] convDesc descriptor of convolution operator
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] dwDesc descriptor of filter
+ * @param [in] algo algorithm of convolution backward filter
+ * @param [in|out] sizeInBytes temp space size needed for the specified algorithm
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetConvolutionBackwardFilterWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t dyDesc,
+                                                       const ccConvolutionDescriptor_t convDesc,
+                                                       const ccTensorDescriptor_t xDesc,
+                                                       const ccFilterDescriptor_t dwDesc, ccConvolutionBwdAlgo_t algo,
+                                                       uint32_t *sizeInBytes);
+
+#ifndef DAVINCI_LITE
+ccStatus_t ccBatchNormalizationBackward(ccHandle_t handle, ccBatchNormMode_t mode, const void *alphaDataDiff,
+                                        const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff,
+                                        const ccTensorDescriptor_t xDesc, const void *x,
+                                        const ccTensorDescriptor_t dyDesc, const void *dy,
+                                        const ccTensorDescriptor_t dxDesc, void *dx,
+                                        const ccTensorDescriptor_t bnScaleBiasDiffDesc, const void *bnScale,
+                                        void *resultBnScaleDiff, void *resultBnBiasDiff, const void *workSpace,
+                                        const uint32_t workSpaceSizeInBytes, double epsilon, const void *SaveMean,
+                                        const void *SaveInvVariance);
+#endif
+
+ccStatus_t ccGetBatchNormalizationBackwardWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode,
+                                                        ccTensorDescriptor_t xDesc, ccTensorDescriptor_t dyDesc,
+                                                        ccTensorDescriptor_t dxDesc,
+                                                        ccTensorDescriptor_t bnScaleBiasDesc, uint32_t *sizeInBytes);
+
+#ifndef DAVINCI_LITE
+ccStatus_t ccBatchNormalizationForwardTraining(ccHandle_t handle, ccBatchNormMode_t mode, const void *alpha,
+                                               const void *beta, const ccTensorDescriptor_t xDesc, const void *x,
+                                               const ccTensorDescriptor_t yDesc, void *y,
+                                               const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
+                                               const void *bnBias, double exponentialAverageFactor,
+                                               void *resultRunningMean, void *resultRunningVariance, void *workSpace,
+                                               uint32_t workSpaceSizeInBytes, double epsilon, void *resultSaveMean,
+                                               void *resultSaveInvVariance, const bool isTraining);
+#endif
+
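[Editorial note, not part of the patched header] The backward-filter pair declared earlier in this hunk (ccGetConvolutionBackwardFilterWorkspaceSize followed by ccConvolutionBackwardFilter) follows a query-then-launch pattern. The sketch below illustrates that pattern only under stated assumptions: the cce namespace is in scope, the handle, descriptors and device buffers are created elsewhere, CC_STATUS_SUCCESS / CC_STATUS_BAD_PARAM are assumed status codes from cce_def.hpp, and alpha/beta are treated as host float scaling factors (1.0 / 0.0), which this header does not itself state. The helper name is hypothetical.

// Illustrative sketch: check the caller-provided workspace against the size the
// chosen algorithm requires, then launch the backward-filter kernel.
static ccStatus_t RunConvBackwardFilter(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc,
                                        const ccTensorDescriptor_t xDesc, const void *x,
                                        const ccTensorDescriptor_t dyDesc, const void *dy,
                                        const ccFilterDescriptor_t dwDesc, void *dw,
                                        ccConvolutionBwdAlgo_t algo, void *workSpace, uint32_t workSpaceSize) {
  uint32_t required = 0;
  ccStatus_t ret =
      ccGetConvolutionBackwardFilterWorkspaceSize(handle, dyDesc, convDesc, xDesc, dwDesc, algo, &required);
  if (ret != CC_STATUS_SUCCESS) {  // assumed success code from cce_def.hpp
    return ret;
  }
  if (required > workSpaceSize) {
    return CC_STATUS_BAD_PARAM;  // assumed error code; caller's buffer is too small for this algorithm
  }
  float alpha = 1.0f;  // assumed host scaling factors: plain dw = conv_bwd_filter(x, dy)
  float beta = 0.0f;
  return ccConvolutionBackwardFilter(handle, convDesc, &alpha, &beta, xDesc, x, dyDesc, dy, dwDesc, dw, algo,
                                     required == 0 ? nullptr : workSpace, required);
}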
+ccStatus_t ccGetBatchNormalizationForwardTrainingWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode,
+                                                               ccTensorDescriptor_t xDesc, ccTensorDescriptor_t yDesc,
+                                                               const ccTensorDescriptor_t bnScaleBiasMeanVarDesc,
+                                                               uint32_t *sizeInBytes);
+
+/**
+ * @ingroup dnn
+ * @brief generate a random normal tensor with the given mean and scale.
+ * @param [in] handle Stream handle.
+ * @param [in] alpha reserved.
+ * @param [in] meanDesc Descriptor of the mean tensor.
+ * @param [in] mean Data pointer of mean.
+ * @param [in] scaleDesc Descriptor of the scale tensor.
+ * @param [in] scale Data pointer of scale.
+ * @param [in] seed1 random seed used to generate random number
+ * @param [in] seed2 random seed used to generate random number
+ * @param [in] beta reserved.
+ * @param [in] outputDesc Descriptor of the output tensor.
+ * @param [out] output Data pointer of output.
+ * @return ccStatus_t
+ */
+ccStatus_t ccRandomNormalForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t meanDesc,
+                                 const void *mean, const ccTensorDescriptor_t scaleDesc, const void *scale,
+                                 const int64_t seed1, const int64_t seed2, const void *beta,
+                                 const ccTensorDescriptor_t outputDesc, void *output);
+
+/**
+ * @ingroup dnn
+ * @brief generate a random uniform tensor.
+ * @param [in] handle Stream handle.
+ * @param [in] alpha reserved.
+ * @param [in] minvalDesc Descriptor of the minval tensor.
+ * @param [in] minval Data pointer of minval.
+ * @param [in] maxvalDesc Descriptor of the maxval tensor.
+ * @param [in] maxval Data pointer of maxval.
+ * @param [in] seed1 random seed used to generate random number
+ * @param [in] seed2 random seed used to generate random number
+ * @param [in] beta reserved.
+ * @param [in] outputDesc Descriptor of the output tensor.
+ * @param [out] output Data pointer of output.
+ * @return ccStatus_t
+ */
+ccStatus_t ccRandomUniformForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t minvalDesc,
+                                  const void *minval, const ccTensorDescriptor_t maxvalDesc, const void *maxval,
+                                  const int64_t seed1, const int64_t seed2, const void *beta,
+                                  const ccTensorDescriptor_t outputDesc, void *output);
+
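[Editorial note, not part of the patched header] As a usage illustration for ccRandomUniformForward only: the helper name below is hypothetical, the descriptors and device buffers are assumed to be created elsewhere, and treating alpha/beta as host float scaling factors (they are documented above as reserved) and seeds of 0/0 as "let the implementation choose" are assumptions, not statements of this header.

// Hypothetical helper: fill `output` with uniform random values drawn between the
// scalars described by minvalDesc/maxvalDesc.
static ccStatus_t FillUniform(ccHandle_t handle, const ccTensorDescriptor_t minvalDesc, const void *minval,
                              const ccTensorDescriptor_t maxvalDesc, const void *maxval,
                              const ccTensorDescriptor_t outputDesc, void *output,
                              int64_t seed1 = 0, int64_t seed2 = 0) {
  float alpha = 1.0f;  // assumed host scaling factors; the alpha/beta parameters are reserved
  float beta = 0.0f;
  return ccRandomUniformForward(handle, &alpha, minvalDesc, minval, maxvalDesc, maxval,
                                seed1, seed2, &beta, outputDesc, output);
}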
+/**
+ * @ingroup dnn
+ * @brief generate BatchMatMul tensor.
+ * @param [in] handle Stream handle.
+ * @param [in] alpha reserved.
+ * @param [in] xDesc tensorA Desc.
+ * @param [in] x Data pointer of tensorA.
+ * @param [in] yDesc tensorB Desc.
+ * @param [in] y Data pointer of tensorB.
+ * @param [in] beta reserved.
+ * @param [in] adj_x tensorA transpose flag
+ * @param [in] adj_y tensorB transpose flag
+ * @param [in] outDesc Descriptor of the output tensor.
+ * @param [out] out Data pointer of output.
+ * @return ccStatus_t
+ */
+ccStatus_t ccBatchMatMulForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                                const ccTensorDescriptor_t yDesc, const void *y, const void *beta, const bool adj_x,
+                                const bool adj_y, const ccTensorDescriptor_t outDesc, void *out);
+
+ccStatus_t ccGetBatchMatMulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, bool adj_x,
+                                     bool adj_y, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief generate conv int8 all offset factors
+ * @param [in] para the struct for scale and offset of input, filter and output
+ * @param [in|out] offsetW offset of filter
+ * @param [in|out] offsetPad offset of input
+ * @param [in|out] scaledQrq scale computing result of input, filter and output
+ * @param [in|out] nextoffsetq offset of output
+ * @return ccStatus_t
+ */
+ccStatus_t ccGenQuantAllOffsetFactor(const ccQuantAllOffsetPara_t *para, uint8_t &offsetW, uint8_t &offsetPad,
+                                     uint16_t &scaledQrq, uint16_t &nextoffsetq);
+
+/**
+ * @ingroup dnn
+ * @brief get conv int8 all offset fracZ size
+ * @param [in] filterDesc descriptor of filter tensor
+ * @param [in|out] size conv int8 all offset fracZ size
+ * @param [in] groupNum group conv num
+ * @return ccStatus_t
+ */
+ccStatus_t ccSetGroupConvScene(const ccFilterDescriptor_t tensorDesc, ccConvolutionDescriptor_t convDesc);
+
+ccStatus_t ccGetInt8AllOffsetFilterFracZSizeInBytes(const ccFilterDescriptor_t filterSrcDesc,
+                                                    const ccFilterDescriptor_t filterDesc, uint32_t &size,
+                                                    uint32_t groupNum);
+
+/**
+ * @ingroup dnn
+ * @brief transform filter in conv int8 all offset scene
+ * @param [in] filterSrcInfo descriptor of filter tensor before fracZ transform
+ * @param [in] filterSrc filter addr before fracZ transform
+ * @param [in] filterDstInfo descriptor of filter tensor after fracZ transform
+ * @param [in] filterDst filter addr after fracZ transform
+ * @param [in] quantPara the struct for scale and offset of input, filter and output
+ * @param [in] ySizeInBytes filter size after fracZ transform
+ * @param [in|out] outputDataType output data type
+ * @param [in] groupNum group conv num
+ * @return ccStatus_t
+ */
+ccStatus_t ccTransFilterInt8AllOffset(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
+                                      ccFilterDescriptor_t filterDstInfo, void *filterDst,
+                                      const ccQuantAllOffsetPara_t *quantPara, uint32_t ySizeInBytes,
+                                      ccDataType_t outputDataType, uint32_t groupNum);
+
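[Editorial note, not part of the patched header] ccGetBatchMatMulOutputDim is declared alongside ccBatchMatMulForward above, so the intended call order is presumably a shape query followed by the launch. A minimal sketch of that pairing follows, under the same assumptions as the earlier examples: the handle, descriptors and device buffers exist already, CC_STATUS_SUCCESS / CC_STATUS_BAD_PARAM are assumed status codes, alpha/beta are treated as host float scaling factors, and CC_DIM_MAX (8) is assumed to be visible from dnn_struct_base.hpp. The helper name is hypothetical.

// Illustrative sketch: query the BatchMatMul output shape, then run the kernel.
// In real code dimCnt/dim would be validated against outDesc before launching.
static ccStatus_t RunBatchMatMul(ccHandle_t handle, const ccTensorDescriptor_t xDesc, const void *x,
                                 const ccTensorDescriptor_t yDesc, const void *y, bool adj_x, bool adj_y,
                                 const ccTensorDescriptor_t outDesc, void *out) {
  int32_t dim[CC_DIM_MAX] = {0};  // CC_DIM_MAX (8); use a literal 8 if the macro is not visible here
  int32_t dimCnt = 0;
  ccStatus_t ret = ccGetBatchMatMulOutputDim(xDesc, yDesc, adj_x, adj_y, &dimCnt, dim, CC_DIM_MAX);
  if (ret != CC_STATUS_SUCCESS) {
    return ret;  // shape query failed; do not launch
  }
  if (dimCnt < 1) {
    return CC_STATUS_BAD_PARAM;  // assumed error code; query returned no usable shape
  }
  float alpha = 1.0f;  // assumed host scaling factors; documented above as reserved
  float beta = 0.0f;
  return ccBatchMatMulForward(handle, &alpha, xDesc, x, yDesc, y, &beta, adj_x, adj_y, outDesc, out);
}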
+/**
+ * @ingroup dnn
+ * @brief transform bias in conv int8 all offset scene
+ * @param [in] filterDesc descriptor of filter tensor
+ * @param [in] biasDesc descriptor of bias tensor
+ * @param [in] quantPara the struct for scale and offset of input, filter and output
+ * @param [in] w filter addr
+ * @param [in] bias bias addr
+ * @return ccStatus_t
+ */
+ccStatus_t ccTransInt8AllOffsetBias(const ccFilterDescriptor_t filterDesc, const ccTensorDescriptor_t biasDesc,
+                                    const ccQuantAllOffsetPara_t *quantPara, const void *w, const void *bias);
+
+/**
+ * @ingroup dnn
+ * @brief dequantize forward computation
+ * @param [in] handle handle id
+ * @param [in] alpha alpha addr
+ * @param [in] xDesc descriptor of input tensor x
+ * @param [in] x x data addr
+ * @param [in] beta beta data addr
+ * @param [in] yDesc descriptor of output tensor y
+ * @param [in|out] y y data addr
+ * @return ccStatus_t
+ */
+ccStatus_t ccDequantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
+                                   const void *x, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
+/**
+ * @ingroup dnn
+ * @brief quantize forward computation
+ * @param [in] handle handle id
+ * @param [in] alpha alpha addr
+ * @param [in] xDesc descriptor of input tensor x
+ * @param [in] x x data addr
+ * @param [in] beta beta data addr
+ * @param [in] yDesc descriptor of output tensor y
+ * @param [in|out] y y data addr
+ * @return ccStatus_t
+ */
+ccStatus_t ccQuantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                                 const void *beta, const ccTensorDescriptor_t yDesc, void *y);
+
+#ifndef DAVINCI_LITE
+ccStatus_t ccActivationBackward(ccHandle_t handle, const ccActivationDescriptor_t activationDesc, const void *alpha,
+                                const ccTensorDescriptor_t dyDesc, const void *dy, const ccTensorDescriptor_t xDesc,
+                                const void *x, const void *beta, const ccTensorDescriptor_t dxDesc, void *dx);
+#endif
+
+ccStatus_t ccL2LossForward(ccHandle_t handle, const ccL2LossDescriptor_t l2lossDesc, const void *alpha,
+                           const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
+                           const ccTensorDescriptor_t yDesc, void *y);
+
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of top k v2
+ * @param [in] xDesc descriptor of input tensor x
+ * @param [in] kDesc descriptor of input tensor k
+ * @param [in] k input data k in device memory
+ * @param [in] axis axis along which to find the top k values
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetTopKV2OutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc, const void *k,
+                                const int64_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief top k v2 forward computation
+ * @param [in] handle cce handle
+ * @param [in] alpha scaling factors
+ * @param [in] xDesc descriptor of input tensor x
+ * @param [in] x input data x in device memory
+ * @param [in] kDesc descriptor of input tensor k
+ * @param [in] k input data k in device memory
+ * @param [in] beta bias factors
+ * @param [in] sorted whether the output values are sorted
+ * @param [in] axis axis along which to find the top k values
+ * @param [in] workSpace workspace in device memory
+ * @param [in] workSpaceSizeInBytes workspace size
+ * @param [in] outputValuesDesc descriptor of output values tensor
+ * @param [in|out] outputValues output values data in device memory
+ * @param [in] outputIndicesDesc descriptor of output indices tensor
+ * @param [in|out] outputIndices output indices data in device memory
+ * @return ccStatus_t
+ */
+ccStatus_t ccTopKV2Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
+                           const ccTensorDescriptor_t kDesc, const void *k, const void *beta, const bool sorted,
+                           const int64_t axis, void *workSpace, const uint32_t workSpaceSizeInBytes,
+                           const ccTensorDescriptor_t outputValuesDesc, void *outputValues,
+                           const ccTensorDescriptor_t outputIndicesDesc, void *outputIndices);
+
+/**
+ * @ingroup dnn
+ * @brief get the workspace
size of top k v2 + * @param [in] xDesc descriptor of input tensor x + * @param [in] yDesc descriptor of input tensor y + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] sizeInBytes point to workspace size + * @return ccStatus_t + */ +ccStatus_t ccGetTopKV2ForwardWorkspaceSize(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc, + const ccTensorDescriptor_t indiceDesc, const void *k, const int64_t axis, + uint32_t *sizeInBytes); + +/** + * @ingroup dnn + * @brief Get unsorted segment reduction output dim + * @param [in] xDesc descriptor of input tensor + * @param [in] segmentIdsDesc descriptor of input segmentIds tensor + * @param [in] segmentsNum output slice num + * @param [out] dimCnt output tensor dim cnt + * @param [out] dim output tensor dim + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetUnsortedSegmentReductionOutputDim(const ccTensorDescriptor_t xDesc, + const ccTensorDescriptor_t segmentIdsDesc, int32_t segmentsNum, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief reduce all forward computation + * @param [in] handle cce handle + * @param [in] segmentsNum output slice num + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] segmentIdsDesc descriptor of input segmentIds tensor + * @param [in] x input segmentIds data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccUnsortedSegmentSumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, + const void *x, const ccTensorDescriptor_t segmentIdsDesc, const void *segmentIds, + const int32_t segmentsNum, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief reverse sequence forward computation + * @param [in] handle cce handle + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor x + * @param [in] x input data x in device memory + * @param [in] yDesc descriptor of input tensor y + * @param [in] y input data y in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccReverseSequenceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t inputDesc, + const void *input, const ccTensorDescriptor_t seqLengthsDesc, + const void *seqLengths, int64_t seqAxis, int64_t batchAxis, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief realdiv between two tensors. + * @param [in] alpha reserved. + * @param [in] xDesc description of the left operator tensor. + * @param [in] x data point of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [in] y data point of the right operator tensor. + * @param [in] beta reserved. + * @param [in] outputDesc description of the output tensor. + * @param [output] output data point of the output tensor. 
+ * @return ccStatus_t + */ + +ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get output shape of realdiv. + * @param [in] xDesc description of the left operator tensor. + * @param [in] yDesc description of the right operator tensor. + * @param [out] dimCnt output tensor dim cnt + * @param [out] dim output tensor dim + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt, + int32_t *dim, int32_t dimLen); + +/** + * @ingroup dnn + * @brief invert permutation forward computation + * @param [in] handle cce handle + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccInvertPermutationForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, + const void *x, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output); + +/** + * @ingroup dnn + * @brief get the workspace size of non max suppression + * @param [in] handle descriptor of handle + * @param [in] scoresDesc descriptor of input tensor scoresDesc + * @param [in] boxesDesc descriptor of input tensor boxesDesc + * @param [in|out] sizeInBytes point to workspace size + * @return ccStatus_t + */ +ccStatus_t ccGetNonMaxSuppressionWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t scoresDesc, + const ccTensorDescriptor_t boxesDesc, uint32_t *sizeInBytes); + +/** + * @ingroup dnn + * @brief get the output dim of non max suppression + * @param [in] scoresDesc descriptor of input tensor scoresDesc + * @param [in] maxOutPutSize the max size of output + * @param [in|out] dimCnt point to the count of dim + * @param [in|out] dim[] the array of output dim + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetNonMaxSuppressionOutputDim(const ccTensorDescriptor_t scoresDesc, const int32_t maxOutPutSize, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief multinomial forward. + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] numSamples number of independent samples to draw for each row slice + * @param [in] seed1 sed to create a random seed for the distribution + * @param [in] seed2 sed to create a random seed for the distribution + * @param [in] workSpace work space for inter access + * @param [in] workSpaceSizeInBytes work space size + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccMultinomialForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + int32_t numSamples, int64_t seed1, int64_t seed2, void *workSpace, + uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output); +/** + * @ingroup dnn + * @brief get output dim of generated one-hot tensor. 
+ * @param [in] indicesDesc Indices description of one-hot position.
+ * @param [in] depth Depth of the one-hot dimension.
+ * @param [in] axis Axis to fill with the one-hot dimension.
+ * @param [out] dimCnt Count of output dims.
+ * @param [out] dim Output dims.
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetOneHotOutputDim(const ccTensorDescriptor_t indicesDesc, int32_t depth, int32_t axis, int32_t *dimCnt,
+                                int32_t *dim, int32_t dimLen);
+
+/**
+ * @ingroup dnn
+ * @brief generate a one-hot tensor using the given on/off values.
+ * @param [in] handle Stream handle.
+ * @param [in] alpha reserved.
+ * @param [in] indicesDesc Indices description of one-hot position.
+ * @param [in] indices Data pointer of indices.
+ * @param [in] onDesc On value description.
+ * @param [in] on Data pointer of on value.
+ * @param [in] offDesc Off value description.
+ * @param [in] off Data pointer of off value.
+ * @param [in] depth Depth of the one-hot dimension.
+ * @param [in] axis Axis to fill with the one-hot dimension.
+ * @param [in] beta reserved.
+ * @param [in] outputDesc Description of the generated one-hot tensor.
+ * @param [out] output Data pointer of output.
+ * @return ccStatus_t
+ */
+ccStatus_t ccOneHotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t indicesDesc,
+                           const void *indices, const ccTensorDescriptor_t onDesc, const void *on,
+                           const ccTensorDescriptor_t offDesc, const void *off, const int32_t depth, const int32_t axis,
+                           const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
+/**
+ * @ingroup dnn
+ * @brief get the workspace size of multinomial
+ * @param [in] xDesc descriptor of input tensor
+ * @param [out] sizeInBytes workspace size in bytes
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetMultinomialWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of multinomial
+ * @param [in] xDesc descriptor of input tensor
+ * @param [in] numSample number of independent samples to draw for each row slice
+ * @param [in|out] dimCnt point to the output dimCnt
+ * @param [in|out] dim arrays to save dims
+ * @param [in] dimLen length of dim
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetMultinomialOutputDim(const ccTensorDescriptor_t xDesc, int32_t numSample, int32_t *dimCnt,
+                                     int32_t dim[], int32_t dimLen);
+/**
+ * @ingroup dnn
+ * @brief get the output dimension info of BiasAddBackward
+ * @param [in] dyDesc descriptor of input tensor
+ * @param [in|out] n outputTensor [N]CHW
+ * @param [in|out] c outputTensor N[C]HW
+ * @param [in|out] h outputTensor NC[H]W
+ * @param [in|out] w outputTensor NCH[W]
+ * @return ccStatus_t
+ */
+ccStatus_t ccGetBiasAddBackwardOutputDim(const ccTensorDescriptor_t dyDesc, int32_t *n, int32_t *c, int32_t *h,
+                                         int32_t *w);
+
+/**
+ * @ingroup dnn
+ * @brief biasadd backward.
+ * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] dyDesc descriptor of input data + * @param [in] dy input data in device memory + * @param [in] beta common scale factor + * @param [in] dbDesc descriptor of output data + * @param [in|out] db output data in device memory + * @return ccStatus_t + */ +#ifndef DAVINCI_LITE +ccStatus_t ccBiasAddBackward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t dyDesc, const void *dy, + const void *beta, const ccTensorDescriptor_t dbDesc, void *db); + +ccStatus_t ccMaxPoolWithArgmaxForward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t yDesc, void *y, const ccTensorDescriptor_t argMaskDesc, + void *argMask); +#endif + +ccStatus_t ccCreatePoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc); + +ccStatus_t ccDestroyPoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc); + +ccStatus_t ccSetPoolingMaskTensorDescriptor(ccTensorDescriptor_t poolingMaskDesc, ccTensorFormat_t format, + ccDataType_t dataType, int32_t n, int32_t c, int32_t h, int32_t w, + int32_t windowH, int32_t windowW); + +ccStatus_t ccGetPoolingMaskTensorSizeInBytes(ccTensorDescriptor_t poolingMaskDesc, uint32_t *size); + +/** + * @ingroup dnn + * @brief get the mask output dimension info of maxpooling training forward + * @param [in] pooling descriptor of convolution operator + * @param [in] xDesc descriptor of input tensor + * @param [in|out] n point to batch size + * @param [in|out] c point to channels + * @param [in|out] h point to height of feature map + * @param [in|out] w point to width of feature map + * @param [in|out] windowH point to height of window + * @param [in|out] windowW point to width of windowW + * @return ccStatus_t + */ +ccStatus_t ccGetPoolingMaskDim(const ccPoolingDescriptor_t poolingDesc, const ccTensorDescriptor_t xDesc, int32_t *n, + int32_t *c, int32_t *h, int32_t *w, int32_t *windowH, int32_t *windowW); + +#ifndef DAVINCI_LITE +ccStatus_t ccSoftmaxCrossEntropyLoss(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode, + ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale, + const ccTensorDescriptor_t logitsDesc, const void *logits, + const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth, + const void *beta, const ccTensorDescriptor_t lossDesc, void *loss); + +ccStatus_t ccSoftmaxCrossEntropyDx(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode, + ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale, + const ccTensorDescriptor_t logitsDesc, const void *logits, + const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth, + const void *beta, const ccTensorDescriptor_t dxDesc, void *dx); + +ccStatus_t ccAvgPoolingBackward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha, + const ccTensorDescriptor_t dyDesc, const void *dy, const void *beta, + const ccTensorDescriptor_t dxDesc, const void *dx); + +ccStatus_t ccTrainingAssignOp(ccHandle_t handle, const ccAssignOpMode_t assignOpDesc, const void *alpha, + const void *beta, const ccTensorDescriptor_t aDesc, void *a, + const ccTensorDescriptor_t bDesc, const void *b); + +/** + * @ingroup dnn + * @brief momentum optimizer for variable update + * @param [in] handle cce handle + * @param [in] inputDesc descriptor of input tensor: gradient,accumulation,variable + * @param [in] gradient gradient input + * 
@param [in|out] accumulation accumulation input and updated output + * @param [in|out] variable variable input and updated output + * @param [in] algo indicate whether need FP16 output + * @param [in] momentum scaler to control accumulation + * @param [in] learningRate scaler + * @param [in] lossScaleReciprocal scaler + * @param [in] workSpace additional memory address + * @param [in] workSpaceSizeInBytes additional memory size + * @param [out] variableUpdatedFP16Desc descriptor of FP16 output tensor: variableUpdatedFP16 + * @param [out] variableUpdatedFP16 variableUpdatedFP16 + * @return ccStatus_t + */ +ccStatus_t ccApplyMomentum(ccHandle_t handle, const ccTensorDescriptor_t inputDesc, const void *gradient, + void *accumulation, void *variable, const ccMomentumAlgo_t algo, const void *momentum, + const void *learningRate, const void *lossScaleReciprocal, void *workSpace, + const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t variableUpdatedFP16Desc, + void *variableUpdatedFP16); + +ccStatus_t ccSsdClassifyLossTrain(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t labelDesc, + const void *label, const ccTensorDescriptor_t greaterConstDesc, + const void *greaterConst, const ccTensorDescriptor_t subConstDesc, + const void *subConst, const ccTensorDescriptor_t sparseDesc, const void *sparse, + const void *beta, const ccTensorDescriptor_t castoutDesc, const void *castout, + const ccTensorDescriptor_t muloutDesc, const void *mulout); + +#endif + +/** + * @ingroup dnn + * @brief get the workspace size of applymomentum + * @param [in] inputDesc descriptor of input tensor + * @return ccStatus_t + */ +ccStatus_t ccGetApplyMomentumWorkspaceSize(const ccTensorDescriptor_t inputDesc, uint32_t *sizeInBytes); +#ifndef DAVINCI_LITE +ccStatus_t ccHwck2FracZ(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x, + const ccFilterDescriptor_t yDesc, void *y); + +ccStatus_t ccFracZ2Hwck(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x, + const ccFilterDescriptor_t yDesc, void *y); +ccStatus_t ccAddNForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const int32_t inputNum, + const void *x[], const void *beta, void *workSpace, uint32_t workSpaceSizeInBytes, + const ccTensorDescriptor_t yDesc, void *y); +#endif +ccStatus_t ccGetAddNForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc, const int32_t inputNum, + const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes); +ccStatus_t ccGetAddNForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen); +ccStatus_t ccAddTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace, + uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y); +ccStatus_t ccGetAddTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc, + const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc, + uint32_t *sizeInBytes); +ccStatus_t ccGetAddTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); +ccStatus_t ccMulTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace, + uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y); +ccStatus_t 
ccGetMulTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc, + const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc, + uint32_t *sizeInBytes); +ccStatus_t ccGetMulTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief get workspace size + * @param [in] xDesc descriptor of input tensor + * @param [in|out] sizeInBytes workspace size + * @return ccStatus_t + */ +ccStatus_t ccGetRandomShuffleWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes); + +/** + * @ingroup dnn + * @brief random shuffle forward computation + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] workspace temporary space + * @param [in] workspaceSizeInBytes temporary space size + * @param [in] seed random seed used to generate random number + * @param [in] seed2 random seed used to generate random number + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccRandomShuffleForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + void *workspace, const uint32_t workspaceSizeInBytes, const int64_t seed1, + const int64_t seed2, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output); +/** + * @ingroup dnn + * @brief sin forward: + * data type only support float float16 double + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] input input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccSinForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief cos forward: + * data type only support float float16 double + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] input input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccCosForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief tan forward: + * data type only support float float16 double + * data format only support ND + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] input input data in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccTanForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input, + const void *beta, const ccTensorDescriptor_t 
outputDesc, void *output); + +/** + * @ingroup dnn + * @brief get the output dimension info of unstack + * @param [in] xDesc descriptor of input tensor + * @param [in] axis the axis to unstack along + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetUnstackOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, int32_t *dimCnt, int32_t dim[], + int32_t dimLen); + +/** + * @ingroup dnn + * @brief unstack forward. + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data + * @param [in] x input data in device memory + * @param [in] num the length of the dimension axis + * @param [in] axis the axis to unstack along + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ + +ccStatus_t ccUnstackForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + int32_t num, int32_t axis, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output[]); + +ccStatus_t ccResizeNearestNeighborCpuForward(ccHandle_t handle, const ccResizeNearestNeighborDescriptor_t resizeDesc, + const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); +/** + * @ingroup dnn + * @brief get the output dimension info of resize nearest neighbor + * @param [in] resizeDesc descriptor of resize + * @param [in] xDesc descriptor of input tensor + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetResizeNearestNeighborOutputDim(const ccResizeNearestNeighborDescriptor_t resizeDesc, + const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t dim[], + int32_t dimLen); + +/** + * @ingroup dnn + * @brief create descriptor of ResizeNearestNeighbor + * @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr + * @return ccStatus_t + */ +ccStatus_t ccCreateResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of ResizeNearestNeighbor + * @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr + * @return ccStatus_t + */ +ccStatus_t ccDestroyResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc); + +/** + * @ingroup dnn + * @brief set descriptor of ResizeNearestNeighbor. 
+ * @param [in|out] resizeDesc descriptor of resize nearest neighbor operator + * @param [in] alignCorners whether the centers of input and output are aligned + * @param [in] height height of output + * @param [in] width width of output + * @return ccStatus_t + */ +ccStatus_t ccSetResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t resizeDesc, bool alignCorners, + int32_t height, int32_t width); + +/** + * @ingroup dnn + * [ccGetPadV2OutputDim] + * @brief get the output dimension info of pad + * @param [in] xDesc descriptor of input tensor x + * @param [in] padDesc descriptor of input paddings + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetPadV2OutputDim(const ccTensorDescriptor_t xDesc, const ccPadV2Descriptor_t padDesc, int32_t *dimCnt, + int32_t dim[], int32_t dimLen); + +ccStatus_t ccPadV2CpuForward(ccHandle_t handle, const ccPadV2Descriptor_t padDesc, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief create descriptor of parameters for padv2 function + * @param [in] point to descriptor of parameters for padv2 function + * @return ccStatus_t + */ +ccStatus_t ccCreatePadV2Descriptor(ccPadV2Descriptor_t *padDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of parameters for padv2 function + * @param [in] point to descriptor of parameters for padv2 function + * @return ccStatus_t + */ +ccStatus_t ccDestroyPadV2Descriptor(ccPadV2Descriptor_t *padDesc); + +/** + * @brief init descriptor for parameter of padv2 function + * @param [in|out] padDesc descriptor of pad + * @param [in] padShapeCnt padshape count + * @param [in] padShapeLow padshape low + * @param [in] padShapeHigh padshape high + * @param [in] padMode pad mode + * @param [in] padValue pad value ptr + * @param [in] padValueType pad value data type + * @return ccStatus_t + */ +ccStatus_t ccSetPadV2Descriptor(ccPadV2Descriptor_t padDesc, const int32_t padShapeCnt, const int32_t padShapeLow[], + const int32_t padShapeHigh[], const ccPadMode_t padMode, const void *padValue, + const ccDataType_t padValueType); +/** + * @ingroup dnn + * @brief create descriptor of batchToSpace + * @param [in|out] batchToSpaceDesc point to descriptor of batchToSpace + * @return ccStatus_t + */ +ccStatus_t ccCreateBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc); + +/** + * @ingroup dnn + * @brief set batchToSpaceDesc + * @param [in|out] batchToSpaceDesc descriptor of batchToSpace + * @param [in] blockShape blockShape of batchToSpace + * @param [in] crops crops of batchToSpace + * @param [in] blockShapeLength blockShapeLength of batchToSpace + * @return ccStatus_t + */ +ccStatus_t ccSetBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t paramsDesc, const int32_t *blockShape, + const int32_t *crops, const int32_t blockShapeLength); + +/** + * @ingroup dnn + * @brief get batchToSpaceDesc + * @param [in|out] batchToSpaceDesc descriptor of batchToSpace + * @param [in] blockShape blockShape of batchToSpace + * @param [in] crops crops of batchToSpace + * @param [in] blockShapeLength blockShapeLength of batchToSpace + * @return ccStatus_t + */ +ccStatus_t ccGetBatchToSpaceDescriptor(const ccBatchToSpaceDescriptor_t paramsDesc, int32_t *blockShape, int32_t *crops, + int32_t *blockShapeLength); + +/** + * @ingroup dnn + * @brief destroy descriptor of batchToSpace 
+ * @param [in] *batchToSpaceDesc descriptor of batchToSpace + * @return ccStatus_t + */ +ccStatus_t ccDestroyBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc); + +/** + * @ingroup dnn + * @brief get the output dimension info of batch to space + * @param [in] xDesc descriptor of input tensor + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in| dimlen length of dim + * @return ccStatus_t + */ + +ccStatus_t ccGetBatchToSpaceOutputDim(const ccTensorDescriptor_t xDesc, + const ccBatchToSpaceDescriptor_t batchToSpaceDesc, int32_t *dimCnt, int32_t dim[], + int32_t dimLen); + +/** + * @ingroup dnn + * @brief batch to space forward computation + * @param [in] handle cce handle + * @param [in] paramsDesc descriptor of input params + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ + +ccStatus_t ccBatchToSpaceForward(ccHandle_t handle, const ccBatchToSpaceDescriptor_t paramsDesc, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief create descriptor of spaceToBatch + * @param [in|out] spaceToBatchDesc point to descriptor of spaceToBatch + * @return ccStatus_t + */ +ccStatus_t ccCreateSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc); + +/** + * @ingroup dnn + * @brief set spaceToBatchDesc + * @param [in|out] spaceToBatchDesc descriptor of spaceToBatch + * @param [in] blockShape blockShape of spaceToBatch + * @param [in] paddings paddings of spaceToBatch + * @param [in] blockShapeLength blockShapeLength of spaceToBatch + * @return ccStatus_t + */ +ccStatus_t ccSetSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t paramsDesc, const int32_t *blockShape, + const int32_t *paddings, const int32_t blockShapeLength); + +/** + * @ingroup dnn + * @brief get spaceToBatchDesc + * @param [in|out] spaceToBatchDesc descriptor of spaceToBatch + * @param [in] blockShape blockShape of spaceToBatch + * @param [in] paddings paddings of spaceToBatch + * @param [in] blockShapeLength blockShapeLength of spaceToBatch + * @return ccStatus_t + */ +ccStatus_t ccGetSpaceToBatchDescriptor(const ccSpaceToBatchDescriptor_t paramsDesc, int32_t *blockShape, + int32_t *paddings, int32_t *blockShapeLength); + +/** + * @ingroup dnn + * @brief destroy descriptor of spaceToBatch + * @param [in] *spaceToBatchDesc descriptor of spaceToBatch + * @return ccStatus_t + */ +ccStatus_t ccDestroySpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc); + +/** + * @ingroup dnn + * @brief get the output dimension info of space to batch + * @param [in] xDesc descriptor of input tensor + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in| dimlen length of dim + * @return ccStatus_t + */ + +ccStatus_t ccGetSpaceToBatchOutputDim(const ccTensorDescriptor_t xDesc, + const ccSpaceToBatchDescriptor_t spaceToBatchDesc, int32_t *dimCnt, int32_t dim[], + int32_t dimLen); + +/** + * @ingroup dnn + * @brief space to batch forward computation + * @param [in] handle cce handle + * @param [in] paramsDesc descriptor of input params + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * 
@param [in] x input data in device memory + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ + +ccStatus_t ccSpaceToBatchForward(ccHandle_t handle, const ccSpaceToBatchDescriptor_t paramsDesc, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +ccStatus_t ccTransFilterDesc2TensorDesc(ccFilterDescriptor_t wDesc, ccTensorDescriptor_t tensorDesc); + +/* + * @brief get the output dimension info of extractImagePatches + * @param [in] xDesc descriptor of input tensor x + * @param [in] ksizes ksizes array + * @param [in] strides strides array + * @param [in] rates rates array + * @param [in] padding padding type + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @return ccStatus_t + */ +ccStatus_t ccGetExtractImagePatchesOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *ksizes, + const ccIntArray_t *strides, const ccIntArray_t *rates, + const ccExtractImagePatchesPadType_t padding, int32_t *dimCnt, + int32_t dim[], const int32_t dimLen); + +/** + * @ingroup dnn + * @brief cum forward. + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data, dimCnt:1~8 + * @param [in] x input data in device memory + * @param [in] axisDesc scale factor, dimCnt:0 + * @param [in] axis which axis to cum calc, device memory + * @param [in] beta common scale factor + * @param [in] opType calc type, eg. sum, prod.... + * @param [in] exclusive cum flag, true or false + * @param [in] reverse cum flag, true or false + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccCumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t axisDesc, const void *axis, const void *beta, const CumOpType opType, + const bool exclusive, const bool reverse, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @ingroup dnn + * @brief ExtractImagePatches forward. 
+ * @param [in] handle cce handle + * @param [in] ksizes ksizes array + * @param [in] strides strides array + * @param [in] rates rates array + * @param [in] padding padding type + * @param [in] alpha common scale factor + * @param [in] xDesc descriptor of input data x + * @param [in] x input data x in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccExtractImagePatchesForward(ccHandle_t handle, const ccIntArray_t *ksizes, const ccIntArray_t *strides, + const ccIntArray_t *rates, const ccExtractImagePatchesPadType_t padding, + const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const void *beta, const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @brief get argmax output dim info + * @param [in] argDesc argmaxmin descriptor + * @param [in] xDesc descriptor of input tensor + * @param [in|out] dimCnt output dim count + * @param [in|out] dim output dim + * @param [in| dimlen length of dim + * @return ccStatus_t + */ +ccStatus_t ccGetArgMaxOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc, + int32_t *dimCnt, int32_t dim[], int32_t dimLen); + +/** + * @ingroup dnn + * @brief argmax forward computation + * @param [in] handle cce handle + * @param [in] argDesc argmaxmin descriptor + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] workSpace workspace pointer + * @param [in] workSpaceSizeInBytes workspace size in bytes + * @param [in] beta bias factors + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccArgMaxForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, void *workSpace, + const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc, + void *output); + +/** + * @ingroup dnn + * @brief get the output dimension info of argmaxmin + * @param [in] argDesc descriptor of tagCcArgmaxmin + * @param [in] xDesc descriptor of input tensor + * @param [in|out] sizeInBytes workspace size + * @return ccStatus_t + */ +ccStatus_t ccGetArgMaxWorkspaceSize(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc, + uint32_t *sizeInBytes); + +/** + * @ingroup dnn + * @brief create descriptor of Argmaxmin + * @param [in|out] resizeDesc point to descriptor of Argmaxmin attr + * @return ccStatus_t + */ +ccStatus_t ccCreateArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of Interp + * @param [in|out] resizeDesc point to descriptor of Argmaxmin attr + * @return ccStatus_t + */ +ccStatus_t ccDestroyArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc); + +/** + * @ingroup dnn + * @brief destroy descriptor of Interp + * @param [in|out] argDesc descriptor of tagCcArgmaxmin + * @param [in] axisType + * @param [in] outMaxVal whether to return the maximum value + * @param [in] topK number that returns the maximum index or maximum value + * @param [in] axis Describes which axis of the input Tensor to reduce across + * @param [in] keepDims whether to keep reduced dim + * @param [in] reduceSize the num of elements to be reduce to get topK elements, reduceSize=-1 means the total num + * of elements in axis dimension + * @param 
[in] reduceStride the stride for reduce operation, reduceStride=1 means the layout of target data is + * continuous + * @return ccStatus_t + */ +ccStatus_t ccSetArgmaxminDescriptor(ccArgmaxminDescriptor_t argDesc, int32_t axisType, bool outMaxVal, int64_t topK, + int64_t axis, bool keepDims, int64_t reduceSize = -1, int64_t reduceDStride = 1); + +ccStatus_t ccArgMinForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha, + const ccTensorDescriptor_t xDesc, const void *x, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +ccStatus_t ccGetArgMinOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc, + int32_t *dimCnt, int32_t dim[], const int32_t dimLen); +/** + * @ingroup dnn + * @brief lsh projection forward computation + * @param [in] handle cce handle + * @param [in] alpha scaling factors + * @param [in] hashDesc descriptor of input tensor hashDesc + * @param [in] hash input data hash in device memory + * @param [in] weightDesc descriptor of input tensor weightDesc + * @param [in] weight input data weight in device memory + * @param [in] inputDesc descriptor of input tensor inputDesc + * @param [in] lookup input data lookup in device memory + * @param [in] type 1:SPARSE 2.DENSE + * @param [in] beta bias factors + * @param [in] workSpace workSpace data in device memory + * @param [in] workSpaceSizeInBytes workSpace length + * @param [in] outputDesc descriptor of output tensor + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccLshProjectionForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t hashDesc, + const void *hash, const ccTensorDescriptor_t weightDesc, const void *weight, + const ccTensorDescriptor_t inputDesc, const void *input, const LSHProjectionType type, + const void *beta, void *workSpace, const uint32_t workSpaceSizeInBytes, + const ccTensorDescriptor_t outputDesc, void *output); +/** + * @ingroup dnn + * @brief get the workspace size of lsh projection + * @param [in] inputDesc descriptor of input tensor input + * @param [in] hashDataType data type of hash + * @param [in|out] sizeInBytes workspace size + * @return ccStatus_t + */ +ccStatus_t ccGetLshProjectionForwardWorkspaceSize(const ccTensorDescriptor_t inputDesc, const ccDataType_t hashDataType, + uint32_t *sizeInBytes); +/** + * @ingroup dnn + * @brief get the output dimension info of LshProjection, + * @param [in] hashDesc descriptor of hash + * @param [in] type type of mode + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in] dimLen dim length + * @return ccStatus_t + */ +ccStatus_t ccGetLshProjectionOutputDim(const ccTensorDescriptor_t hashDesc, const LSHProjectionType type, + int32_t *dimCnt, int32_t dim[], const int32_t dimLen); +/** + * @ingroup dnn + * @brief get the weight dimension info of LshProjection, + * @param [in] inputDesc descriptor of input + * @param [in|out] dimCnt point to the weight dimCnt + * @param [in|out] dim arrays to save dims + * @param [in] dimLen dim length + * @return ccStatus_t + */ +ccStatus_t ccGetLshProjectionWeightDim(const ccTensorDescriptor_t inputDesc, int32_t *dimCnt, int32_t dim[], + const int32_t dimLen); + +/** + * @ingroup dnn + * @brief init descriptor for parameter of upsample function + * @param [in] handle cce handle + * @param [in] upsamplePara input para in host memory + * @param [in] alpha common scale factor + * @param [in] bottomDesc descriptor of input data 
bottomDesc + * @param [in] bottom input data bottom in device memory + * @param [in] bottomMaskDesc descriptor of input data bottomMaskDesc + * @param [in] bottomMask input data bottomMask in device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor of output data + * @param [in|out] output output data in device memory + * @return ccStatus_t + */ +ccStatus_t ccUpsampleForward(ccHandle_t handle, const ccUpsampleParaDescriptor_t upsamplePara, const void *alpha, + const ccTensorDescriptor_t bottomDesc, const void *bottom, + const ccTensorDescriptor_t bottomMaskDesc, const void *bottomMask, const void *beta, + const ccTensorDescriptor_t outputDesc, void *output); + +/** + * @brief creat descriptor for parameter of usample function + * @param [in|out] upsampleDesc descriptor of upsamplepara + * @return ccStatus_t + */ +ccStatus_t ccCreateUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc); + +/** + * @brief destroy descriptor for parameter of upsample function + * @param [in|out] upsampleDesc descriptor of upsamplepara + * @return ccStatus_t + */ +ccStatus_t ccDestroyUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc); + +/** + * @brief set descriptor for parameter of upsample function + * @param [in|out] upsampleDesc descriptor of upsamplepara + * @param [in] scale the scale of height and width + * @param [in] scaleHeight the scale of height + * @param [in] scaleWidth the scale of Width + * @param [in] upsampleHeight the height of output + * @param [in] upsampleWidth the width of output + * @param [in] padOutHeight pad value height + * @param [in] padOutWidth pad value width + * @return ccStatus_t + */ +ccStatus_t ccSetUpsampleDescriptor(ccUpsampleParaDescriptor_t upsampleDesc, const int32_t scale, + const int32_t scaleHeight, const int32_t scaleWidth, const int32_t upsampleHeight, + const int32_t upsampleWidth, const bool padOutHeight, const bool padOutWidth); +/** + * @ingroup dnn + * @brief get the output dimension info of upsample + * @param [in] upsamplePara para of upsample + * @param [in] bottomDesc descriptor of input bottom tensor + * @param [in|out] dimCnt point to the output dimCnt + * @param [in|out] dim arrays to save dims + * @param [in] dimLen the len of dim array + * @return ccStatus_t + */ +ccStatus_t ccGetUpsampleOutputDim(const ccUpsampleParaDescriptor_t upsamplePara, const ccTensorDescriptor_t bottomDesc, + int32_t *dimCnt, int32_t dim[], const int32_t dimLen); + +#ifndef DAVINCI_LITE +ccStatus_t ccMatmul(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t wDesc, const void *w, const ccTensorDescriptor_t biasDesc, + const void *bias, const ccFullConnectFwdAlgo_t algo, void *workSpace, + const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y, + const bool transposeA, const bool transposeB); +ccStatus_t ccGetMatmulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, int32_t *n, + int32_t *c, int32_t *h, int32_t *w, bool transposeA, bool transposeB); +ccStatus_t ccGetMatmulWorkspaceSize(ccHandle_t handle, const ccFullConnectFwdAlgo_t algo, + const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, + const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes, bool transposeA, + bool transposeB); +#endif + +/** + * @ingroup dnn + * @brief gather_v2 function + * @param [in] handle cce handle + * @param [in] alpha common scale factor + * @param [in] paramsDesc descriptor + * @param [in] 
params device memory + * @param [in] indicesDesc descriptor + * @param [in] indices device memory + * @param [in] axisDesc descriptor + * @param [in] axis device memory + * @param [in] beta common scale factor + * @param [in] outputDesc descriptor + * @param [in|out] output device memory + * @return ccStatus_t + */ +ccStatus_t ccGatherV2(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc, const void *params, + const ccTensorDescriptor_t indicesDesc, const void *indices, const ccTensorDescriptor_t axisDesc, + const void *axis, const void *beta, const ccTensorDescriptor_t outputDesc, const void *output); + +/** + * @ingroup dnn + * @brief memory_clear function + * @param [in] handle cce handle + * @param [in] addrSpaceSizeInBytes addr space size + * @param [in|out] addr device memory + * @return ccStatus_t + */ +ccStatus_t ccMemoryClear(ccHandle_t handle, const uint64_t addrSpaceSizeInBytes, const void *addr); + +/** + * @ingroup dnn + * @brief check input is overflow + * @param [in] handle cce handle + * @param [in] alpha scaling factors + * @param [in] xDesc descriptor of input tensor + * @param [in] x input data in device memory + * @param [in] yDesc descriptor of output tensor + * @param [in|out] y output data in device memory + * @param [in] beta scaling factors + * @return ccStatus_t + */ +ccStatus_t ccIsFinite(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, + const ccTensorDescriptor_t yDesc, const void *y, const void *beta); +}; // namespace cce + +#endif // DNN_OP_H__ diff --git a/third_party/fwkacllib/inc/inc/cce/dnn_struct.hpp b/third_party/fwkacllib/inc/inc/cce/dnn_struct.hpp new file mode 100644 index 00000000..96566074 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/dnn_struct.hpp @@ -0,0 +1,23 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DNN_STRUCT_HPP__ +#define DNN_STRUCT_HPP__ + +#include "dnn.h" +#include "dnn_struct_base.hpp" + +#endif // DNN_STRUCT_HPP__ diff --git a/third_party/fwkacllib/inc/inc/cce/dnn_struct_base.hpp b/third_party/fwkacllib/inc/inc/cce/dnn_struct_base.hpp new file mode 100644 index 00000000..dd75e9ea --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/dnn_struct_base.hpp @@ -0,0 +1,894 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef DNN_STRUCT_BASE_HPP__ +#define DNN_STRUCT_BASE_HPP__ + +#include "cce/cce_def.hpp" + +namespace cce { + +/** + * @ingroup dnn + * @brief max number of dimensions + */ +#define CC_DIM_MAX (8) + +/** + * @ingroup dnn + * @brief max number of dimensions when use NC1HWC0 format + */ +#define CC_REALDIM_MAX (4) + +/** + * @ingroup dnn + * @brief max input count of MscnnBoxOutput + */ +#define CC_MAX_INPUT_CNT (10) + +/** + * @ingroup dnn + * @brief image dimensions of aipp input + */ +#define CC_AIPP_IMG_DIM (2) + +/** + * @ingroup dnn + * @brief image channel number of aipp input + */ +#define CC_AIPP_IMG_CHN_NUM (4) + +/** + * @ingroup dnn + * @brief element number of aipp color space convertion matrix + */ +#define CC_AIPP_CSC_MATRIX_DIM (9) + +/** + * @ingroup dnn + * @brief element number of aipp color space convertion bias + */ +#define CC_AIPP_CSC_BIAS_DIM (3) + +/** + * @ingroup dnn + * @brief parameter number of op exp/log/pow + */ +#define PARAM_CNT_THREE (3) + +/** + * @ingroup dnn + * @brief parameter number of op nonmaxsuppression + */ +#define PARAM_CNT_TWO (2) +#define DIMCNT_NUMBER_ONE (1) +#define DIMCNT_NUMBER_TWO (2) +#define DIMCNT_NUMBER_FOUR (4) + +#define COMMON_FORMAT_NCHW_N_INDEX (0) +#define COMMON_FORMAT_NCHW_C_INDEX (1) +#define COMMON_FORMAT_NCHW_H_INDEX (2) +#define COMMON_FORMAT_NCHW_W_INDEX (3) + +/** + * @ingroup dnn + * @brief parameter number of op upsample + */ +#define UPSAMPLE_SCAL_DEFAULT_TWO (2) +#define UPSAMPLE_ILLEGAL_VALUE_1 (1) + +/** + * @ingroup dnn + * @brief struct define of StridedSlice required params. + */ + +typedef struct tagCcStridedSlice { + uint32_t dimCnt; + int32_t begin[CC_DIM_MAX]; + int32_t end[CC_DIM_MAX]; + int32_t strides[CC_DIM_MAX]; +} ccStridedSlice_t; + +/** + * @ingroup dnn + * @brief struct define of Strided_slice attrs + */ +typedef struct tagCcStridedSliceAttrs { + uint32_t beginMask; + uint32_t endMask; + uint32_t ellipsisMask; + uint32_t newAxisMask; + uint32_t shrinkAxisMask; +} ccStridedSliceAttrs_t; + +/** + * @ingroup dnn + * @brief params of batchToSpace + */ +typedef struct tagCcBatchToSpace { + int32_t blockShapeLength; + int32_t blockShape[CC_DIM_MAX]; + int32_t crops[2 * CC_DIM_MAX]; +} ccBatchToSpace_t; + +/** + * @ingroup dnn + * @brief params of spaceToBatch + */ +typedef struct tagCcSpaceToBatch { + int32_t blockShapeLength; + int32_t blockShape[CC_DIM_MAX]; + int32_t paddings[2 * CC_DIM_MAX]; +} ccSpaceToBatch_t; + +/** + * @ingroup dnn + * @brief struct define of tensor + */ +typedef struct tagCcTensor { + ccTensorFormat_t format; + ccDataType_t dataType; + int32_t dimCnt; + int32_t realDimCnt; + uint32_t dataSize; + int32_t dim[CC_DIM_MAX]; + int32_t stride[CC_DIM_MAX]; + ccVecQuantizePara_t vecQuantizePara; +} ccTensor_t; + +/** + * @ingroup dnn + * @brief struct define of filter tensor + */ +typedef struct tagCcFilter { + ccTensorFormat_t format; + ccDataType_t dataType; + int32_t dimCnt; + uint32_t dataSize; + int32_t dim[CC_DIM_MAX]; +} ccFilter_t; + +/** + * @ingroup dnn + * @brief struct define of convolution operator + */ +typedef struct tagCcConvolution { + ccConvolutionMode_t mode; + ccPaddingMode_t padMode; + int32_t dimCnt; + int32_t padding[2 * (CC_DIM_MAX - 2)]; + int32_t filterStride[CC_DIM_MAX - 2]; + int32_t dilation[CC_DIM_MAX - 2]; + int32_t group; + ccQuantizeDescriptor_t quantInfo; + ccConvolutionAipp_t aippInfo; + int32_t adj[CC_DIM_MAX - 2]; + int32_t targetShape[CC_DIM_MAX - 2]; + int32_t beforePadding[2 * (CC_DIM_MAX - 2)]; // pad before conv + uint32_t reluFlag; + 
int64_t concatBatchSize; +} ccConvolution_t; + +#define ccCorrelation_t ccConvolution_t +typedef struct tagCcFullConnection_t { + ccQuantizeDescriptor_t quantInfo; + uint32_t infoTabSize; + const void *infoTab; + bool reluFlag; + ccFullConnectFwdAlgo_t algo; +} ccFullConnection_t; + +typedef struct tagCcConcatFour2Five_t { + uint32_t branchNum; // how many branch for box or class + uint32_t classNum; // box branch's classNum is four, class branch's classNum is class number +} ccConcatFour2Five_t; + +typedef struct tagCcTransdata_t { + uint64_t scaleQAddr; + uint8_t scaleQValueMode; + uint64_t offsetQAddr; + uint8_t quantAlgo; + uint8_t quantize8bitFlag; +} ccTransdata_t; +/** + * @ingroup dnn + * @brief struct define of pooling operator + */ +typedef struct tagCcPooling { + ccPoolingMode_t mode; + ccPaddingMode_t padMode; + ccNanPropagation_t maxpoolingNanOpt; + int32_t dimCnt; + int32_t windowDim[CC_DIM_MAX - 2]; + int32_t padding[CC_DIM_MAX - 2]; + int32_t stride[CC_DIM_MAX - 2]; + int32_t dataMode; + int32_t ceilMode; + ccQuantizeDescriptor_t quantInfo; + ccPooingFwdAlgo_t algo; +} ccPooling_t; + +/** + * @ingroup dnn + * @brief struct define of activation operator + */ +typedef struct tagCcActivation { + ccActivationMode_t mode; + ccNanPropagation_t reluNanOpt; + double coef; /* ceiling for clipped RELU, alpha for ELU */ + ccActivationPara_u activationPara; +} ccActivation_t; + +/** + * @ingroup dnn + * @brief struct define of svdf operator + */ +typedef struct tagCcSvdf { + ccTensorFormat_t format; + ccDataType_t dataType; + uint32_t batches; + uint32_t features; + uint32_t rank; + uint32_t inputSize; + uint32_t memorySize; +} ccSvdf_t; + +/** + * @ingroup dnn + * @brief struct define of svdf operator + */ +typedef struct tagCcHashTableLookup { + ccTensorFormat_t format; + ccDataType_t lookupType; + ccDataType_t keyType; + ccDataType_t valueType; + ccDataType_t outputType; + ccDataType_t hitsType; + uint32_t lookups; + uint32_t keys; + uint32_t rows; + uint32_t features; + uint16_t valueScale; + uint16_t outputScale; + uint16_t valueOffset; + uint16_t outputOffset; +} ccHashTableLookup_t; + +/** + * @ingroup dnn + * @brief struct define of prelu operator + */ +typedef struct tagCcPRelu { + ccNanPropagation_t reluNanOpt; + int32_t slopeCount; + bool channelShared; +} ccPRelu_t; + +/** + * @ingroup dnn + * @brief struct define of crop operator + */ +typedef struct tagCcCrop { + int32_t startAxis; + int32_t offset[CC_DIM_MAX]; + int32_t offsetCnt; +} ccCrop_t; + +/** + * @ingroup dnn + * @brief struct define of SpatialTransformer operator + */ +typedef struct tagCcSpatialTransformer { + ccSamplerType_t samplerType; + ccDataType_t dataType; + int32_t dimCnt; + uint64_t dim[CC_DIM_MAX]; + uint64_t alignCorner; +} ccSpatialTransformer_t; + +/** + * @ingroup dnn + * @brief struct define of ShiftTransformer operator + */ +typedef struct tagCcShiftTransformer { + ccSamplerType_t samplerType; + double xPreDefined; + double yPreDefined; + bool xShift; + bool yShift; + int32_t gridH; + int32_t gridW; +} ccShiftTransformer_t; + +/** + * @ingroup dnn + * @brief struct define of FasterRcnnProposal operator + */ +typedef struct tagCcFasterRcnnProposal { + int32_t preNMStopK; + int32_t postNMStopK; + float nmsTresh; + float minSize; + float featStride; + float baseSize; + int32_t ratioCnt; + int32_t scaleCnt; + float *ratio; + float *scale; + int32_t imgH; + int32_t imgW; +} ccFasterRcnnProposal_t; + +/** + * @ingroup dnn + * @brief struct define of LRN operator + */ +typedef struct tagCcLRN { + 
ccLRNMode_t lrnMode; + int32_t lrnN; + double lrnAlpha; + double lrnBeta; + double lrnK; +} ccLRN_t; + +/** + * @ingroup dnn + * @brief struct define of instanceNorm + */ +typedef struct tagCcInstancenorm { + ccInstanceNormMode_t mode; + double epsilon; +} ccInstancenorm_t; + +/** + * @ingroup dnn + * @brief struct define of assignOp operator + */ +typedef struct tagCcAssignOp { + ccAssignOpMode_t assignOpMode; +} ccAssignOp_t; + +/** + * @ingroup dnn + * @brief struct define of arcSinCos operator + */ +typedef struct tagCcArcSinCos { + ccArcSinCosMode_t arcSinCosMode; +} ccArcSinCos_t; + +/** + * @ingroup dnn + * @brief struct define of Detectpostprocess operator + */ +typedef struct tagCcDetectpostprocess { + int32_t numClasses; + float confThreshold; + float nmsThreshold; + int32_t outTopK; + float bboxRegWeightsDx; + float bboxRegWeightsDy; + float bboxRegWeightsDw; + float bboxRegWeightsDh; +} ccDetectpostprocess_t; +/** + * @ingroup dnn + * @brief struct define of FasterRcnnDetectionOutput operator + */ +typedef struct tagCcFasterRcnnDetectionOutput { + int32_t numClasses; + float nmsThreshold; + float postConfThreshold; + int32_t imgH; + int32_t imgW; + int32_t batchSize; +} ccFasterRcnnDetectionOutput_t; + +/** + * @ingroup dnn + * @brief struct define of SsdDetectionOutput operator + */ +typedef struct tagCcSsdDetectionOutput { + int32_t numClasses; + int32_t backgroundLabelId; + double preConfThreshold; + int32_t preTopK; + double nmsThreshold; + double nmsEta; + ccBoxCodeType_t codeType; + int32_t outTopK; + bool shareLocation; + bool varianceEncodedInTarget; + uint32_t boxTypeNum; + float var[4]; + uint32_t variance_num; +} ccSsdDetectionOutput_t; + +/** + * @ingroup dnn + * @brief struct define of RefinedetDetectionOutput operator + */ +typedef struct tagCcRefinedetDetectionOutput { + int32_t numClasses; + int32_t backgroundLabelId; + double preConfThreshold; + int32_t preTopK; + double nmsThreshold; + double nmsEta; + ccBoxCodeType_t codeType; + int32_t outTopK; + bool shareLocation; + bool varianceEncodedInTarget; + uint32_t boxTypeNum; + float var[4]; + uint32_t variance_num; + double objectness_score; +} ccRefinedetDetectionOutput_t; + +/** + * @ingroup dnn + * @brief struct define of MsrGenerateRpnProposals operator + */ +typedef struct tagCcMsrGenerateRpnProposals { + int32_t preNmsTopK; + int32_t postNmsTopK; + float nmsThreshold; + float rpnMiniSize; + int32_t imgH; + int32_t imgW; + uint32_t boxTypeNum; + float scoreThreshold; +} ccMsrGenerateRpnProposals_t; + +/** + * @ingroup dnn + * @brief struct define of RetinaPostprocessor operator + */ +typedef struct tagCcRetinaPostprocessor { + int32_t numClasses; + int32_t maxDetections; + float nmsThreshold; + float scoreThreshold; + int32_t imgH; + int32_t imgW; + uint32_t boxTypeNum; + float mean[4]; + int32_t meanNum; + float std[4]; + int32_t stdNum; + int32_t outputNum; + bool ocrFlag; +} ccRetinaPostprocessor_t; + +/** + * @ingroup dnn + * @brief struct define of GenerateSsdAnchors operator + */ +typedef struct tagCcGenerateSsdAnchors { + int32_t featureMapShapeList[20]; + uint32_t featureMapShapeListSize; + int32_t boxSpecsNum[10]; + uint32_t boxSpecsNumSize; + float scales[10]; + uint32_t scalesNum; + float aspectRatios[10]; + uint32_t aspectRatiosNum; + int32_t baseAnchorSize[2]; + uint32_t baseAnchorSizeNum; + int32_t anchorStride[2]; + uint32_t anchorStrideNum; + int32_t anchorOffset[2]; + uint32_t anchorOffsetNum; + bool reduceBoxesInLowestLayer; + float minScale; + float maxScale; + int32_t imgH; + int32_t 
imgW; +} ccGenerateSsdAnchors_t; + +/** + * @ingroup dnn + * @brief struct define of MscnnBoxOutput operator + */ +typedef struct tagCcMscnnBoxOutput { + double fgThreshold; + double nmsThreshold; + ccNmsType_t nmsType; + int32_t fieldH[CC_MAX_INPUT_CNT]; + int32_t fieldW[CC_MAX_INPUT_CNT]; + int32_t downsampleRate[CC_MAX_INPUT_CNT]; + int32_t defaultBoxCnt; + double fieldWhr; + double fieldXyr; + int32_t maxNmsNum; + int32_t maxPostNmsNum; + double minSize; +} ccMscnnBoxOutput_t; + +/** + * @ingroup dnn + * @brief struct define of NMS operator + */ +typedef struct tagCcNms { + int32_t numClasses; + int32_t backgroundLabelId; + double preConfThreshold; + int32_t preTopK; + double nmsThreshold; + double nmsEta; + int32_t postTopK; + int32_t outTopK; + double postConfThreshold; + bool shareLocation; +} ccNms_t; + +/** + * @ingroup dnn + * @brief struct define of NMS/MultiClassNMS operator + */ +typedef struct tagCcMultiClassNms { + uint64_t numClasses; + float objThreshold; + float nmsThreshold; + float clsThreshold; + bool normal; + uint64_t coorType; +} ccCcMultiClassNms_t; + +/** + * @ingroup dnn + * @brief struct define of YoloDetectionOutput operator + */ +typedef struct tagCcYoloDetectionOutput { + ccYoloVersion_t yoloVersion; + uint32_t netH; + uint32_t netW; + uint32_t postTopK; + uint32_t classes; + float nmsThreshold; + float iouThreDecay; + float coorScaleFactor; + bool relative; + float objThreshold; + float clsThreshold; + uint32_t biasNum; + float *bias; +} ccYoloDetectionOutput_t; + +/** + * @ingroup dnn + * @brief struct define of GetRegionBox operator + */ +#ifndef CC_MAX_YOLO_BIAS_NUM +#define CC_MAX_YOLO_BIAS_NUM (16) +#endif + +typedef struct tagCcGetRegionBox { + uint32_t biasNum; + uint32_t H; + uint32_t W; + float bias[CC_MAX_YOLO_BIAS_NUM]; +} ccGetRegionBox_t; + +/** + * @ingroup dnn + * @brief struct define of CorrectBoxes operator + */ +typedef struct tagCorrectBoxes { + uint32_t netW; + uint32_t netH; + bool relative; +} ccCorrectBoxes_t; + +/** + * @ingroup dnn + * @brief struct define of ClsProb operator + */ +typedef struct tagClsProb { + float objThreshold; +} ccClsProb_t; + +/** + * @ingroup dnn + * @brief struct define of SsdPriorBox operator + */ +typedef struct tagCcSsdPriorBox { + ccBoxCodeType_t codeType; + double *minSize; + int32_t minSizeNum; + double *maxSize; + int32_t maxSizeNum; + double *aspectRatio; + int32_t aspectRatioNum; + double *variance; + int32_t varianceNum; + int32_t imgH; + int32_t imgW; + double stepH; + double stepW; + double offset; + bool flip; + bool clip; +} ccSsdPriorBox_t; + +/** + * @ingroup dnn + * @brief struct define of Yolo2Region operator + */ +typedef struct tagCcYolo2Region { + ccSoftmaxTree_t softmaxTree; + bool softmax; + bool background; + bool treeSoftmax; +} ccYolo2Region_t; + +/** + * @ingroup dnn + * @brief struct define of YoloRegion operator + */ +typedef struct tagCcYoloRegion { + ccSoftmaxTree_t softmaxTree; + bool softmax; + bool background; + bool treeSoftmax; + int32_t classes; + int32_t coords; + int32_t boxes; + ccYoloVersion_t yoloV; +} ccYoloRegion_t; + +/** + * @ingroup dnn + * @brief struct define of power operator + */ +typedef struct tagCcPower { + float scale; + float shift; + float power; +} ccPower_t; + +/** + * @ingroup dnn + * @brief struct define of exp operator + */ +typedef struct tagCcExp { + ccDataType_t dataType; + uint32_t paramCnt; +} ccExp_t; + +/** + * @ingroup dnn + * @brief struct define of exp operator + */ +typedef struct tagCcLog { + ccDataType_t dataType; + uint32_t paramCnt; 
+} ccLog_t; + +/** + * @ingroup dnn + * @brief struct define of pow operator + */ +typedef struct tagCcPow { + ccDataType_t dataType; + uint32_t paramCnt; +} ccPow_t; + +/** + * @ingroup dnn + * @brief struct define of padv2 operator + */ +typedef struct tagCcPadV2 { + ccPadMode_t padMode; + void *padValue; + ccDataType_t padValueType; + int32_t padDimCnt; + int32_t padShapeLow[CC_DIM_MAX]; + int32_t padShapeHigh[CC_DIM_MAX]; +} ccPadV2_t; + +/** + * @ingroup dnn + * @brief struct define of psROIPooling operator + */ +typedef struct tagCcPsRoiPooling { + ccPoolingMode_t poolingMode; + int32_t pooledH; + int32_t pooledW; + float spatialScale; + float padRatio; + int32_t groupSize; + int32_t outputDim; +} ccPsRoiPooling_t; + +/** + * @ingroup dnn + * @brief struct define of RoIAlign operator + */ +typedef struct tagCcRoiAlign { + int32_t pooledH; + int32_t pooledW; + float spatialScale; + int32_t samplingRatio; +} ccRoiAlign_t; + +/** + * @ingroup dnn + * @brief struct define of RoiInterpPooling operator + */ +typedef struct tagCcRoiInterpPooling { + int32_t pooledH; + int32_t pooledW; + int32_t poolKernelH; + int32_t poolKernelW; + int32_t pooledTailH; + int32_t pooledTailW; + float spatialScaleH; + float spatialScaleW; +} ccRoiInterpPooling_t; + +/** + * @ingroup dnn + * @brief struct define of DetectionFull3DOutput operator + */ +typedef struct tagCcDetectionFull3DOutput { + int32_t imageWidth; + int32_t imageHeight; + int32_t numAngleBins; + float trcMarginRatioX; + float trcMarginRatioY; + int32_t pitchRangeD; + int32_t pitchPresetD; + float mountHeight; + int32_t visiblenessBins; + float meanVisibleness; + bool discreteVisibleness; +} ccDetectionFull3DOutput_t; + +/** + * @ingroup dnn + * @brief struct define of MsrFastRcnnPredictions operator + */ +typedef struct tagMsrFastRcnnPredictions { + int32_t numClasses; // num of classes + float scoreThreshold; // the threshold of the score + double nmsThreshold; // the threshold of nms + int32_t postTopK; + int32_t outTopK; + int32_t imgH; // the height of image + int32_t imgW; // the width of image +} ccMsrFastRcnnPredictions_t; + +typedef struct tagCcResizeBilinear { + ccResizeOutputDimMode_t resizeOutputDimMode; + bool alignCorners; + int32_t zoom_factor; + int32_t shrink_factor; + int32_t height; + int32_t width; + int32_t pad_begin; + int32_t pad_end; +} ccResizeBilinear_t; + +typedef struct tagCcResizeNearestNeighbor { + bool alignCorners; + int32_t height; + int32_t width; +} ccResizeNearestNeighbor_t; + +typedef struct tagCcEltwise { + ccQuantize_t *quantInfo; + bool reluFlag; +} ccEltwise_t; + +typedef struct tagCcBatchNorm { + bool reluFlag; +} ccBatchNorm_t; + +typedef struct tagCcPad { + ccPadMode_t padMode; + float padValue; + int32_t htoppad; // padLow[0] + int32_t hbottompad; // padHigh[0] + int32_t wleftpad; // padLow[1] + int32_t wrightpad; // padHigh[1] +} ccPad_t; + +typedef struct tagCcSubCondition { + uint32_t BaseCondValue[4]; + ccCMPType_t condType[4]; + ccResultType_t resultType; +} ccSubCondition; + +typedef struct tagCcShapeClassifyCond { + uint32_t subConditionNum; + ccResultType_t resultType; + uint32_t true_value; + ccSubCondition subCond[2]; +} ccShapeClassifyCond; + +#ifndef CC_SHAPE_CLASSIFY_CONDITION_NUM +#define CC_SHAPE_CLASSIFY_CONDITION_NUM (8) +#endif + +typedef struct tagCcShapeClassify { + uint32_t shapeClassifyConditionNum; + uint32_t defaultValue; + ccShapeClassifyCond shapeClassifyCond[CC_SHAPE_CLASSIFY_CONDITION_NUM]; +} ccShapeClassify_t; + +/** + * @ingroup dnn + * @bref struct define of square 
operator
+ */
+typedef struct tagCcSquare {
+ ccSquareMode_t mode;
+} ccSquare_t;
+
+/*
+ * @ingroup dnn
+ * @brief operation of segment reduction
+ */
+typedef enum {
+ CC_SEGMENT_REDUCTION_OP_SUM = 0, /**< sum */
+ CC_SEGMENT_REDUCTION_OP_INVALID
+} ccSegmentReductionOpType_t;
+
+typedef struct tagCcFillParam {
+ // The filler type.
+ ccFillOpType_t fillType;
+ ccDataType_t valueDatatype;
+ const void *value; // the value in constant fill
+ const void *min; // the min value in uniform fill
+ const void *max; // the max value in uniform fill
+ const void *mean; // the mean value in Gaussian fill
+ const void *std; // the std value in Gaussian fill
+ // the seed used to generate data in Gaussian and uniform fill
+ int64_t seed1;
+ int64_t seed2;
+} ccFillParam_t;
+
+typedef struct tagNonMaxSuppression {
+ ccDataType_t dataType;
+ uint32_t paraCount;
+} ccNonMaxSuppression_t;
+
+typedef struct tagCcArgmaxmin {
+ int32_t axisType;
+ bool outMaxVal;
+ int64_t topK;
+ int64_t reduceSize;
+ int64_t reduceStride;
+ int64_t axis;
+ bool keepDims;
+} ccArgmaxmin_t;
+
+typedef struct tagUpsamplePara {
+ int32_t scale;
+ int32_t scaleHeight;
+ int32_t scaleWidth;
+ int32_t upsampleHeight;
+ int32_t upsampleWidth;
+ bool padOutHeight;
+ bool padOutWidth;
+} ccUpsamplePara_t;
+
+typedef struct tagCcConcatFive2Four_t {
+ ccTransForLossMode_t mode;
+ uint32_t classNum;
+} ccConcatFive2Four_t;
+
+}; // namespace cce
+#endif // DNN_STRUCT_BASE_HPP__
diff --git a/third_party/fwkacllib/inc/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/inc/cce/fwk_adpt_struct.h
new file mode 100644
index 00000000..7a2cbc50
--- /dev/null
+++ b/third_party/fwkacllib/inc/inc/cce/fwk_adpt_struct.h
@@ -0,0 +1,130 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FWK_ADPT_STRUCT_H__
+#define FWK_ADPT_STRUCT_H__
+
+#include
+
+namespace aicpu {
+namespace FWKAdapter {
+
+// API RETURN CODE
+enum FWKAdptAPIRetCode {
+ FWK_ADPT_SUCCESS = 0, // success
+ FWK_ADPT_NOT_INIT = 1, // not init
+ FWK_ADPT_ALLOC_FAILED = 2, // allocate memory failed
+ FWK_ADPT_PARAM_INVALID = 3, // invalid input param
+ FWK_ADPT_PARAM_PARSE_FAILED = 4, // parse input param failed
+ FWK_ADPT_NATIVE_ERROR = 5, // error code
+ FWK_ADPT_NOT_SUPPORT_OPTYPE = 6, // unsupported operate type
+ FWK_ADPT_INTERNAL_ERROR = 7, // adapter internal error
+ FWK_ADPT_NOT_SUPPORT_DATATYPE = 8, // unsupported input/output data type
+ FWK_ADPT_KERNEL_ALREADY_RUNING = 9, // kernel already running, parallel run not supported
+ FWK_ADPT_SESSION_NOT_EXIST = 10, // session id does not exist
+ FWK_ADPT_SESSION_ALREADY_EXIST = 11, // session id already exists when creating session
+ FWK_ADPT_NATIVE_END_OF_SEQUENCE = 12, // end of sequence
+ FWK_ADPT_EXTEND_TYPE_NOT_EXIST = 13, // extend info type does not exist
+ FWK_ADPT_UNKNOWN_ERROR = 99 // unknown error code
+};
+
+// FWKAdapter operate type
+// Notice: adding a new operate type requires checking with OMM, and new values must be appended at the end.
+enum FWKOperateType { + FWK_ADPT_SESSION_CREATE = 0, + FWK_ADPT_KERNEL_RUN, + FWK_ADPT_KERNEL_DESTROY, + FWK_ADPT_SESSION_DESTROY, + FWK_ADPT_SINGLE_OP_RUN, + FWK_ADPT_KERNEL_RUN_NO_SESS, +}; + +// Extend Info type for task +enum FWKTaskExtInfoType { + FWK_ADPT_EXT_SHAPE_TYPE = 0, + FWK_ADPT_EXT_INPUT_SHAPE, + FWK_ADPT_EXT_OUTPUT_SHAPE, + FWK_ADPT_EXT_UPDATE_ADDR, + FWK_ADPT_EXT_OP_NAME, + FWK_ADPT_EXT_SESSION_INFO, + FWK_ADPT_EXT_BITMAP, + FWK_ADPT_EXT_INVALID +}; + +enum FWKExtUpdateAddrType { + FWK_ADPT_UPDATE_NULL = 0, + FWK_ADPT_UPDATE_INPUT, + FWK_ADPT_UPDATE_OUTPUT, + FWK_ADPT_UPDATE_INPUT_OUTPUT +}; + +#pragma pack(push, 1) +// API Parameter Structure +struct StrFWKKernel { + FWKOperateType opType; + uint64_t sessionID; // unique + + uint64_t stepIDAddr; // step id addr + uint64_t kernelID; // run kernel id, unique in session + uint64_t nodeDefLen; // nodeDef protobuf len + uint64_t nodeDefBuf; // NodeDef protobuf offset addr, need convert to void* + uint64_t funDefLibLen; // FunctionDefLibrary protobuf len + uint64_t funDefLibBuf; // FunctionDefLibrary protobuf addr which use in NodeDef, need convert to void* + + uint64_t inputOutputLen; // InputOutput shap protobuf len + uint64_t inputOutputBuf; // InputOutput shap protobuf addr, need convert to void* + uint64_t workspaceBaseAddr; // Workspace base addr, need convert to void* + uint64_t inputOutputAddr; // InputOutput addr, need convert to void* + + uint64_t extInfoLen; // extend info total length + uint64_t extInfoAddr; // extend info addr, ExtInfo structure +}; +#pragma pack(pop) + +typedef StrFWKKernel FWKOperateParam; + +// Extent info ShapeAndType +const uint32_t kMaxShapeDims = 8; +#pragma pack(push, 1) +struct ShapeAndType { + int32_t type; + int64_t dims[kMaxShapeDims]; +}; +#pragma pack(pop) + +// Extend info structure for extInfoAddr +const uint32_t kExtInfoHeadSize = 8; + +#pragma pack(push, 1) +struct ExtInfo { + int32_t infoType; // extend type + uint32_t infoLen; // length for infoMsg + char infoMsg[0]; // extend value +}; +#pragma pack(pop) + +#pragma pack(push, 1) +struct ResultSummary { + uint64_t shape_data_ptr; // shape data addr, need convert to void* + uint64_t shape_data_size; // num of dims + uint64_t raw_data_ptr; // raw data addr, need convert to void* + uint64_t raw_data_size; // size of raw data +}; +#pragma pack(pop) +} // end namespace FWKAdapter +} // namespace aicpu + +#endif // FWK_ADPT_STRUCT_H__ diff --git a/third_party/fwkacllib/inc/inc/cce/l2fusion_struct.hpp b/third_party/fwkacllib/inc/inc/cce/l2fusion_struct.hpp new file mode 100644 index 00000000..fa5a95c9 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/l2fusion_struct.hpp @@ -0,0 +1,56 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef L2FUSION_STRUCT_HPP_ +#define L2FUSION_STRUCT_HPP_ + +#include +#include +#include "runtime/kernel.h" + +#define L2_DYNAMIC_SPLIT_NUM + +using namespace std; + +namespace fusion { + +typedef struct tagL2Data { + uint32_t l2Index; + uint64_t l2Addr; + uint64_t l2PageNum; +} L2Data_t; + +typedef std::map L2DataMap_t; // the key is ddr addr +typedef std::pair L2DataPair_t; // the key is ddr addr + +typedef struct TagTaskL2Info { + string nodeName; + rtL2Ctrl_t l2ctrl; + + L2DataMap_t input; + L2DataMap_t output; + uint32_t isUsed; +} TaskL2Info_t; + +typedef std::map TaskL2InfoMap_t; // the key is nodeId +typedef std::pair TaskL2InfoPair_t; // the key is nodeId + +typedef std::map TaskL2InfoFEMap_t; // the key is nodeName +typedef std::pair TaskL2InfoFEPair_t; // the key is nodeName + +} // namespace fusion + +#endif // L2FUSION_STRUCT_HPP_ diff --git a/third_party/fwkacllib/inc/inc/cce/optimizer/fusion_engine.h b/third_party/fwkacllib/inc/inc/cce/optimizer/fusion_engine.h new file mode 100644 index 00000000..299998e3 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/optimizer/fusion_engine.h @@ -0,0 +1,65 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FUSION_ENGINE_HPP_ +#define FUSION_ENGINE_HPP_ + +#include "cce/cce.h" +#include "graph/compute_graph.h" +#include "proto/task.pb.h" + +#include +#include + +using namespace domi; +using namespace std; + +namespace fusion { +enum { + FUSION_STATUS_SUCCESS = 0, + FUSION_STATUS_FAIL = 1, +}; + +typedef struct { + uint64_t weightSize; + uint64_t memorySize; + uint8_t *dataMemBase; + uint8_t *weightMemBase; + uint32_t l2Enable; // 1 //1 - enable l2 buffer allocation, 0 - disable l2 buffer allocation + uint32_t fusionEnable; // 1 // 1 - enable buffer fusion, 0 - disable buffer fusion +} ModelRes; + +static const std::string SCOPE_ID_ATTR = "fusion_scope"; +static const std::string L2FUSION_DYNAMIC_CONVERGE_OP = "l2fusion_dynamic_converge_op"; +static const std::string L2FUSION_DYNAMIC_SPLIT_NUM = "l2fusion_dynamic_split_num"; +static const std::string FUSION_VIRTUAL_OP = "fusion_virtual_op"; +static const std::string FUSION_MULTI_BATCH_STRIDE = "fusion_multi_bathc_stride"; + +#define TVM_TYPE 1 + +typedef std::map> kScopeNodeMap_t; +typedef std::pair> kScopeNodePair_t; + +uint32_t BufferFusion(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph, bool enable_l2dynamic = true); +uint32_t BufferFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph); +uint32_t GraphFusion(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph); +uint32_t FusionTaskBuild(cce::ccHandle_t ccHandle, ge::ComputeGraphPtr fusionGraph, ge::Buffer &buffer, + ModelRes &modelRes, std::vector &task_def_list_); +void FusionTaskBuildComplete(std::vector cchandleList); +uint32_t GraphFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph); +} // namespace fusion + +#endif // FUSION_ENGINE_HPP_ diff --git 
a/third_party/fwkacllib/inc/inc/cce/taskdown_api.h b/third_party/fwkacllib/inc/inc/cce/taskdown_api.h new file mode 100644 index 00000000..2323aaa7 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/taskdown_api.h @@ -0,0 +1,54 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TASKDOWN_API_H_ +#define TASKDOWN_API_H_ + +#include +#include +#include "cce/cce.h" +#include "l2fusion_struct.hpp" +#include "taskdown_common.hpp" + +namespace cce { + +#define CC_FUSION_OP_MAX 32 + +typedef struct tagOpAddrsInfo { + void *addrPos; + uintptr_t addrData; +} ccOpAddrsInfo; + +#ifdef __cplusplus +extern "C" { +#endif + +ccStatus_t ccUpdateKernelArgs(ccOpContext &opContext, uint64_t dataBaseAddr, uint64_t weightBaseAddr, + uint64_t variableBaseAddr, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr); + +#ifdef __cplusplus +} +#endif + +ccStatus_t ccGetKernelArgsAddrs(ccOpContext &opContext, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr, + std::vector &opAddrsInfo); + +ccStatus_t ccSetKernelArgs(std::vector &dateInfo); + +ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType); + +} // namespace cce +#endif // TASKDOWN_API_H_ diff --git a/third_party/fwkacllib/inc/inc/cce/taskdown_common.hpp b/third_party/fwkacllib/inc/inc/cce/taskdown_common.hpp new file mode 100644 index 00000000..3ecea523 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/cce/taskdown_common.hpp @@ -0,0 +1,107 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TASKDOWN_COMMON_H_ +#define TASKDOWN_COMMON_H_ + +#include +#include "cce/cce_def.hpp" +#include "common/attr_list.hpp" +#include "l2fusion_struct.hpp" + +namespace cce { + +#define CC_FUSION_OP_MAX 32 + +typedef enum tagccKernelType { + CCE_AI_CORE = 0, /* cce aicore */ + CCE_AI_CPU = 1, /* cce aicpu */ + TE = 2, /* te operator*/ + CUSTOMIZED = 3, /* customized operator */ + TE_AI_CORE = 4, /* te aicore operator*/ + TE_AI_CPU = 5, /* te aicpu operator */ + AI_CPU = 6, /* aicpu */ + CUST_AI_CPU = 7, /* custom aicpu*/ + INVALID = 8, /* unknown kernel type */ +} ccKernelType; + +typedef struct tagOpContext { + ccKernelType kernelType; + uint32_t opId; + uint32_t kernelFuncId; + uint32_t opIndex; + uint32_t opCount; + uint32_t opIndex2[CC_FUSION_OP_MAX]; + bool isFlowtable; + uint16_t *argsOffset; + uint32_t argsCount; + uint64_t genDataBaseAddr; + uint64_t genDataBaseSize; + uint64_t genWeightBaseAddr; + uint64_t genWeightBaseSize; + uint64_t genVariableBaseAddr; + uint64_t genVariableBaseSize; + uint64_t l2ctrlSize; +} ccOpContext; + +typedef struct tagOpReadCount { + bool isEnable; + std::map tensorRc; +} ccOpReadCount; + +typedef enum tagTaskDownKernelIdMode { + CC_TASKDOWN_RESERVED = 0, + CC_TASKDOWN_ROIPOOLING, + CC_TASKDOWN_ROIPOOLING_PERF, + CC_TASKDOWN_ROIALIGN, + CC_TASKDOWN_ROIALIGN_PERF, + CC_TASKDOWN_FC, + CC_TASKDOWN_FC_COMPRESS, + CC_TASKDOWN_SOFTMAX_LOWEST, + CC_TASKDOWN_ROIALIGN_FP16, + CC_TASKDOWN_RESIZE_NEAREST_NEIGHBOR, + CC_TASKDOWN_RESIZE_NEAREST_NEIGHBOR_COMMON, +} ccTaskDownKernelIdMode_t; + +ccStatus_t GetStream(ccHandle_t handle, rtStream_t *streamId); + +ccStatus_t ccClearOpMap(ccHandle_t handle); + +ccStatus_t ccSetKernelOpMap(ccHandle_t handle); + +ccStatus_t ccSetKernelContext(ccHandle_t handle, uint32_t opId, AttrList &attrList, bool isFlowtable, + ccKernelType kernelType, void *pgraph); + +ccStatus_t ccGetKernelContext(rtStream_t streamId, ccOpContext &opContext); + +ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType); + +ccStatus_t ccSetStreamL2Map(ccHandle_t handle, fusion::TaskL2InfoMap_t &l2AllocRes); + +ccStatus_t ccGetStreamL2Map(rtStream_t streamId, uint32_t opIndex, fusion::TaskL2Info_t *&l2Data); + +ccStatus_t ccSetOpIndex(ccHandle_t handle, uint32_t opIndex); + +ccStatus_t ccGetOpIndex(ccHandle_t handle, uint32_t &opIndex); + +ccStatus_t ccGetOpIndexByStream(rtStream_t streamId, uint32_t &opIndex); + +ccStatus_t ccClearStreamL2Map(ccHandle_t handle); + +ccStatus_t ccGetKernelReadCount(rtStream_t streamId, ccOpReadCount &rc); + +} // namespace cce +#endif // TASKDOWN_COMMON_H_ diff --git a/third_party/fwkacllib/inc/inc/hccl/base.h b/third_party/fwkacllib/inc/inc/hccl/base.h new file mode 100644 index 00000000..9facd20c --- /dev/null +++ b/third_party/fwkacllib/inc/inc/hccl/base.h @@ -0,0 +1,129 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file base.h + * @brief HCOM data type definition + * + */ + +#ifndef HCCL_BASE_H_ +#define HCCL_BASE_H_ +#include +#include +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +typedef signed char s8; +typedef signed short s16; +typedef signed int s32; +typedef signed long long s64; +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +/** + * @brief Horovod Reduction opperation + */ +typedef enum { + HOROVOD_REDUCE_AVERAGE = 0, /**< average */ + HOROVOD_REDUCE_SUM = 1, /**< sum */ + HOROVOD_REDUCE_ADASUM = 2, /**< adasum */ + HOROVOD_REDUCE_MIN = 3, /**< min */ + HOROVOD_REDUCE_MAX = 4, /**< max */ + HOROVOD_REDUCE_PROD = 5, /**< proo */ + HOROVOD_REDUCE_RESERVED /**< reserved */ +} HorovodReduceOp; + +const u32 HCCL_MAX_SEGMENT_NUM = 8; // The max number of gradient segments. + +/** + * @brief the feature of the model + */ +struct model_feature { + const char *model_name; /**< The model name */ + u32 gradient_num; /**< The number of gradients */ + float *gradient_size; /**< The size of each gradient */ + float *gradient_time; /**< The BP compution time of each gradient */ +}; + +/** + * @brief Memory Register Address Struct for Remote Access + */ +struct MemRegisterAddr { + u64 addr; + u64 length; +}; +/* + * @brief The max number of memory register address for remote access. + */ +const u32 HCCL_MAX_MEM_REGISTER_NUM = 32; + +enum GradSplitForceMode { + FORCE_NONE, /**< no force */ + FORCE_SIZE, /**< force split gradient by size */ + FORCE_RESERVED /**< reserved */ +}; + +enum OriginalGraphShapeType { + KNOWN_SHAPE, + UNKNOWN_SHAPE, + SHAPE_RESERVED /**< reserved */ +}; + +/** +* @brief stream handle. +*/ +typedef void *rtStream_t; + +/** +* @brief model handle. +*/ +typedef void *rtModel_t; + +struct HcomOperation { + std::string hcclType; + void *inputPtr; + void *outputPtr; + u64 count; + HcclDataType dataType; + HcclReduceOp opType; + u32 root; + + HcomOperation() + { + inputPtr = nullptr; + outputPtr = nullptr; + count = 0; + dataType = HCCL_DATA_TYPE_RESERVED; + opType = HCCL_REDUCE_RESERVED; + root = 0; + } +}; + +struct HcomRemoteAccessAddrInfo { + u32 remotetRankID; + u64 remoteAddr; // host embedding table address + u64 localAddr; // device HBM address + u64 length; // Memory Length in Bytes +}; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_BASE_H_ diff --git a/third_party/fwkacllib/inc/inc/hccl/hcom.h b/third_party/fwkacllib/inc/inc/hccl/hcom.h new file mode 100644 index 00000000..972f470c --- /dev/null +++ b/third_party/fwkacllib/inc/inc/hccl/hcom.h @@ -0,0 +1,179 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hcom.h + * @brief HCOM API + */ + +#ifndef HCOM_H_ +#define HCOM_H_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + + + +/** + * @brief Get the rank number in the group. 
+ * + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. + * @return HcclResult + */ +HcclResult HcomGetRankSize(const char *group, u32 *rankSize); + +/** + * @brief Get the rank number of this rank's server within the group. + * + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. + * @return HcclResult + */ +HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); + +/** + * @brief Get the rank id of this rank. + * + * @param group A string identifying the group name. + * @param rankId A pointer identifying the rank id. + * @return HcclResult + */ +HcclResult HcomGetRankId(const char *group, u32 *rankId); + +/** + * @brief Get the local rank id of this rank's server within the group. + * + * @param group A string identifying the group name. + * @param localRankId A pointer identifying the local rank id. + * @return HcclResult + */ +HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); + +/** + * @brief Get the world rank id according to the group rank id. + * + * @param group A string identifying the group name. + * @param groupRank An integer(u32) identifying the group rank id. + * @param worldRank A pointer identifying the world rank id. + * @return HcclResult + */ +HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); + +/** + * @brief Get the group rank id according to the world rank id. + * + * @param worldRank An integer(u32) identifying the world rank id. + * @param group A string identifying the group name. + * @param groupRank A pointer identifying the group rank id. + * @return HcclResult + */ +HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); + +/** + * @brief Create group. + * + * @param group A string identifying the group name. + * @param rankNum An integer(u32) identifying the number of ranks in the group. + * @param rankIds A list identifying the ranks in the group. + * @return HcclResult + */ +HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); + +/** + * @brief Destroy group + * + * @param group A string identifying the group name. + * @return HcclResult + */ +HcclResult HcomDestroyGroup(const char *group); + +/** + * @brief Set the gradient split strategy with in the group, according to gradient index. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param IdxList A list identifying the index of end gradient in each segment. + * @return HcclResult + */ +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); + +/** + * @brief Set the gradient split strategy with in the group, according to gradient data size. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param sizeList A list identifying the percent of each segment. + * @return HcclResult + */ +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); + +/** + * @brief Initialize hcom executor. + * + * @param void + * @return HcclResult + */ +HcclResult HcomExecInitialize(); + +/** + * @brief Finalize hcom executor. + * + * @param void + * @return HcclResult + */ +HcclResult HcomExecFinalize(); + +/** + * @brief Put collective communication operation into hcom executor. 
+ * + * @param opInfo information about collective communication operation. + * @param callback callback after collective communication operation. + * @return HcclResult + */ +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); + +/** + * @brief Put remote access operation into hcom executor. + * + * @param remoteAccessType operation type (read or write). + * @param addrInfos address information about collective communication operation. + * @param callback callback after collective communication operation. + * @return HcclResult + */ +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector& addrInfos, + std::function callback); + +/** + * @brief Register memories and init resources for remote access. + * + * @param addrList memory addresses for remote access. + * @param count number of remote memory addresses. + * @return HcclResult + */ +extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCOM_H_ diff --git a/third_party/fwkacllib/inc/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/inc/mmpa/mmpa_api.h new file mode 100644 index 00000000..38a689ee --- /dev/null +++ b/third_party/fwkacllib/inc/inc/mmpa/mmpa_api.h @@ -0,0 +1,142 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _MMPA_API_H_ +#define _MMPA_API_H_ + +#define LINUX 0 +#define WIN 1 + +#if(OS_TYPE == LINUX) //lint !e553 + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#ifdef FUNC_VISIBILITY +#define MMPA_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define MMPA_FUNC_VISIBILITY +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "securec.h" + +#include "./sub_inc/mmpa_typedef_linux.h" +#include "./sub_inc/mmpa_linux.h" + +#endif + + +#if(OS_TYPE == WIN) //lint !e553 + +#ifdef FUNC_VISIBILITY +#define MMPA_FUNC_VISIBILITY _declspec(dllexport) +#else +#define MMPA_FUNC_VISIBILITY +#endif + +#include +#include +#include "Windows.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "shlwapi.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include "securec.h" + +#include "sub_inc/mmpa_typedef_win.h" +#include "sub_inc/mmpa_win.h" + +#pragma comment(lib, "ws2_32.lib") +#pragma comment(lib, "mswsock.lib") +#pragma comment(lib, "Kernel32.lib") +#pragma comment(lib, "shlwapi.lib") +#pragma comment(lib, "wbemuuid.lib") +#pragma comment(lib, "Iphlpapi.lib") +#endif + +#endif // MMPA_API_H_ + diff --git a/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_linux.h new file mode 100644 index 00000000..993f36ba --- /dev/null +++ b/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_linux.h @@ -0,0 +1,561 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MMPA_LINUX_MMPA_LINUX_H +#define MMPA_LINUX_MMPA_LINUX_H + +#ifdef __cplusplus +#if __cplusplus +extern "C" { +#endif // __cpluscplus +#endif // __cpluscplus + +#define MMPA_MACINFO_DEFAULT_SIZE 18 +#define MMPA_CPUDESC_DEFAULT_SIZE 64 + +typedef pthread_t mmThread; +typedef pthread_mutex_t mmMutex_t; +typedef pthread_cond_t mmCond; +typedef pthread_mutex_t mmMutexFC; +typedef pthread_rwlock_t mmRWLock_t; +typedef signed int mmProcess; +typedef int mmPollHandle; +typedef int mmPipeHandle; +typedef int mmFileHandle; +typedef int mmComPletionKey; +typedef int mmCompletionHandle; +typedef int mmErrorMsg; +typedef int mmFd_t; + +typedef VOID *mmExitCode; +typedef key_t mmKey_t; +typedef int mmMsgid; +typedef struct dirent mmDirent; +typedef struct dirent mmDirent2; +typedef struct shmid_ds mmshmId_ds; +typedef int (*mmFilter)(const mmDirent *entry); +typedef int (*mmFilter2)(const mmDirent2 *entry); +typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); +typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b); +typedef size_t mmSize_t; //lint !e410 !e1051 +typedef off_t mmOfft_t; +typedef pid_t mmPid_t; +typedef long MM_LONG; + +typedef VOID *(*userProcFunc)(VOID *pulArg); + +typedef struct { + userProcFunc procFunc; // Callback function pointer + VOID *pulArg; // Callback function parameters +} mmUserBlock_t; + +typedef struct { + const char *dli_fname; + void *dli_fbase; + const char *dli_sname; + void *dli_saddr; + size_t dli_size; /* ELF only */ + int dli_bind; /* ELF only */ + int dli_type; +} mmDlInfo; + +typedef struct { + int wSecond; // Seconds. [0-60] (1 leap second) + int wMinute; // Minutes. [0-59] + int wHour; // Hours. [0-23] + int wDay; // Day. [1-31] + int wMonth; // Month. [1-12] + int wYear; // Year + int wDayOfWeek; // Day of week. [0-6] + int tm_yday; // Days in year.[0-365] + int tm_isdst; // DST. 
[-1/0/1] + long int wMilliseconds; // milliseconds +} mmSystemTime_t; + +typedef sem_t mmSem_t; +typedef struct sockaddr mmSockAddr; +typedef socklen_t mmSocklen_t; +typedef int mmSockHandle; +typedef timer_t mmTimer; +typedef pthread_key_t mmThreadKey; + +typedef int mmOverLap; + +typedef ssize_t mmSsize_t; +typedef size_t mmSize; // size + +typedef struct { + UINT32 createFlag; + INT32 oaFlag; +} mmCreateFlag; + +typedef struct { + VOID *sendBuf; + INT32 sendLen; +} mmIovSegment; +typedef struct in_addr mmInAddr; + +typedef struct { + VOID *inbuf; + INT32 inbufLen; + VOID *outbuf; + INT32 outbufLen; + mmOverLap *oa; +} mmIoctlBuf; + +typedef int mmAtomicType; +typedef int mmAtomicType64; + +typedef enum { + pollTypeRead = 1, // pipe read + pollTypeRecv, // socket recv + pollTypeIoctl, // ioctl +} mmPollType; + +typedef struct { + mmPollHandle handle; // The file descriptor or handle of poll is required + mmPollType pollType; // Operation type requiring poll + // read or recv or ioctl + INT32 ioctlCode; // IOCTL operation code, dedicated to IOCTL + mmComPletionKey completionKey; // The default value is blank, which is used in windows + // The data used to receive the difference between which handle is readable +} mmPollfd; + +typedef struct { + VOID *priv; // User defined private content + mmPollHandle bufHandle; // Value of handle corresponding to buf + mmPollType bufType; // Data types polled to + VOID *buf; // Data used in poll + UINT32 bufLen; // Data length used in poll + UINT32 bufRes; // Actual return length +} mmPollData, *pmmPollData; + +typedef VOID (*mmPollBack)(pmmPollData); + +typedef struct { + INT32 tz_minuteswest; // How many minutes is it different from Greenwich + INT32 tz_dsttime; // type of DST correction +} mmTimezone; + +typedef struct { + LONG tv_sec; + LONG tv_usec; +} mmTimeval; + +typedef struct { + MM_LONG tv_sec; + MM_LONG tv_nsec; +} mmTimespec; + +typedef struct { + ULONGLONG totalSize; + ULONGLONG freeSize; + ULONGLONG availSize; +} mmDiskSize; + +#define mmTLS __thread +typedef struct stat mmStat_t; +typedef struct stat64 mmStat64_t; +typedef mode_t mmMode_t; + +typedef struct option mmStructOption; + +typedef struct { + char addr[MMPA_MACINFO_DEFAULT_SIZE]; // ex:aa-bb-cc-dd-ee-ff\0 +} mmMacInfo; + +typedef struct { + char **argv; + INT32 argvCount; + char **envp; + INT32 envpCount; +} mmArgvEnv; + +typedef struct { + char arch[MMPA_CPUDESC_DEFAULT_SIZE]; + char manufacturer[MMPA_CPUDESC_DEFAULT_SIZE]; // vendor + char version[MMPA_CPUDESC_DEFAULT_SIZE]; // modelname + INT32 frequency; // cpu frequency + INT32 maxFrequency; // max speed + INT32 ncores; // cpu cores + INT32 nthreads; // cpu thread count + INT32 ncounts; // logical cpu nums +} mmCpuDesc; + +typedef mode_t MODE; + +typedef struct { + INT32 detachFlag; // Determine whether to set separation property 0, not to separate 1 + INT32 priorityFlag; // Determine whether to set priority 0 and not set 1 + INT32 priority; // Priority value range to be set 1-99 + INT32 policyFlag; // Set scheduling policy or not 0 do not set 1 setting + INT32 policy; // Scheduling policy value value + // MMPA_THREAD_SCHED_RR + // MMPA_THREAD_SCHED_OTHER + // MMPA_THREAD_SCHED_FIFO + INT32 stackFlag; // Set stack size or not: 0 does not set 1 setting + UINT32 stackSize; // The stack size unit bytes to be set cannot be less than MMPA_THREAD_STACK_MIN +} mmThreadAttr; + +#ifdef __ANDROID__ +#define S_IREAD S_IRUSR +#define S_IWRITE S_IWUSR +#endif + +#define mm_no_argument no_argument +#define mm_required_argument 
required_argument +#define mm_optional_argument optional_argument + +#define M_FILE_RDONLY O_RDONLY +#define M_FILE_WRONLY O_WRONLY +#define M_FILE_RDWR O_RDWR +#define M_FILE_CREAT O_CREAT + +#define M_RDONLY O_RDONLY +#define M_WRONLY O_WRONLY +#define M_RDWR O_RDWR +#define M_CREAT O_CREAT +#define M_BINARY O_RDONLY +#define M_TRUNC O_TRUNC +#define M_IRWXU S_IRWXU +#define M_APPEND O_APPEND + +#define M_IN_CREATE IN_CREATE +#define M_IN_CLOSE_WRITE IN_CLOSE_WRITE +#define M_IN_IGNORED IN_IGNORED + +#define M_OUT_CREATE IN_CREATE +#define M_OUT_CLOSE_WRITE IN_CLOSE_WRITE +#define M_OUT_IGNORED IN_IGNORED +#define M_OUT_ISDIR IN_ISDIR + +#define M_IREAD S_IREAD +#define M_IRUSR S_IRUSR +#define M_IWRITE S_IWRITE +#define M_IWUSR S_IWUSR +#define M_IXUSR S_IXUSR +#define FDSIZE 64 +#define M_MSG_CREAT IPC_CREAT +#define M_MSG_EXCL (IPC_CREAT | IPC_EXCL) +#define M_MSG_NOWAIT IPC_NOWAIT + +#define M_WAIT_NOHANG WNOHANG // Non blocking waiting +#define M_WAIT_UNTRACED \ + WUNTRACED // If the subprocess enters the suspended state, it will return immediately + // But the end state of the subprocess is ignored +#define M_UMASK_USRREAD S_IRUSR +#define M_UMASK_GRPREAD S_IRGRP +#define M_UMASK_OTHREAD S_IROTH + +#define M_UMASK_USRWRITE S_IWUSR +#define M_UMASK_GRPWRITE S_IWGRP +#define M_UMASK_OTHWRITE S_IWOTH + +#define M_UMASK_USREXEC S_IXUSR +#define M_UMASK_GRPEXEC S_IXGRP +#define M_UMASK_OTHEXEC S_IXOTH + +#define mmConstructor(x) __attribute__((constructor)) VOID x() +#define mmDestructor(x) __attribute__((destructor)) VOID x() + +#define MMPA_NO_ARGUMENT 0 +#define MMPA_REQUIRED_ARGUMENT 1 +#define MMPA_OPTIONAL_ARGUMENT 2 + +#define MMPA_MAX_PATH PATH_MAX +#define M_NAME_MAX MAX_FNAME + +#define M_F_OK F_OK +#define M_X_OK X_OK +#define M_W_OK W_OK +#define M_R_OK R_OK + + +#define MM_DT_DIR DT_DIR +#define MM_DT_REG DT_REG + +#define MMPA_STDIN STDIN_FILENO +#define MMPA_STDOUT STDOUT_FILENO +#define MMPA_STDERR STDERR_FILENO + +#define MMPA_RTLD_NOW RTLD_NOW +#define MMPA_RTLD_GLOBAL RTLD_GLOBAL +#define MMPA_RTLD_LAZY RTLD_LAZY +#define MMPA_RTLD_NODELETE RTLD_NODELETE + +#define MMPA_DL_EXT_NAME ".so" + +MMPA_FUNC_VISIBILITY INT32 mmCreateTask(mmThread *threadHandle, mmUserBlock_t *funcBlock); +MMPA_FUNC_VISIBILITY INT32 mmJoinTask(mmThread *threadHandle); +MMPA_FUNC_VISIBILITY INT32 mmMutexInit(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexTryLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexUnLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexDestroy(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondInit(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondLockInit(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondLock(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondUnLock(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondLockDestroy(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmRWLockInit(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockRDLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockTryRDLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockWRLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockTryWRLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRDLockUnLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmWRLockUnLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockDestroy(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmCondWait(mmCond *cond, mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondTimedWait(mmCond *cond, 
mmMutexFC *mutex, UINT32 milliSecond); +MMPA_FUNC_VISIBILITY INT32 mmCondNotify(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondNotifyAll(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondDestroy(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmGetPid(); +MMPA_FUNC_VISIBILITY INT32 mmGetTid(); +MMPA_FUNC_VISIBILITY INT32 mmGetPidHandle(mmProcess *processHandle); +MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTime); +MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTime); + +MMPA_FUNC_VISIBILITY INT32 mmSemInit(mmSem_t *sem, UINT32 value); +MMPA_FUNC_VISIBILITY INT32 mmSemWait(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmSemPost(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmSemDestroy(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmOpen(const CHAR *pathName, INT32 flags); +MMPA_FUNC_VISIBILITY INT32 mmOpen2(const CHAR *pathName, INT32 flags, MODE mode); +MMPA_FUNC_VISIBILITY FILE *mmPopen(CHAR *command, CHAR *type); +MMPA_FUNC_VISIBILITY INT32 mmClose(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmPclose(FILE *stream); +MMPA_FUNC_VISIBILITY mmSsize_t mmWrite(INT32 fd, VOID *buf, UINT32 bufLen); +MMPA_FUNC_VISIBILITY mmSsize_t mmRead(INT32 fd, VOID *buf, UINT32 bufLen); +MMPA_FUNC_VISIBILITY mmSockHandle mmSocket(INT32 sockFamily, INT32 type, INT32 protocol); +MMPA_FUNC_VISIBILITY INT32 mmBind(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); +MMPA_FUNC_VISIBILITY INT32 mmListen(mmSockHandle sockFd, INT32 backLog); +MMPA_FUNC_VISIBILITY mmSockHandle mmAccept(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t *addrLen); +MMPA_FUNC_VISIBILITY INT32 mmConnect(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); +MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd); +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); +MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd, + VOID *sendMsg, + INT32 sendLen, + UINT32 sendFlag, + const mmSockAddr* addr, + INT32 tolen); +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, + VOID *recvBuf, + mmSize recvLen, + UINT32 recvFlag, + mmSockAddr* addr, + mmSocklen_t *FromLen); +MMPA_FUNC_VISIBILITY INT32 mmSAStartup(); +MMPA_FUNC_VISIBILITY INT32 mmSACleanup(); +MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode); +MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info); +MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName); +MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle); +MMPA_FUNC_VISIBILITY CHAR *mmDlerror(); +MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, + mmUserBlock_t *timerBlock, + UINT milliSecond, + UINT period); +MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle); +MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmFStatGet(INT32 fd, mmStat_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmMkdir(const CHAR *pathName, mmMode_t mode); +MMPA_FUNC_VISIBILITY INT32 mmSleep(UINT32 milliSecond); + +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithAttr(mmThread *threadHandle, mmUserBlock_t *funcBlock); +MMPA_FUNC_VISIBILITY INT32 mmGetProcessPrio(mmProcess pid); +MMPA_FUNC_VISIBILITY INT32 mmSetProcessPrio(mmProcess pid, INT32 processPrio); +MMPA_FUNC_VISIBILITY INT32 mmGetThreadPrio(mmThread *threadHandle); 
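
A minimal usage sketch of the thread and mutex entry points declared above, written against the signatures in this header; the mmpa_api.h include path, the procFunc/pulArg field names (taken from the Windows mmUserBlock_t further below), and EN_OK as the success code are assumptions, not confirmed by this patch:

#include "mmpa/mmpa_api.h"  /* assumed umbrella header; adjust to the real include path */

static mmMutex_t g_lock;
static INT32 g_counter = 0;

/* Thread body, following the userProcFunc shape: VOID *fn(VOID *arg). */
static VOID *Worker(VOID *arg)
{
    (VOID)arg;
    (VOID)mmMutexLock(&g_lock);
    ++g_counter;
    (VOID)mmMutexUnLock(&g_lock);
    return NULL;
}

static INT32 RunWorker(VOID)
{
    mmThread handle;
    mmUserBlock_t block;
    block.procFunc = Worker;  /* field names assumed from the Windows mmUserBlock_t definition */
    block.pulArg = NULL;

    if (mmMutexInit(&g_lock) != EN_OK) {
        return EN_ERROR;
    }
    if (mmCreateTask(&handle, &block) != EN_OK) {  /* spawn the worker thread */
        (VOID)mmMutexDestroy(&g_lock);
        return EN_ERROR;
    }
    (VOID)mmJoinTask(&handle);                     /* block until the worker exits */
    return mmMutexDestroy(&g_lock);
}
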
+MMPA_FUNC_VISIBILITY INT32 mmSetThreadPrio(mmThread *threadHandle, INT32 threadPrio); +MMPA_FUNC_VISIBILITY INT32 mmAccess(const CHAR *pathName); +MMPA_FUNC_VISIBILITY INT32 mmAccess2(const CHAR *pathName, INT32 mode); +MMPA_FUNC_VISIBILITY INT32 mmRmdir(const CHAR *pathName); + +MMPA_FUNC_VISIBILITY INT32 mmIoctl(mmProcess fd, INT32 ioctlCode, mmIoctlBuf *bufPtr); +MMPA_FUNC_VISIBILITY INT32 mmSemTimedWait(mmSem_t *sem, INT32 timeout); +MMPA_FUNC_VISIBILITY mmSsize_t mmWritev(mmProcess fd, mmIovSegment *iov, INT32 iovcnt); +MMPA_FUNC_VISIBILITY VOID mmMb(); +MMPA_FUNC_VISIBILITY INT32 mmInetAton(const CHAR *addrStr, mmInAddr *addr); + +MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag); +MMPA_FUNC_VISIBILITY mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len); +MMPA_FUNC_VISIBILITY mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len); +MMPA_FUNC_VISIBILITY INT32 mmCloseFile(mmProcess fileId); + +MMPA_FUNC_VISIBILITY mmAtomicType mmSetData(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType mmValueInc(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType mmValueSub(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmSetData64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueInc64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueSub64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); + +// The following 3 interfaces are to be deleted +MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); + +MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); + +// Poll related interface +MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); +MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); +MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, + INT32 fdCount, + INT32 timeout, + mmCompletionHandle handleIOCP, + pmmPollData polledData, + mmPollBack pollBack); +MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); +MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); +MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); +MMPA_FUNC_VISIBILITY mmTimespec mmGetTickCount(); +MMPA_FUNC_VISIBILITY INT32 mmGetRealPath(CHAR *path, CHAR *realPath); +MMPA_FUNC_VISIBILITY INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen); + +MMPA_FUNC_VISIBILITY INT32 mmDup2(INT32 oldFd, INT32 newFd); + +MMPA_FUNC_VISIBILITY INT32 mmDup(INT32 fd); + +MMPA_FUNC_VISIBILITY INT32 mmUnlink(const CHAR *filename); + +MMPA_FUNC_VISIBILITY INT32 mmChmod(const CHAR *filename, INT32 mode); + +MMPA_FUNC_VISIBILITY INT32 mmFileno(FILE *stream); + +MMPA_FUNC_VISIBILITY INT32 mmScandir(const CHAR *path, mmDirent ***entryList, mmFilter filterFunc, mmSort sort); +MMPA_FUNC_VISIBILITY INT32 mmScandir2(const CHAR *path, mmDirent2 ***entryList, mmFilter2 
filterFunc, mmSort2 sort); + +MMPA_FUNC_VISIBILITY VOID mmScandirFree(mmDirent **entryList, INT32 count); +MMPA_FUNC_VISIBILITY VOID mmScandirFree2(mmDirent2 **entryList, INT32 count); + +MMPA_FUNC_VISIBILITY mmMsgid mmMsgCreate(mmKey_t key, INT32 msgFlag); + +MMPA_FUNC_VISIBILITY mmMsgid mmMsgOpen(mmKey_t key, INT32 msgFlag); + +MMPA_FUNC_VISIBILITY INT32 mmMsgSnd(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); + +MMPA_FUNC_VISIBILITY INT32 mmMsgRcv(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); + +MMPA_FUNC_VISIBILITY INT32 mmMsgClose(mmMsgid msqid); + +MMPA_FUNC_VISIBILITY INT32 mmLocalTimeR(const time_t *timep, struct tm *result); + +MMPA_FUNC_VISIBILITY INT32 mmGetOptErr(); +MMPA_FUNC_VISIBILITY VOID mmSetOptErr(INT32 mmOptErr); +MMPA_FUNC_VISIBILITY INT32 mmGetOptInd(); +MMPA_FUNC_VISIBILITY VOID mmSetOptInd(INT32 mmOptInd); +MMPA_FUNC_VISIBILITY INT32 mmGetOptOpt(); +MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); +MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); +MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); +MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); +MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, + char *const *argv, + const char *opts, + const mmStructOption *longOpts, + INT32 *longIndex); + +MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); +MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length); + +MMPA_FUNC_VISIBILITY INT32 mmTlsCreate(mmThreadKey *key, VOID (*destructor)(VOID *)); +MMPA_FUNC_VISIBILITY INT32 mmTlsSet(mmThreadKey key, const VOID *value); +MMPA_FUNC_VISIBILITY VOID *mmTlsGet(mmThreadKey key); +MMPA_FUNC_VISIBILITY INT32 mmTlsDelete(mmThreadKey key); +MMPA_FUNC_VISIBILITY INT32 mmGetOsType(); + +MMPA_FUNC_VISIBILITY INT32 mmFsync(mmProcess fd); +MMPA_FUNC_VISIBILITY INT32 mmFsync2(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmChdir(const CHAR *path); +MMPA_FUNC_VISIBILITY INT32 mmUmask(INT32 pmode); +MMPA_FUNC_VISIBILITY INT32 mmThreadKill(mmThread id); +MMPA_FUNC_VISIBILITY INT32 mmWaitPid(mmProcess pid, INT32 *status, INT32 options); + +MMPA_FUNC_VISIBILITY INT32 mmGetCwd(CHAR *buffer, INT32 maxLen); +MMPA_FUNC_VISIBILITY INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len); +MMPA_FUNC_VISIBILITY INT32 mmSetEnv(const CHAR *name, const CHAR *value, INT32 overwrite); +MMPA_FUNC_VISIBILITY CHAR *mmStrTokR(CHAR *str, const CHAR *delim, CHAR **saveptr); +MMPA_FUNC_VISIBILITY CHAR *mmDirName(CHAR *path); +MMPA_FUNC_VISIBILITY CHAR *mmBaseName(CHAR *path); +MMPA_FUNC_VISIBILITY INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize); + +/* + * Function: set the thread name created by mmcreatetask + * Input: pstThreadHandle: thread ID + * name: thread name, the actual length of name must be < MMPA_THREADNAME_SIZE + * The input parameter error returns EN_INVALID_PARAM, the execution success returns EN_OK, and the + * execution failure returns EN_ERROR + */ +MMPA_FUNC_VISIBILITY INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name); + +/* + * Function: get thread name + * Input: pstThreadHandle: thread ID + * size: Cache length of thread name + * name:User allocated cache for thread name, Cache length must be >= MMPA_THREADNAME_SIZE + * The input parameter error returns EN_INVALID_PARAM, the execution success returns EN_OK, and the + * execution failure returns EN_ERROR + */ +MMPA_FUNC_VISIBILITY INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, INT32 size); +/* + * Function:Set the thread name of the currently executing thread - 
call inside the thread body + * Input:name:Thread name to be set + * The input parameter error returns EN_INVALID_PARAM, the execution success returns EN_OK, and the + * execution failure returns EN_ERROR + */ +MMPA_FUNC_VISIBILITY INT32 mmSetCurrentThreadName(const CHAR *name); +/* + * Function:Get the thread name of the currently executing thread - in body call + * Input:name:The name of the thread to get, and the cache is allocated by the user,size>=MMPA_THREADNAME_SIZE + * The input parameter error returns EN_INVALID_PARAM, the execution success returns EN_OK, and the + * execution failure returns EN_ERROR + */ +MMPA_FUNC_VISIBILITY INT32 mmGetCurrentThreadName(CHAR *name, INT32 size); +MMPA_FUNC_VISIBILITY INT32 mmGetFileSize(const CHAR *fileName, ULONGLONG *length); +MMPA_FUNC_VISIBILITY INT32 mmIsDir(const CHAR *fileName); +MMPA_FUNC_VISIBILITY INT32 mmGetOsName(CHAR *name, INT32 nameSize); +MMPA_FUNC_VISIBILITY INT32 mmGetOsVersion(CHAR *versionInfo, INT32 versionLength); +MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count); +MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count); +MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); +MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); +MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, + const mmArgvEnv *env, + const char *stdoutRedirectFile, + mmProcess *id); + +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, + const mmUserBlock_t *funcBlock, + const mmThreadAttr *threadAttr); +MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); +MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); +MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); +MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); +#define MMPA_DLL_API + +#ifdef __cplusplus +#if __cplusplus +} +#endif /* __cpluscplus */ +#endif // __cpluscplus + +#endif // MMPA_LINUX_MMPA_LINUX_H_ diff --git a/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_linux.h b/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_linux.h new file mode 100644 index 00000000..9df5b9ce --- /dev/null +++ b/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_linux.h @@ -0,0 +1,98 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MMPA_TYPEDEF_LINUX_H +#define MMPA_TYPEDEF_LINUX_H + +#ifdef __cplusplus +#if __cplusplus +extern "C" { +#endif // __cpluscplus +#endif // __cpluscplus + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +typedef unsigned char UINT8; +typedef signed char INT8; +typedef unsigned short UINT16; +typedef signed short INT16; +typedef unsigned int UINT32; +typedef signed int INT32; +typedef unsigned long long UINT64; +typedef signed long long INT64; +typedef float FLOAT; +typedef double DOUBLE; +typedef void VOID; +typedef unsigned char UCHAR; +typedef char CHAR; +typedef unsigned short USHORT; +typedef short SHORT; +typedef unsigned int UINT; +typedef int INT; +typedef unsigned long ULONG; +typedef unsigned long long ULONGLONG; + +typedef long LONG; + +#define HANDLE_INVALID_VALUE (-1) +#define MMPA_MEM_MAX_LEN (0x7fffffff) +#define MMPA_PROCESS_ERROR (0x7fffffff) +#define PATH_SIZE 256 +#define MAX_IOVEC_SIZE 32 +#define MMPA_MAX_SLEEP_MILLSECOND 4294967 +#define MAX_PIPE_COUNT 2 +#define MMPA_PIPE_COUNT 2 +#define MMPA_THREADNAME_SIZE 16 +#define MMPA_MIN_OS_NAME_SIZE 64 +#define MMPA_MIN_OS_VERSION_SIZE 128 + +#define MMPA_ONE_THOUSAND 1000 +#define MMPA_ONE_BILLION 1000000000 +#define MMPA_COMPUTER_BEGIN_YEAR 1900 +#define MMPA_ZERO 0 +#define MMPA_MAX_THREAD_PIO 99 +#define MMPA_MIN_THREAD_PIO 1 +#define MMPA_DEFAULT_PIPE_PERMISSION 0777 +#define MMPA_DEFAULT_MSG_TYPE 1 + +#define MMPA_THREAD_SCHED_RR SCHED_RR +#define MMPA_THREAD_SCHED_FIFO SCHED_FIFO +#define MMPA_THREAD_SCHED_OTHER SCHED_OTHER +#define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN + +#define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER + +#define MMPA_MAX_NI 19 +#define MMPA_MIN_NI (-20) + +#define EN_OK 0 +#define EN_ERR 1 +#define EN_ERROR (-1) +#define EN_INVALID_PARAM (-2) +#define EN_TIMEOUT (-3) + +#ifdef __cplusplus +#if __cplusplus +} +#endif // __cpluscplus +#endif // __cpluscplus +#endif // MMPA_TYPEDEF_LINUX_H_ diff --git a/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_win.h b/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_win.h new file mode 100644 index 00000000..58ebb1a0 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_win.h @@ -0,0 +1,83 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MMPA_TYPEDEF_WIN_H +#define MMPA_TYPEDEF_WIN_H + +#ifdef __cplusplus +#if __cplusplus +extern "C" { +#endif // __cpluscplus +#endif // __cpluscplus + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#define EN_OK 0 +#define EN_ERR 1 +#define EN_ERROR (-1) +#define EN_INVALID_PARAM (-2) +#define EN_TIMEOUT (-3) + +#define HANDLE_INVALID_VALUE (-1) +#define INVALID_SOCKET_HANDLE INVALID_SOCKET +#define MMPA_MEM_MAX_LEN (0x7fffffff) +#define MMPA_PROCESS_ERROR (0x7fffffff) + +#define MMPA_ONE_THOUSAND 1000 +#define MMPA_COMPUTER_BEGIN_YEAR 1900 +#define SUMMER_TIME_OR_NOT (-1) +#define MMPA_ZERO 0 +#define MMPA_VALUE_ONE 1 +#define MMPA_SOCKET_MAIN_EDITION 2 +#define MMPA_SOCKET_SECOND_EDITION 0 +#define MMPA_PIPE_BUF_SIZE 1024 +#define MMPA_MAX_SCANDIR_COUNT 1024 +#define MAX_IOVEC_SIZE 32 +#define MMPA_PIPE_COUNT 2 +#define MMPA_THREADNAME_SIZE 16 +#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) +#define MMPA_MIN_OS_VERSION_SIZE 64 + +#define MMPA_MAX_NI 19 +#define MMPA_MIDDLE_NI 5 +#define MMPA_LOW_NI (-5) +#define MMPA_MIN_NI (-20) +#define MMPA_MAX_FILE 128 + +#define MMPA_MAX_THREAD_PIO 99 +#define MMPA_MIDDLE_THREAD_PIO 66 +#define MMPA_LOW_THREAD_PIO 33 +#define MMPA_MIN_THREAD_PIO 1 + +#define MMPA_THREAD_SCHED_RR 0 +#define MMPA_THREAD_SCHED_FIFO 0 +#define MMPA_THREAD_SCHED_OTHER 0 +#define MMPA_THREAD_MIN_STACK_SIZE 0 + +#define MM_MUTEX_INITIALIZER NULL + +#ifdef __cplusplus +#if __cplusplus +} +#endif // __cpluscplus +#endif // __cpluscplus +#endif // _MMPA_TYPEDEF_WIN_H_ diff --git a/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_win.h new file mode 100644 index 00000000..49e97a5d --- /dev/null +++ b/third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_win.h @@ -0,0 +1,566 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MMPA_WIN_MMPA_WIN_H +#define MMPA_WIN_MMPA_WIN_H +#ifdef __cplusplus +#if __cplusplus +extern "C" { +#endif // __cpluscplus +#endif // __cpluscplus +#ifdef MMPA_DLL +#define MMPA_DLL_API __declspec(dllexport) +#else +#define MMPA_DLL_API __declspec(dllimport) +#endif + +#define MMPA_MACINFO_DEFAULT_SIZE 18 +#define MMPA_CPUDESC_DEFAULT_SIZE 64 + +#pragma section(".CRT$XCU", long, read) +#pragma section(".CRT$XPU", long, read) + +typedef HANDLE mmMutex_t; +typedef HANDLE mmThread; +typedef HANDLE mmProcess; +typedef HANDLE mmPollHandle; +typedef HANDLE mmPipeHandle; +typedef HANDLE mmFileHandle; +typedef HANDLE mmCompletionHandle; +typedef HANDLE mmFd_t; +typedef CRITICAL_SECTION mmMutexFC; +typedef CONDITION_VARIABLE mmCond; + +typedef VOID *(*userProcFunc)(VOID *pulArg); +typedef struct { + userProcFunc procFunc; + VOID *pulArg; +} mmUserBlock_t; + +typedef DWORD mmThreadKey; +typedef SYSTEMTIME mmSystemTime_t; + +typedef HANDLE mmSem_t; +typedef SOCKET mmSockHandle; +typedef SRWLOCK mmRWLock_t; +typedef struct sockaddr mmSockAddr; +typedef int mmSocklen_t; +typedef int mmSemTimeout_t; +typedef long mmAtomicType; +typedef long long mmAtomicType64; +typedef DWORD mmExitCode; +typedef DWORD mmErrorMsg; +typedef int mmKey_t; +typedef HANDLE mmMsgid; +typedef long int mmOfft_t; +typedef int mmPid_t; + +typedef INT32 mmSsize_t; +typedef int mmSize; // size +typedef size_t mmSize_t; +typedef VOID mmshmId_ds; +typedef long long MM_LONG; + +typedef enum { + DT_DIR = FILE_ATTRIBUTE_DIRECTORY, +} mmDtype; + +typedef struct { + unsigned char d_type; + char d_name[MAX_PATH]; // file name +} mmDirent; + +typedef struct { + unsigned long d_type; + char d_name[MAX_PATH]; // file name +} mmDirent2; + +typedef int (*mmFilter)(const mmDirent *entry); +typedef int (*mmFilter2)(const mmDirent2 *entry); +typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); +typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b); + +typedef struct { + VOID *sendBuf; + INT32 sendLen; +} mmIovSegment; +typedef PVOID mmInAddr; + +typedef enum { + pollTypeRead = 1, // pipeline reading + pollTypeRecv, // socket receive + pollTypeIoctl, // ioctl read +} mmPollType; + +typedef struct { + HANDLE completionHandle; + mmPollType overlapType; + OVERLAPPED oa; +} mmComPletionKey, *pmmComPletionKey; + +typedef struct { + VOID *priv; // User defined private content + mmPollHandle bufHandle; // Value of handle corresponding to buf + mmPollType bufType; // Data types polled to + VOID *buf; + UINT32 bufLen; + UINT32 bufRes; +} mmPollData, *pmmPollData; + +typedef VOID (*mmPollBack)(pmmPollData); +typedef struct { + mmPollHandle handle; // The file descriptor or handle of poll is required + mmPollType pollType; // Operation type requiring poll,read or recv or ioctl + INT32 ioctlCode; // IOCTL operation code, dedicated to IOCTL + mmComPletionKey completionKey; // The default value is blank, which will be used in windows to receive the data with + // different handle +} mmPollfd; + +typedef struct { + OVERLAPPED oa; + HANDLE completionHandle; + WSABUF DataBuf; +} PRE_IO_DATA, *PPRE_IO_DATA; + +typedef OVERLAPPED mmOverLap; + +typedef struct { + UINT32 createFlag; + INT32 oaFlag; // Overlap operation is supported if it is not 0 +} mmCreateFlag; + +typedef struct { + VOID *inbuf; + INT32 inbufLen; + VOID *outbuf; + INT32 outbufLen; + mmOverLap *oa; +} mmIoctlBuf; + +typedef struct { + HANDLE timerQueue; + HANDLE timerHandle; +} mmTimerHandle; + +typedef struct { + LONG tv_sec; + LONG tv_usec; +} mmTimeval; + 
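
mmTimeval mirrors the POSIX struct timeval and is consumed by mmGetTimeOfDay() declared later in this header; a short sketch of the intended use, where the mmpa_api.h include path, the NULL timezone argument, and EN_OK as the success code are assumptions:

#include "mmpa/mmpa_api.h"  /* assumed umbrella header; adjust to the real include path */

/* Measure a wall-clock interval in milliseconds from two mmGetTimeOfDay() samples. */
static long long ElapsedMs(VOID)
{
    mmTimeval start = {0};
    mmTimeval end = {0};

    if (mmGetTimeOfDay(&start, NULL) != EN_OK) {  /* NULL timezone assumed, as with gettimeofday() */
        return -1;
    }
    /* ... code under measurement ... */
    if (mmGetTimeOfDay(&end, NULL) != EN_OK) {
        return -1;
    }
    return (long long)(end.tv_sec - start.tv_sec) * MMPA_ONE_THOUSAND +
           (end.tv_usec - start.tv_usec) / MMPA_ONE_THOUSAND;
}
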
+typedef struct { + INT32 tz_minuteswest; // How many minutes is it different from Greenwich + INT32 tz_dsttime; // DST correction type +} mmTimezone; + +typedef struct { + MM_LONG tv_sec; + MM_LONG tv_nsec; +} mmTimespec; + +typedef mmTimerHandle mmTimer; + +#define mmTLS __declspec(thread) + +typedef struct stat mmStat_t; +typedef struct _stat64 mmStat64_t; +typedef int mmMode_t; + +typedef int MODE; + +typedef struct { + const char *name; + int has_arg; + int *flag; + int val; +} mmStructOption; + +typedef struct { + ULONGLONG totalSize; + ULONGLONG freeSize; + ULONGLONG availSize; +} mmDiskSize; + +typedef struct { + const char *dli_fname; + void *dli_fbase; + const char *dli_sname; + void *dli_saddr; + size_t dli_size; /* ELF only */ + int dli_bind; /* ELF only */ + int dli_type; +} mmDlInfo; + +typedef struct { + char addr[MMPA_MACINFO_DEFAULT_SIZE]; // ex:aa-bb-cc-dd-ee-ff\0 +} mmMacInfo; + +typedef struct { + char arch[MMPA_CPUDESC_DEFAULT_SIZE]; + char manufacturer[MMPA_CPUDESC_DEFAULT_SIZE]; // vendor + char version[MMPA_CPUDESC_DEFAULT_SIZE]; // modelname + INT32 frequency; // cpu frequency + INT32 maxFrequency; // max speed + INT32 ncores; // cpu cores + INT32 nthreads; // cpu thread count + INT32 ncounts; // logical cpu nums +} mmCpuDesc; + +typedef struct { + char **argv; + INT32 argvCount; + char **envp; + INT32 envpCount; +} mmArgvEnv; + +// Windows currently does not support properties other than thread separation properties +typedef struct { + INT32 detachFlag; // Thread detach property: 0 do not detach 1 detach + INT32 priorityFlag; + INT32 priority; + INT32 policyFlag; + INT32 policy; + INT32 stackFlag; + UINT32 stackSize; +} mmThreadAttr; + +typedef VOID (*mmPf)(VOID); + +#define mm_no_argument 0 +#define mm_required_argument 1 +#define mm_optional_argument 2 + +#define M_FILE_RDONLY GENERIC_READ +#define M_FILE_WRONLY GENERIC_WRITE +#define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE) +#define M_FILE_CREAT OPEN_ALWAYS + +#define M_RDONLY _O_RDONLY +#define M_WRONLY _O_WRONLY +#define M_RDWR _O_RDWR +#define M_IRWXU _O_RDWR +#define M_CREAT _O_CREAT +#define M_BINARY _O_BINARY +#define M_TRUNC _O_TRUNC +#define M_APPEND _O_APPEND + +#define M_IREAD _S_IREAD +#define M_IRUSR _S_IREAD +#define M_IWRITE _S_IWRITE +#define M_IWUSR _S_IWRITE +#define M_IXUSR 0 + +#define M_IN_CREATE FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME +#define M_IN_CLOSE_WRITE FILE_NOTIFY_CHANGE_LAST_WRITE +#define M_IN_IGNORED FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME + +#define M_OUT_CREATE 0x00000100 +#define M_OUT_CLOSE_WRITE 0x00000008 +#define M_OUT_IGNORED 0x00008000 +#define M_OUT_ISDIR 0x40000000 + +#define M_MSG_CREAT 1 +#define M_MSG_EXCL 2 +#define M_MSG_NOWAIT 3 + +#define M_WAIT_NOHANG 1 +#define M_WAIT_UNTRACED 2 + +#define M_UMASK_USRREAD _S_IREAD +#define M_UMASK_GRPREAD _S_IREAD +#define M_UMASK_OTHREAD _S_IREAD + +#define M_UMASK_USRWRITE _S_IWRITE +#define M_UMASK_GRPWRITE _S_IWRITE +#define M_UMASK_OTHWRITE _S_IWRITE + +#define M_UMASK_USREXEC 0 +#define M_UMASK_GRPEXEC 0 +#define M_UMASK_OTHEXEC 0 + +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 +#define MM_DT_DIR 16 +#define MM_DT_REG 32 + +#define mmConstructor(x) __declspec(allocate(".CRT$XCU")) mmPf con = x +#define mmDestructor(x) __declspec(allocate(".CRT$XPU")) mmPf de = x + +#define MMPA_PRINT_ERROR ((opterr) && (*options != ':')) +#define MMPA_FLAG_PERMUTE 0x01 // permute non-options 
to the end of argv +#define MMPA_FLAG_ALLARGS 0x02 // treat non-options as args to option "-1" +#define MMPA_FLAG_LONGONLY 0x04 // operate as getopt_long_only +// return values +#define MMPA_BADCH (INT32)'?' +#define MMPA_BADARG ((*options == ':') ? (INT32)':' : (INT32)'?') +#define MMPA_INORDER (INT32)1 + +#define MMPA_NO_ARGUMENT 0 +#define MMPA_REQUIRED_ARGUMENT 1 +#define MMPA_OPTIONAL_ARGUMENT 2 + +#define MMPA_EMSG "" +#define MMPA_MAX_PATH MAX_PATH +#define M_NAME_MAX _MAX_FNAME + +#define M_F_OK 0 +#define M_X_OK 1 +#define M_W_OK 2 +#define M_R_OK 4 + +#define MMPA_STDIN stdin +#define MMPA_STDOUT stdout +#define MMPA_STDERR stderr + +#define MMPA_RTLD_NOW 0 +#define MMPA_RTLD_GLOBAL 0 +#define MMPA_RTLD_LAZY 0 +#define MMPA_RTLD_NODELETE 0 + +#define MMPA_DL_EXT_NAME ".dll" + +#define __attribute__(v) + +MMPA_FUNC_VISIBILITY INT32 mmCreateTask(mmThread *threadHandle, mmUserBlock_t *funcBlock); +MMPA_FUNC_VISIBILITY INT32 mmJoinTask(mmThread *threadHandle); +MMPA_FUNC_VISIBILITY INT32 mmMutexInit(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexTryLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexUnLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexDestroy(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondInit(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondLockInit(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondLock(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondUnLock(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondLockDestroy(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmRWLockInit(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockRDLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockTryRDLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockWRLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockTryWRLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRDLockUnLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmWRLockUnLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockDestroy(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmCondWait(mmCond *cond, mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondTimedWait(mmCond *cond, mmMutexFC *mutex, UINT32 milliSecond); + +MMPA_FUNC_VISIBILITY INT32 mmCondNotify(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondNotifyAll(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondDestroy(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmGetPid(VOID); +MMPA_FUNC_VISIBILITY INT32 mmGetTid(VOID); +MMPA_FUNC_VISIBILITY INT32 mmGetPidHandle(mmProcess *processHandle); +MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTime); +MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTime); +MMPA_FUNC_VISIBILITY INT32 mmSemInit(mmSem_t *sem, UINT32 value); +MMPA_FUNC_VISIBILITY INT32 mmSemWait(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmSemPost(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmSemDestroy(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmOpen(const CHAR *pathName, INT32 flags); +MMPA_FUNC_VISIBILITY INT32 mmOpen2(const CHAR *pathName, INT32 flags, MODE mode); +MMPA_FUNC_VISIBILITY FILE *mmPopen(CHAR *command, CHAR *type); +MMPA_FUNC_VISIBILITY INT32 mmClose(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmPclose(FILE *stream); +MMPA_FUNC_VISIBILITY mmSsize_t mmWrite(INT32 fd, VOID *buf, UINT32 bufLen); +MMPA_FUNC_VISIBILITY mmSsize_t mmRead(INT32 fd, VOID *buf, UINT32 bufLen); +MMPA_FUNC_VISIBILITY mmSockHandle mmSocket(INT32 sockFamily, INT32 type, INT32 protocol); +MMPA_FUNC_VISIBILITY 
INT32 mmBind(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); +MMPA_FUNC_VISIBILITY INT32 mmListen(mmSockHandle sockFd, INT32 backLog); +MMPA_FUNC_VISIBILITY mmSockHandle mmAccept(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t *addrLen); +MMPA_FUNC_VISIBILITY INT32 mmConnect(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); +MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd); +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); +MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd, + VOID *sendMsg, + INT32 sendLen, + UINT32 sendFlag, + const mmSockAddr* addr, + INT32 tolen); +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, + VOID *recvBuf, + mmSize recvLen, + UINT32 recvFlag, + mmSockAddr* addr, + mmSocklen_t *FromLen); +MMPA_FUNC_VISIBILITY INT32 mmSAStartup(VOID); +MMPA_FUNC_VISIBILITY INT32 mmSACleanup(VOID); +MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT mode); +MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info); +MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *fileName); +MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle); +MMPA_FUNC_VISIBILITY CHAR *mmDlerror(VOID); +MMPA_FUNC_VISIBILITY INT32 + mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period); +MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle); +MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmFStatGet(INT32 fd, mmStat_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmMkdir(const CHAR *pathName, mmMode_t mode); +MMPA_FUNC_VISIBILITY INT32 mmSleep(UINT32 milliSecond); +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithAttr(mmThread *threadHandle, mmUserBlock_t *funcBlock); +MMPA_FUNC_VISIBILITY INT32 mmGetProcessPrio(mmProcess pid); +MMPA_FUNC_VISIBILITY INT32 mmSetProcessPrio(mmProcess pid, INT32 processPrio); +MMPA_FUNC_VISIBILITY INT32 mmGetThreadPrio(mmThread *threadHandle); +MMPA_FUNC_VISIBILITY INT32 mmSetThreadPrio(mmThread *threadHandle, INT32 threadPrio); +MMPA_FUNC_VISIBILITY INT32 mmAccess(const CHAR *pathName); +MMPA_FUNC_VISIBILITY INT32 mmAccess2(const CHAR *pathName, INT32 mode); +MMPA_FUNC_VISIBILITY INT32 mmRmdir(const CHAR *pathName); + +MMPA_FUNC_VISIBILITY INT32 mmIoctl(mmProcess fd, INT32 ioctlCode, mmIoctlBuf *bufPtr); +MMPA_FUNC_VISIBILITY INT32 mmSemTimedWait(mmSem_t *sem, INT32 timeout); +MMPA_FUNC_VISIBILITY mmSsize_t mmWritev(mmSockHandle fd, mmIovSegment *iov, INT32 iovcnt); +MMPA_FUNC_VISIBILITY VOID mmMb(); +MMPA_FUNC_VISIBILITY INT32 mmInetAton(const CHAR *addrStr, mmInAddr *addr); + +MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag); +MMPA_FUNC_VISIBILITY mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len); +MMPA_FUNC_VISIBILITY mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len); +MMPA_FUNC_VISIBILITY INT32 mmCloseFile(mmProcess fileId); + +MMPA_FUNC_VISIBILITY mmAtomicType mmSetData(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType mmValueInc(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType mmValueSub(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmSetData64(mmAtomicType64 *ptr, 
mmAtomicType64 value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueInc64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueSub64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); + +MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); + +MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); + +MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); +MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); +MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, + pmmPollData polledData, mmPollBack pollBack); + +MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); +MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); +MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); +MMPA_FUNC_VISIBILITY mmTimespec mmGetTickCount(); +MMPA_FUNC_VISIBILITY INT32 mmGetRealPath(CHAR *path, CHAR *realPath); + +MMPA_FUNC_VISIBILITY INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen); + +MMPA_FUNC_VISIBILITY INT32 mmDup2(INT32 oldFd, INT32 newFd); +MMPA_FUNC_VISIBILITY INT32 mmDup(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmUnlink(const CHAR *filename); +MMPA_FUNC_VISIBILITY INT32 mmChmod(const CHAR *filename, INT32 mode); +MMPA_FUNC_VISIBILITY INT32 mmFileno(FILE *stream); +MMPA_FUNC_VISIBILITY INT32 mmScandir(const CHAR *path, mmDirent ***entryList, mmFilter filterFunc, mmSort sort); +MMPA_FUNC_VISIBILITY INT32 mmScandir2(const CHAR *path, mmDirent2 ***entryList, mmFilter2 filterFunc, mmSort2 sort); +MMPA_FUNC_VISIBILITY VOID mmScandirFree(mmDirent **entryList, INT32 count); +MMPA_FUNC_VISIBILITY VOID mmScandirFree2(mmDirent2 **entryList, INT32 count); + +MMPA_FUNC_VISIBILITY mmMsgid mmMsgCreate(mmKey_t key, INT32 msgFlag); +MMPA_FUNC_VISIBILITY mmMsgid mmMsgOpen(mmKey_t key, INT32 msgFlag); +MMPA_FUNC_VISIBILITY INT32 mmMsgRcv(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); +MMPA_FUNC_VISIBILITY INT32 mmMsgSnd(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); + +MMPA_FUNC_VISIBILITY INT32 mmMsgClose(mmMsgid msqid); + +MMPA_FUNC_VISIBILITY INT32 mmLocalTimeR(const time_t *timep, struct tm *result); +MMPA_FUNC_VISIBILITY INT32 mmGetOptErr(); +MMPA_FUNC_VISIBILITY VOID mmSetOptErr(INT32 mmOptErr); +MMPA_FUNC_VISIBILITY INT32 mmGetOptInd(); +MMPA_FUNC_VISIBILITY VOID mmSetOptInd(INT32 mmOptInd); +MMPA_FUNC_VISIBILITY INT32 mmGetOptOpt(); +MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); +MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); +MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); +MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); +MMPA_FUNC_VISIBILITY INT32 + mmGetOptLong(INT32 argc, CHAR *const *argv, const CHAR *opts, const mmStructOption *longopts, INT32 *longindex); + +MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); +MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, 
UINT32 length); + +MMPA_FUNC_VISIBILITY INT32 mmTlsCreate(mmThreadKey *key, VOID (*destructor)(VOID *)); +MMPA_FUNC_VISIBILITY INT32 mmTlsSet(mmThreadKey key, const VOID *value); +MMPA_FUNC_VISIBILITY VOID *mmTlsGet(mmThreadKey key); +MMPA_FUNC_VISIBILITY INT32 mmTlsDelete(mmThreadKey key); +MMPA_FUNC_VISIBILITY INT32 mmGetOsType(); + +MMPA_FUNC_VISIBILITY INT32 mmFsync(mmProcess fd); +MMPA_FUNC_VISIBILITY INT32 mmFsync2(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmChdir(const CHAR *path); +MMPA_FUNC_VISIBILITY INT32 mmUmask(INT32 pmode); +MMPA_FUNC_VISIBILITY INT32 mmWaitPid(mmProcess pid, INT32 *status, INT32 options); + +MMPA_FUNC_VISIBILITY INT32 mmGetCwd(CHAR *buffer, INT32 maxLen); +MMPA_FUNC_VISIBILITY CHAR *mmStrTokR(CHAR *str, const CHAR *delim, CHAR **saveptr); + +MMPA_FUNC_VISIBILITY INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len); +MMPA_FUNC_VISIBILITY INT32 mmSetEnv(const CHAR *name, const CHAR *value, INT32 overwrite); +MMPA_FUNC_VISIBILITY CHAR *mmDirName(CHAR *path); +MMPA_FUNC_VISIBILITY CHAR *mmBaseName(CHAR *path); +MMPA_FUNC_VISIBILITY INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize); + +MMPA_FUNC_VISIBILITY INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name); +MMPA_FUNC_VISIBILITY INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, INT32 size); + +/* + * Function: set the thread name of the currently executing thread - internal call of thread, which is not supported + * under Windows temporarily, and is null. + * Input: name: the thread name to be set + * The input parameter error returns EN_INVALID_PARAM, the execution success returns EN_OK, and the + * execution failure returns EN_ERROR + */ +MMPA_FUNC_VISIBILITY INT32 mmSetCurrentThreadName(const CHAR *name); + +/* + * Function: Get the thread name of the currently executing thread - thread body call, not supported under windows, null + * implementation. + * Input:name:The name of the thread to get, and the cache is allocated by the user,size>=MMPA_THREADNAME_SIZE. 
+ * The input parameter error returns EN_INVALID_PARAM, the execution success returns + * EN_OK, and the execution failure returns EN_ERROR + */ +MMPA_FUNC_VISIBILITY INT32 mmGetCurrentThreadName(CHAR *name, INT32 size); + +MMPA_FUNC_VISIBILITY INT32 mmGetFileSize(const CHAR *fileName, ULONGLONG *length); +MMPA_FUNC_VISIBILITY INT32 mmIsDir(const CHAR *fileName); +MMPA_FUNC_VISIBILITY INT32 mmGetOsName(CHAR *name, INT32 nameSize); +MMPA_FUNC_VISIBILITY INT32 mmGetOsVersion(CHAR *versionInfo, INT32 versionLength); +MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count); +MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count); +MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); +MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); +MMPA_FUNC_VISIBILITY INT32 + mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile, mmProcess *id); + +MMPA_FUNC_VISIBILITY INT32 + mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, const mmThreadAttr *threadAttr); +MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); +MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); +MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); +MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); +#ifdef __cplusplus +#if __cplusplus +} +#endif /* __cpluscplus */ +#endif // __cpluscplus + +#endif // MMPA_WIN_MMPA_WIN_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/aipp.h b/third_party/fwkacllib/inc/inc/ops/aipp.h new file mode 100644 index 00000000..86805f72 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/aipp.h @@ -0,0 +1,78 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file aipp.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief Performs AI pre-processing (AIPP) on images including color space conversion (CSC), +image normalization (by subtracting the mean value or multiplying a factor), image cropping +(by specifying the crop start and cropping the image to the size required by the neural network), and much more. \n + +*@par Inputs: +*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer. +*@li params: Dynamic AIPP configuration parameters of type uint8. \n + +*@par Attributes: +*aipp_config_path: A required string, specifying the path of the AIPP configuration file. \n + +*@par Outputs: +*features: The AIPP-processed output tensor of type float16 or uint8. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*@par Restrictions: +*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. 
Please do not use it directly. +*/ +REG_OP(Aipp) + .INPUT(images, TensorType{DT_UINT8}) + .OPTIONAL_INPUT(params, TensorType{DT_UINT8}) + .OUTPUT(features, TensorType({DT_FLOAT16, DT_UINT8})) + .ATTR(aipp_config_path, String, "./aipp.cfg") + .OP_END_FACTORY_REG(Aipp) + +/** +*@brief Performs this op is for dynamic aipp.If you set aipp-mode to dynamic +in aipp config file, framework will auto add one input node to graph at last. \n + +*@par Inputs: +*data: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer. \n + +*@par Attributes: +*index: specify aipp serial num \n + +*@par Outputs: +*out: The AIPP-processed output tensor of all types. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AippData. +*@par Restrictions: +*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. +*/ +REG_OP(AippData) + .INPUT(data, TensorType::ALL()) + .OUTPUT(out, TensorType::ALL()) + .ATTR(index, Int, 0) + .OP_END_FACTORY_REG(AippData) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/all_ops.h b/third_party/fwkacllib/inc/inc/ops/all_ops.h new file mode 100644 index 00000000..cc11f5f9 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/all_ops.h @@ -0,0 +1,80 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
+ * \file all_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ + +#include "aipp.h" +#include "array_ops.h" +#include "audio_ops.h" +#include "batch_ops.h" +#include "bitwise_ops.h" +#include "boosted_trees_ops.h" +#include "candidate_sampling_ops.h" +#include "control_flow_ops.h" +#include "ctc_ops.h" +#include "data_flow_ops.h" +#include "elewise_calculation_ops.h" +#include "functional_ops.h" +#include "get_data_ops.h" +#include "hcom_ops.h" +#include "hvd_ops.h" +#include "image_ops.h" +#include "internal_ops.h" +#include "linalg_ops.h" +#include "list_ops.h" +#include "logging_ops.h" +#include "lookup_ops.h" +#include "math_ops.h" +#include "matrix_calculation_ops.h" +#include "nn_batch_norm_ops.h" +#include "nn_calculation_ops.h" +#include "nn_detect_ops.h" +#include "nn_norm_ops.h" +#include "nn_ops.h" +#include "nn_pooling_ops.h" +#include "nn_training_ops.h" +#include "nonlinear_fuc_ops.h" +#include "no_op.h" +#include "npu_loss_scale_ops.h" +#include "outfeed_ops.h" +#include "pad_ops.h" +#include "parsing_ops.h" +#include "quantize_ops.h" +#include "ragged_conversion_ops.h" +#include "random_ops.h" +#include "reduce_ops.h" +#include "resource_variable_ops.h" +#include "rnn.h" +#include "rpn_ops.h" +#include "save_ops.h" +#include "selection_ops.h" +#include "set_ops.h" +#include "sparse_ops.h" +#include "split_combination_ops.h" +#include "stateful_random_ops.h" +#include "stateless_random_ops.h" +#include "state_ops.h" +#include "string_ops.h" +#include "swap_co_ops.h" +#include "transformation_ops.h" +#include "condtake_ops.h" +#include "warp_perspective_ops.h" +#endif // OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/array_ops.h b/third_party/fwkacllib/inc/inc/ops/array_ops.h new file mode 100644 index 00000000..375802fc --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/array_ops.h @@ -0,0 +1,1231 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file array_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief Applies lower_bound(sorted_search_values, values) along each row. \n + +*@par Inputs: +*The input sorted_x and values can be one-dimensional vector. Inputs include: +* @li sorted_x:A `Tensor`. 2-D Tensor where each row is ordered. +* @li values:A `Tensor`. Must have the same type as `sorted_x`. \n + +*@par Attributes: +*@li out_type:An optional `DType` from: `int32, int64`. +Defaults to `int32`. \n + +*@par Outputs: +*y: A `Tensor` of type `out_type`. \n + +*@attention Constraints: +*The implementation for LowerBound on Ascend uses AI CPU, with bad performance. 
\n + +*@par Quantization supported or not +*Not supported +*@par Quantized inference supported or not +*Supported +*@par L2 convergence supported or not +*@par Multiple batches supported or not \n + +*@par Third-party framework compatibility +*Compatible with tensorflow Operator LowerBound. +*/ + +REG_OP(LowerBound) + .INPUT(sorted_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, \ + DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .INPUT(values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, \ + DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .ATTR(out_type, Type, DT_INT32) + .OP_END_FACTORY_REG(LowerBound) + +/** +*@brief Reverses variable length slices. \n + +*@par Inputs: +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" +are 0D scalars. +* @li x: A Tensor. The input to reverse. +* @li seq_lengths: A 1D Tensor of type int32 or int64. \n + +*@par Attributes: +*@li seq_dim: An optional int. The dimension along which +reversal is performed. +*@li batch_dim: An optional int. Defaults to "0". The dimension along which +reversal is performed. \n + +*@par Outputs: +*y: A rank k tensor. Has the same shape as input. The extracted banded tensor. \n + +*@attention Constraints: +*ReverseSequence runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ReverseSequence. +*/ + +REG_OP(ReverseSequence) + .INPUT(x, + TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(seq_lengths, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, + TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .REQUIRED_ATTR(seq_dim, Int) + .ATTR(batch_dim, Int, 0) + .OP_END_FACTORY_REG(ReverseSequence) + +/** +*@brief Copies a tensor setting everything outside a central band in each innermost matrix. \n + +*@par Inputs: +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" +are 0D scalars. +* @li x: A rank k tensor. +* @li num_lower: A 0D tensor. Number of superdiagonals to keep. If negative, +keeps entire upper triangle. +* @li num_upper: A 0D tensor. Number of superdiagonals to keep. If negative, +keeps entire upper triangle. \n + +*@par Outputs: +*y: A rank k tensor. Has the same shape as input. The extracted banded tensor. \n + +*@attention Constraints: +*MatrixBandPart runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MatrixBandPart. +*/ + +REG_OP(MatrixBandPart) + .INPUT(x, TensorType({ DT_INT8, DT_UINT8, \ + DT_INT16, DT_UINT16, DT_INT32, DT_INT64, + DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, + DT_COMPLEX64, DT_COMPLEX128 })) + .INPUT(num_lower, TensorType({ DT_INT32, DT_INT64 })) + .INPUT(num_upper, TensorType({ DT_INT32, DT_INT64 })) + .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, + DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(MatrixBandPart) + +/** +*@brief Finds unique elements in a 1D tensor. \n + +*@par Inputs: +*x: 1D tensor. +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" +are 0D scalars. \n + +*@par Attributes: +*out_idx: An optional DType from: "int32, int64". +Defaults to "int32". 
\n + +*@par Outputs: +*@li y: A Tensor. Has the same type as "x". +*@li idx: A Tensor of type "out_idx". +*@li count: A Tensor of type "out_idx". \n + +*@attention Constraints: +*UniqueWithCounts runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UniqueWithCounts. +*/ + +REG_OP(UniqueWithCounts) + .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING })) + .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING })) + .OUTPUT(idx, TensorType({ DT_INT32, DT_INT64 })) + .OUTPUT(count, TensorType({ DT_INT32, DT_INT64 })) + .REQUIRED_ATTR(out_idx, Type) + .OP_END_FACTORY_REG(UniqueWithCounts) + +/** +*@brief Finds unique elements in a 1D tensor. \n + +*@par Inputs: +*x: 1D tensor. +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" +are 0D scalars. \n + +*@par Attributes: +*out_idx: An optional DType from: "int32, int64". Defaults to "int32". \n + +*@par Outputs: +*@li y: "x" in the unique output "y". +*@li idx: A tensor the same size as "x". The index of each value of "x". \n + +*@attention Constraints: +*Unique runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Unique. +*/ + +REG_OP(Unique) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .OUTPUT(idx, TensorType({DT_INT32, DT_INT64})) + .ATTR(out_idx, Type, DT_INT32) + .OP_END_FACTORY_REG(Unique) + +/** +*@brief Finds unique elements in a 1D tensor. \n + +*@par Inputs: +*Input "x" is a k-dimensional tensor. Inputs "num_lower" and "num_upper" +are 0D scalars. +*Including: +* @li x: 1D tensor. +* @li axis: A Tensor of type int32. Defaults to "None". \n + +*@par Attributes: +*out_idx: An optional DType from: "int32, int64". +Defaults to "int32". \n + +*@par Outputs: +*@li y: "x" in the unique output "y". +*@li idx: A tensor the same size as "x". The index of each value of "x". \n + +*@attention Constraints: +*UniqueExt2 runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UniqueExt2. +*/ + +REG_OP(UniqueExt2) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .INPUT(axis, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .OUTPUT(idx, TensorType({DT_INT32, DT_INT64})) + .ATTR(out_idx, Type, DT_INT32) + .OP_END_FACTORY_REG(UniqueExt2) + +/** +*@brief Computes the inverse permutation of a tensor. \n + +*@par Inputs: +*x: A k-dimensional tensor. \n + +*@par Outputs: +*y: A 1D tensor. \n + +*@attention Constraints: +*InvertPermutation runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InvertPermutation. 
+*/ + +REG_OP(InvertPermutation) + .INPUT(x, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(InvertPermutation) + +/** +*@brief Checks a tensor for NaN and Inf values. \n + +*@par Inputs: +*x: A k-dimensional tensor. \n + +*@par Attributes: +*message: Prefix of the error message. \n + +*@par Outputs: +*y: The output tensor. \n + +*@attention Constraints: +*CheckNumerics runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator CheckNumerics. +*/ + +REG_OP(CheckNumerics) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(message, String) + .OP_END_FACTORY_REG(CheckNumerics) + +/** +*@brief Converts an array of flat indices into a tuple of coordinate arrays. \n + +*@par Inputs: +*Input "indices" is a 0D or 1D tensor. Input "dims" is a 1D tensor. +* @li indices: A 0D or 1D int Tensor whose elements are indices into +the flattened version of an array of dimensions "dims". +* @li dims: A 1D int Tensor of the same type as "indices". +*The shape of the array to use for unraveling indices. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "indices". \n + +*@attention Constraints: +*UnravelIndex runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UnravelIndex. +*/ + +REG_OP(UnravelIndex) + .INPUT(indices, TensorType({DT_INT32, DT_INT64})) + .INPUT(dims, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(UnravelIndex) + +/** +*@brief Applies upper_bound(sorted_search_values, values) along each row. \n + +*@par Inputs: +*Inputs "sorted_x" and "values" are 2D tensors. +* @li sorted_x: A 2D Tensor where each row is ordered. +* @li values: A 2D Tensor with the same numbers of rows as "sorted_x. \n + +*@par Attributes: +*out_type: sets the optional out_type attribute to value. \n + +*@par Outputs: +*y: A Tensor with the same shape as "values". \n + +*@attention Constraints: +*UpperBound runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UpperBound. +*/ + +REG_OP(UpperBound) + .INPUT(sorted_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .INPUT(values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .REQUIRED_ATTR(out_type, Type) + .OP_END_FACTORY_REG(UpperBound) + +/** +*@brief Finds unique elements in a 1D tensor. \n + +*@par Inputs: +*Inputs "x" and "axis" are 1D vectors. +* @li x: A 1D tensor. +* @li axis: A 1D tensor. \n + +*@par Attributes: +*out_idx: An optional DType from: "int32, int64". +Defaults to "int32". \n + +*@par Outputs: +*@li y: "x" in the unique output "y". +*@li idx: A tensor the same size as "x". The index of each value of "x". +*@li count: A tensor the same size as "x". The index of each value of "x". \n + +*@attention Constraints: +*UniqueWithCountsExt2 runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UniqueWithCountsExt2. 
+*/
+
+REG_OP(UniqueWithCountsExt2)
+    .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
+        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING }))
+    .INPUT(axis, TensorType({ DT_INT32, DT_INT64 }))
+    .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
+        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING }))
+    .OUTPUT(idx, TensorType({ DT_INT32, DT_INT64 }))
+    .OUTPUT(count, TensorType({ DT_INT32, DT_INT64 }))
+    .REQUIRED_ATTR(out_idx, Type)
+    .OP_END_FACTORY_REG(UniqueWithCountsExt2)
+
+/**
+*@brief Pads a tensor with mirrored values. \n
+
+*@par Inputs:
+* @li x: The tensor to be padded.
+* @li paddings: A two-column matrix specifying the padding sizes.
+The number of rows must equal the rank of "x". \n
+
+*@par Attributes:
+*mode: Either "REFLECT" or "SYMMETRIC". In reflect mode the padded regions
+do not include the borders, while in symmetric mode the padded regions
+do include the borders. \n
+
+*@par Outputs:
+*y: The padded tensor. \n
+
+*@attention Constraints:
+*MirrorPad runs on the Ascend AI CPU, which delivers poor performance. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator MirrorPad.
+*/
+
+REG_OP(MirrorPad)
+    .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
+        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, \
+        DT_COMPLEX64, DT_COMPLEX128 }))
+    .INPUT(paddings, TensorType({ DT_INT32, DT_INT64 }))
+    .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
+        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, \
+        DT_COMPLEX64, DT_COMPLEX128 }))
+    .REQUIRED_ATTR(mode, String)
+    .OP_END_FACTORY_REG(MirrorPad)
+
+/**
+*@brief Computes the difference between two lists of numbers or strings. \n
+
+*@par Inputs:
+*Inputs "x" and "y" are 1D vectors.
+* @li x: A Tensor. 1D. Values to keep.
+* @li y: A Tensor. Must have the same type as "x". 1D. Values to remove. \n
+
+*@par Attributes:
+*out_idx: An optional DType from: "int32, int64". Defaults to "int32". \n
+
+*@par Outputs:
+*@li out: A Tensor. Has the same type as "x". The values of "x" that are not in "y", in their original order.
+*@li idx: A Tensor of type "out_idx". The positions in "x" of the retained values. \n
+
+*@attention Constraints:
+*ListDiff runs on the Ascend AI CPU, which delivers poor performance. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator ListDiff.
+*/
+
+REG_OP(ListDiff)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8,
+        DT_INT16, DT_UINT16, DT_INT32, DT_INT64}))
+    .INPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8,
+        DT_INT16, DT_UINT16, DT_INT32, DT_INT64}))
+    .OUTPUT(out, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8,
+        DT_INT16, DT_UINT16, DT_INT32, DT_INT64}))
+    .OUTPUT(idx, TensorType({DT_INT32, DT_INT64}))
+    .ATTR(out_idx, Type, DT_INT32)
+    .OP_END_FACTORY_REG(ListDiff)
+
+/**
+*@brief Creates an empty tensor, using the shape and dtype specified in attributes. \n
+
+*@par Attributes:
+*@li dtype: Specifies the data type of the empty tensor.
+*@li shape: Specifies the shape of the empty tensor. \n
+
+*@par Outputs:
+*y: The empty constant tensor. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator _ParallelConcatStart.
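+
+*@par Example (illustrative note added for clarity; attribute values are hypothetical):
+*With "dtype" = DT_FLOAT and "shape" = {2, 3}, the op emits a 2 x 3 float tensor whose contents
+are left unspecified, to be filled in later by in-place updates (per the TensorFlow usage of _ParallelConcatStart).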
+*/ +REG_OP(_ParallelConcatStart) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .ATTR(dtype, Type, DT_INT32) + .ATTR(shape, ListInt, {}) + .OP_END_FACTORY_REG(_ParallelConcatStart) + +/** +*@brief Creates a constant tensor from a tensor-like object. This operator is used for inference. +Operator Const has the same definition as operator Constant. \n + +*@par Attributes: +*value: Required. The value and type of the resulting tensor, and no restrictions on type. \n + +*@par Outputs: +*y: A constant tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Const. +*/ +REG_OP(Const) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .ATTR(value, Tensor, Tensor()) + .OP_END_FACTORY_REG(Const) + +/** +*@brief Creates a constant tensor for training. \n + +*@par Attributes: +*value: Required. The value and type of the resulting tensor, and no restrictions on type. \n + +*@par Outputs: +*y: The constant tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Const. +*/ +REG_OP(Constant) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .ATTR(value, Tensor, Tensor()) + .OP_END_FACTORY_REG(Constant) + +/** +*@brief Returns a copy of the input tensor. \n + +*@par Inputs: +*x: A tensor. \n + +*@par Outputs: +*y: A tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Snapshot. +*/ +REG_OP(Snapshot) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OP_END_FACTORY_REG(Snapshot) + +/** +*@brief Gives a guarantee to the runtime that the input tensor is a constant. \n + +*@par Inputs: +*x: A tensor. \n + +*@par Outputs: +*y: The input tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator GuaranteeConst. +*/ +REG_OP(GuaranteeConst) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OP_END_FACTORY_REG(GuaranteeConst) + +/** +*@brief Returns the target shape for broadcasting shapes "x1" and "x2". \n + +*@par Inputs: +*@li x1: A tensor of type int32 or int64. A shape. +*@li x2: A tensor of the same type as "x1". The other shape. \n + +*@par Outputs: +*y: A tensor. The broadcasted shape. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BroadcastArgs. +*/ +REG_OP(BroadcastArgs) + .INPUT(x1, TensorType({DT_INT32, DT_INT64})) + .INPUT(x2, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(BroadcastArgs) + +/** +*@brief Outputs its input tensor as is and triggers an error if a gradient is requested. \n + +*@par Inputs: +*x: A tensor. \n + +*@par Attributes: +*message: Will be printed in the error at the attempt to request a gradient. 
\n + +*@par Outputs: +*y: The input tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator PreventGradient. +*/ +REG_OP(PreventGradient) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .ATTR(message, String, "") + .OP_END_FACTORY_REG(PreventGradient) + +/** +*@brief Returns the reduction indices for computing gradients of "x1" and "x2" with broadcast. \n + +*@par Inputs: +*@li x1: A tensor of type int32 or int64. +*@li x2: A tensor of type int32 or int64. +"x2" has the same type as "x1". \n + +*@par Outputs: +*@li y1: A tensor. Reduction indices of "x1". +*@li y2: A tensor. Reduction indices of "x2". \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BroadcastGradientArgs. +*/ +REG_OP(BroadcastGradientArgs) + .INPUT(x1, TensorType({DT_INT32, DT_INT64})) + .INPUT(x2, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y1, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y2, TensorType({DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(BroadcastGradientArgs) + +/** +*@brief Stops gradient computation. None is returned for the node where the gradient computation is stopped. + + +*@par Inputs: +*x: A tensor. \n + +*@par Outputs: +*y: The input tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator StopGradient. +*/ +REG_OP(StopGradient) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OP_END_FACTORY_REG(StopGradient) + +/** +*@brief Return a tensor with the same shape and contents as input. \n + +*@par Inputs: +*x: A tensor. \n + +*@par Outputs: +*y: A tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Identity. +*/ +REG_OP(Identity) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OP_END_FACTORY_REG(Identity) + +/** +*@brief Returns a list of tensors with the same shapes and contents as the input tensors. \n + +*@par Inputs: +*x: A list of input tensors. It's a dynamic input \n + +*@par Outputs: +*y: A list of Tensor objects, with the same length as the input tensor list. +It's a dynamic output. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator IdentityN. +*/ +REG_OP(IdentityN) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OP_END_FACTORY_REG(IdentityN) + +/** +*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without changing the data. \n + +*@par Inputs: +*@li x: A tensor. +*@li axis: The dimension index at which to expand. 
\n
+
+*@par Outputs:
+*y: A tensor. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator ExpandDims.
+*/
+REG_OP(ExpandDims)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32,
+        DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .INPUT(axis, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32,
+        DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(ExpandDims)
+
+/**
+*@brief Inserts dimensions of size 1 into a tensor's shape. Only the tensor shape is changed, without changing the data. \n
+
+*@par Inputs:
+*x: The original tensor. \n
+
+*@par Attributes:
+*axes: A list of ints. The dimension indices at which dimensions of size 1 are inserted. \n
+
+*@par Outputs:
+*y: A reshaped tensor with the same data as the input. \n
+
+*@par Third-party framework compatibility
+*Compatible with the ONNX operator Unsqueeze.
+*/
+
+REG_OP(Unsqueeze)
+    .INPUT(x, TensorType({DT_FLOAT32, DT_INT32, DT_UINT8, DT_BOOL}))
+    .OUTPUT(y, TensorType({DT_FLOAT32, DT_INT32, DT_UINT8, DT_BOOL}))
+    .ATTR(axes, ListInt, {})
+    .OP_END_FACTORY_REG(Unsqueeze)
+
+/**
+*@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. \n
+
+*@par Inputs:
+*@li x: A tensor.
+*@li shape: A tensor. Defines the shape of the output tensor. \n
+
+*@par Attributes:
+*@li axis: An optional int32 or int64. The first dimension to reshape. Defaults to "0".
+*@li num_axes: An optional int32 or int64. The extent of the reshape. Defaults to "-1". \n
+
+*@par Outputs:
+*y: A tensor. \n
+
+*@par Attention:
+*This operator cannot be directly called by the aclopExecute API. \n
+
+*@par Third-party framework compatibility
+*@li Compatible with the TensorFlow operator Reshape.
+*@li Compatible with the Caffe operator Reshape.
+*/
+REG_OP(Reshape)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32,
+        DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .INPUT(shape, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32,
+        DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .ATTR(axis, Int, 0)
+    .ATTR(num_axes, Int, -1)
+    .OP_END_FACTORY_REG(Reshape)
+
+/**
+*@brief Removes dimensions of size 1 from the shape of a tensor. \n
+
+*@par Inputs:
+*x: A tensor. \n
+
+*@par Attributes:
+*axis: An optional list of int32 or int64. If not specified, squeezes all dimensions of size 1. If specified, only squeezes the dimensions listed. It is an error to squeeze a dimension that is not 1. \n
+
+*@par Outputs:
+*y: A tensor. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Squeeze.
+*/
+REG_OP(Squeeze)
+    .INPUT(x, TensorType::ALL())
+    .OUTPUT(y, TensorType::ALL())
+    .ATTR(axis, ListInt, {})
+    .OP_END_FACTORY_REG(Squeeze)
+
+/**
+*@brief Returns an integer representing the rank of the input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the number of dimensions of the tensor. \n
+
+*@par Inputs:
+*x: A tensor. \n
+
+*@par Outputs:
+*y: A tensor. The rank of the input tensor. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Rank.
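+
+*@par Example (illustrative note added for clarity):
+*A tensor of shape [2, 3, 5] has rank 3, so "y" is the scalar value 3.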
+*/ +REG_OP(Rank) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(Rank) + +/** +*@brief Returns the size of a tensor, that is, an integer of the number of elements of the tensor. \n + +*@par Inputs: +*x: A tensor. \n + +*@par Attributes: +*out_type: An optional int32 or int64. The output data type. Defaults to "int32". \n + +*@par Outputs: +*y: A tensor. The size of the input tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Size. +*/ +REG_OP(Size) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_INT32,DT_INT64})) + .ATTR(dtype, Int, DT_INT32) + .OP_END_FACTORY_REG(Size) + +/** +*@brief Input data for other operators. \n + +*@par Inputs: +*x: A tensor. \n + +*@par Attributes: +*index: Index of the input tensor.The data type must be int32 or int64. +Assume that net has three data nodes, one should be set 0, another should +be set 1, and the left should be set 2. \n + +*@par Outputs: +*y: A tensor. \n + +*@par Third-party framework compatibility +*Compatible with the Caffe operator Data. +*/ +REG_OP(Data) + .INPUT(x, TensorType::ALL()) + .OUTPUT(y, TensorType::ALL()) + .ATTR(index, Int, 0) + .OP_END_FACTORY_REG(Data) + +/** +*@brief Inserts a placeholder for a tensor that will be always fed. \n + +*@par Inputs: +*x: A tensor. \n + +*@par Attributes: +*@li peerIndex: An integer type. The index of the corresponding "end" node connected to. +*@li parentId: A string, used to check if the nodes are from the saved parent node. +*@li parentOpType: A string. Op type of the original node. +*@li anchorIndex: An integer, used to check if the node is from the saved anchor. \n + +*@par Outputs: +*y: The created placeholder tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator PlaceHolder. +*/ +REG_OP(PlaceHolder) + .INPUT(x, TensorType::ALL()) + .OUTPUT(y, TensorType::ALL()) + .ATTR(peerIndex, Int, 0) // the index of the corresponding 'end' node it's connected to + .ATTR(parentId, String, "") // check if these node are from save parent node + .ATTR(parentOpType, String, "") // op type of original node + .ATTR(anchorIndex, Int, 0) // check if these node are from save anchor + .OP_END_FACTORY_REG(PlaceHolder) + +/** +*@brief Inserts a placeholder with default value for a tensor. \n + +*@par Inputs: +*x: A tensor. \n + +*@par Attributes: +*@li dtype: data type of tensor. +*@li shape: tensor shape. \n + +*@par Outputs: +*y: The created placeholder tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator PlaceholderWithDefault. +*/ +REG_OP(PlaceholderWithDefault) + .INPUT(x, TensorType::ALL()) + .OUTPUT(y, TensorType::ALL()) + .REQUIRED_ATTR(shape, ListInt) + .OP_END_FACTORY_REG(PlaceholderWithDefault) + +/** +*@brief Reads and returns the value of the input variable tensor. \n + +*@par Inputs: +*x: A tensor. \n + +*@par Attributes: +*dtype: An optional int32 or int64. The output data type. Defaults to int32. \n + +*@par Outputs: +*y: A tensor. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ReadVariableOp. 
+*/
+REG_OP(ReadVariableOp)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8,
+        DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8,
+        DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .ATTR(dtype, Int, DT_INT32)
+    .OP_END_FACTORY_REG(ReadVariableOp)
+
+/**
+*@brief Marks the outputs of a subgraph that was partitioned by engine type.
+
+*@par Inputs:
+*x: A tensor. \n
+
+*@par Outputs:
+*y: A tensor. \n
+
+*@par Attributes:
+*@li peerIndex: The index of the corresponding 'placeholder' node it's connected to.
+*@li parentOpType: Op type of the original node.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(End)
+    .INPUT(x, TensorType::ALL())
+    .OUTPUT(y, TensorType::ALL())
+    .ATTR(peerIndex, Int, 0)
+    .ATTR(parentOpType, String, "")
+    .OP_END_FACTORY_REG(End)
+
+/**
+*@brief Writes summary data for use in analysis and visualization.
+
+*@par Inputs:
+* One input:
+*x: Collections of summary data.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(Summary)
+    .INPUT(x, TensorType::ALL())
+    .OP_END_FACTORY_REG(Summary)
+
+/**
+*@brief Returns the shape of a tensor. \n
+
+*@par Inputs:
+*x: A tensor. \n
+
+*@par Attributes:
+*dtype: An optional int32 or int64. The output data type. Defaults to int32. \n
+
+*@par Outputs:
+*y: A tensor. The shape of the input tensor. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Shape.
+*/
+REG_OP(Shape)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8,
+        DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_INT32, DT_INT64}))
+    .ATTR(dtype, Int, DT_INT32)
+    .OP_END_FACTORY_REG(Shape)
+
+/**
+*@brief Returns the shapes of the input tensors. \n
+
+*@par Inputs:
+*x: A list of input tensors. It's a dynamic input. \n
+
+*@par Attributes:
+*dtype: An optional int32 or int64. The output data type. Defaults to "int32". \n
+
+*@par Outputs:
+*y: A list of tensors with the same length as the input list of tensors.
+It's a dynamic output. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator ShapeN.
+*/
+REG_OP(ShapeN)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8,
+        DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .DYNAMIC_OUTPUT(y, TensorType({DT_INT32, DT_INT64}))
+    .ATTR(dtype, Int, DT_INT32)
+    .OP_END_FACTORY_REG(ShapeN)
+
+/**
+*@brief Creates a tensor with the given "shape" and "dtype". \n
+
+*@par Inputs:
+*shape: The shape of the output tensor. \n
+
+*@par Attributes:
+*@li dtype: Optional. The data type of the output tensor. Defaults to "int32".
+*@li init: An optional bool. If true, initializes the returned tensor with the default value of "dtype". Defaults to "false". \n
+
+*@par Outputs:
+*y: A tensor. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Empty.
+*/
+REG_OP(Empty)
+    .INPUT(shape, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8,
+        DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .ATTR(dtype, Int, DT_INT32)
+    .ATTR(init, Bool, 0)
+    .OP_END_FACTORY_REG(Empty)
+
+/**
+*@brief Gradient op for MirrorPad op. Folds a mirror-padded tensor.
\n + +*@par Inputs: +*Inputs "x" and "y" are 1D vectors. +* @li x: A Tensor. The input tensor to be folded. +* @li paddings: A Tensor of type int32 or int64. A two-column matrix +specifying the padding sizes. \n + +*@par Attributes: +*mode: A string from: "REFLECT", "SYMMETRIC". The mode used in the MirrorPad op. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". \n + +*@attention Constraints: +*MirrorPadGrad runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MirrorPadGrad. +*/ + +REG_OP(MirrorPadGrad) + .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .INPUT(paddings, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .REQUIRED_ATTR(mode, String) + .OP_END_FACTORY_REG(MirrorPadGrad) + +/** +*@brief Returns locations of nonzero / true values in a tensor. \n + +*@par Inputs: +*Including: +*x: A Tensor. Must be one of the following types: +DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, +DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL. \n + +*@par Outputs: +*y: A Tensor of type DT_INT64. \n + +*@attention Constraints: +*Where runs on the Ascend AI CPU, which delivers poor performance.\n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Where. +*/ + +REG_OP(Where) + .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_INT64})) + .OP_END_FACTORY_REG(Where) + +/** +*@brief Derived from the Caffe operator Split that splits an input blob to +* multiple output blobs for feeding a blob into multiple output layers. +*The Split node is removed from the graph after the split operation is completed. \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +fp16, fp32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x".It's required and the value should equal to output_num. \n + +*@par Attributes: +*@li N: A required int. The parameter will get the number of dynamic outputs. +*/ +REG_OP(Copy) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) + .REQUIRED_ATTR(N, Int) + .OP_END_FACTORY_REG(Copy); + +/** +*@brief Generates fingerprint values. \n + +*@par Inputs: +*@li data: Must have rank 1 or higher. +*@li method: Fingerprint method used by this op. Currently available method is +`farmhash::fingerprint64`. \n + +*@par Outputs: +y: A two-dimensional `Tensor` of type `tf.uint8`. The first dimension equals to +`data`'s first dimension, and the second dimension size depends on the +fingerprint algorithm. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow Fingerprint operator. 
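+
+*@par Example (illustrative note added for clarity; follows the TensorFlow Fingerprint semantics):
+*With method "farmhash::fingerprint64", each fingerprint is 8 bytes, so "data" of shape [2, 10]
+yields "y" of shape [2, 8].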
+*/
+
+REG_OP(Fingerprint)
+    .INPUT(data, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
+        DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+    .INPUT(method, TensorType({DT_STRING}))
+    .OUTPUT(y, TensorType({DT_UINT8}))
+    .OP_END_FACTORY_REG(Fingerprint)
+
+/**
+*@brief Changes the shape of the output according to the attribute "outShape".
+*
+
+*@par Inputs:
+*x: A Tensor. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x", with the shape given by "outShape". \n
+
+*@par Attributes:
+*outShape: The shape of the output is inferred from this attribute.
+*/
+REG_OP(TransShape)
+    .INPUT(x, TensorType::ALL())
+    .OUTPUT(y, TensorType::ALL())
+    .ATTR(outShape, ListInt, {})
+    .OP_END_FACTORY_REG(TransShape);
+
+/**
+*@brief Computes the (possibly normalized) Levenshtein Edit Distance. \n
+
+*@par Inputs:
+*@li hypothesis_indices: The indices of the hypothesis list SparseTensor.
+This is an N x R int64 matrix.
+*@li hypothesis_values: The values of the hypothesis list SparseTensor.
+This is an N-length vector.
+*@li hypothesis_shape: The shape of the hypothesis list SparseTensor.
+This is an R-length vector.
+*@li truth_indices: The indices of the truth list SparseTensor.
+This is an M x R int64 matrix.
+*@li truth_values: The values of the truth list SparseTensor.
+This is an M-length vector.
+*@li truth_shape: The shape of the truth list SparseTensor.
+This is an R-length vector.
+
+*@par Attributes:
+*@li normalize: boolean (if true, edit distances are normalized by length of truth). \n
+
+*@par Outputs:
+*@li output: A dense float tensor with rank R - 1. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow EditDistance operator.
+*/
+REG_OP(EditDistance)
+    .INPUT(hypothesis_indices, TensorType({DT_INT64}))
+    .INPUT(hypothesis_values, TensorType::BasicType())
+    .INPUT(hypothesis_shape, TensorType({DT_INT64}))
+    .INPUT(truth_indices, TensorType({DT_INT64}))
+    .INPUT(truth_values, TensorType::BasicType())
+    .INPUT(truth_shape, TensorType({DT_INT64}))
+    .ATTR(normalize, Bool, true)
+    .OUTPUT(output, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(EditDistance)
+
+/**
+* @brief Sorts a tensor in ascending or descending order along the given axis (SortV2).
+
+* @par Inputs:
+* @li x: An ND tensor of type float16.
+
+* @par Attributes:
+
+* @li axis: An optional int. The dimension to sort along. This value defaults to -1.
+* @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False.
+
+* @par Outputs:
+* @li y: An ND tensor of type float16.
+
+* @attention Constraints:
+* @li "axis" should select the last dimension.
+* @li When the amount of data to sort is less than 150K, this TBE operator is recommended;
+  descending order performs better than ascending order.
+* @li The upper limit of data on Ascend910 is 2000K.
+*/
+REG_OP(SortV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(axis, Int, -1)
+    .ATTR(descending, Bool, false)
+    .OP_END_FACTORY_REG(SortV2)
+
+/**
+* @brief Expands the input tensor to a compatible shape. \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A Tensor. Must be one of the following types:
+* float16, float32, int32, int8, uint8. \n
+* @li shape: A Tensor that specifies the shape to which the input tensor is expanded. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x", with the shape specified by the "shape" input. \n
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator Expand.
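+
+* @par Example (illustrative note added for clarity; follows the ONNX Expand broadcasting rules):
+* For "x" = [[1], [2], [3]] with shape [3, 1] and "shape" = [3, 4], the output "y" has shape [3, 4]:
+* [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]].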
+*/ + +REG_OP(Expand) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OP_END_FACTORY_REG(Expand) + +/** +* @brief Expand the input tensor to a compatible shape. \n + +* @par Inputs: +* One inputs, including: +* @li x: A Tensor. Must be one of the following types: +* float16, float32, int32, int8 ,uint8. \n + +* @par Attributes: +* @li shape: A required listInt to specify the shape that the input tensor expanded to. \n + + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n + +* @par Third-party framework compatibility +* Compatible with the ONNX operator Expand. +*/ + +REG_OP(ExpandD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .REQUIRED_ATTR(shape, ListInt) + .OP_END_FACTORY_REG(ExpandD) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/inc/ops/audio_ops.h new file mode 100644 index 00000000..f05135d1 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/audio_ops.h @@ -0,0 +1,162 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file audio_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Mel-Frequency Cepstral Coefficient (MFCC) calculation consists of +taking the DCT-II of a log-magnitude mel-scale spectrogram . \n + +*@par Inputs: +*Input "spectrogram" is a 3D tensor. Input "sample_rate" is a scalar. +* @li spectrogram: A 3D float tensor. +* @li sample_rate: The MFCC sample rate . \n + +*@par Attributes: +*@li upper_frequency_limit: The highest frequency for calculation. +*@li lower_frequency_limit: The lowest frequency for calculation. +*@li filterbank_channel_count: Resolution of the Mel bank. +*@li dct_coefficient_count: Number of output channels to produce +per time slice . \n + +*@par Outputs: +*y: A Tensor of type float32 . \n + +*@attention Constraints: +*Mfcc runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Mfcc . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(Mfcc) + .INPUT(spectrogram, TensorType({DT_FLOAT})) + .INPUT(sample_rate, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(upper_frequency_limit, Float, 4000) + .ATTR(lower_frequency_limit, Float, 20) + .ATTR(filterbank_channel_count, Int, 40) + .ATTR(dct_coefficient_count, Int, 13) + .OP_END_FACTORY_REG(Mfcc) + +/** +*@brief Decodes and generates spectrogram using wav float tensor . 
\n + +*@par Inputs: +*Input "x" is a 2D matrix. +* x: A float tensor. Float representation of audio data . \n + +*@par Attributes: +*@li window_size: Size of the spectrogram window. +*@li stride: Size of the spectrogram stride. +*@li magnitude_squared: If true, uses squared magnitude . \n + +*@par Outputs: +*spectrogram: A 3D float Tensor . \n + +*@attention Constraints: +*AudioSpectrogram runs on the Ascend AI CPU, which delivers +poor performance . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AudioSpectrogram . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(AudioSpectrogram) + .INPUT(x, TensorType({DT_FLOAT})) + .OUTPUT(spectrogram, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(window_size, Int) + .REQUIRED_ATTR(stride, Int) + .ATTR(magnitude_squared, Bool, false) + .OP_END_FACTORY_REG(AudioSpectrogram) + +/** +*@brief Decodes a 16-bit WAV file into a float tensor . \n + +*@par Inputs: +*contents: A Tensor of type string. The WAV-encoded audio, usually from a file . \n + +*@par Attributes: +*@li desired_channels: An optional int. Defaults to "-1". +Number of sample channels wanted. +*@li desired_samples: An optional int. Defaults to "-1". +Length of audio requested . \n + +*@par Outputs: +*@li *audio: A Tensor of type float32. +*@li *sample_rate: A Tensor of type int32 . \n + +*@attention Constraints: +*DecodeWav runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator DecodeWav . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(DecodeWav) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(audio, TensorType({DT_FLOAT})) + .OUTPUT(sample_rate, TensorType({DT_INT32})) + .ATTR(desired_channels, Int, -1) + .ATTR(desired_samples, Int, -1) + .OP_END_FACTORY_REG(DecodeWav) + +/** +*@brief Encode audio data using the WAV file format . \n + +*@par Inputs: +*Including: +* @li audio: A Tensor of type DT_FLOAT. +* @li sample_rate: A Tensor of type DT_INT32 . \n + +*@par Outputs: +*contents: A Tensor of type DT_STRING . \n + +*@attention Constraints: +*EncodeWav runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with tensorflow Operator EncodeWav . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(EncodeWav) + .INPUT(audio, TensorType({DT_FLOAT})) + .INPUT(sample_rate, TensorType({DT_INT32})) + .OUTPUT(contents, TensorType({DT_STRING})) + .OP_END_FACTORY_REG(EncodeWav) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/inc/ops/batch_ops.h new file mode 100644 index 00000000..181bf694 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/batch_ops.h @@ -0,0 +1,166 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file batch_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Creates batches of tensors in "x_tensors" . \n + +*@par Inputs: +*Input "x_tensors" is a list or a dictionary of tensors. +*x_tensors: The list or dictionary of tensors to enqueue . +It's a dynamic input \n + +*@par Attributes: +*@li num_batch_threads: The number of threads enqueuing "x_tensors". +The batching will be nondeterministic if "num_batch_threads" > 1. +*@li max_batch_size: The maximum batch size pulled from the queue. +*@li max_enqueued_batches: The maximum number of batches pulled from the queue. +*@li batch_timeout_micros: The batch processing timeout, in microseconds. +*@li allowed_batch_sizes: The allowed batch size pulled from the queue. +*@li grad_timeout_micros: The gradient batch processing timeout, +in microseconds. +*@li container: If non-empty, this queue is placed in the given container. +Otherwise, a default container is used. +*@li shared_name: If set, this queue will be shared under the given name +across multiple sessions. +*@li batching_queue: The queue resource container . \n + +*@par Outputs: +*@li y_index: A Tensor. The index of a BatchTensor. Must be in row-major order. +*@li y_id: A Tensor. The ID of a BatchTensor. Must be in row-major order. +*@li y_tensors: A list or dictionary of tensors with +the same types as "x_tensors" . It's a dynamic output. \n + +*@attention Constraints: +*Batch runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Batch. +*/ + +REG_OP(Batch) + .DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \ + DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y_index, TensorType({ DT_INT64 })) + .OUTPUT(y_id, TensorType({ DT_INT64 })) + .DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL})) + .REQUIRED_ATTR(num_batch_threads, Int) + .REQUIRED_ATTR(max_batch_size, Int) + .ATTR(max_enqueued_batches, Int, 10) + .REQUIRED_ATTR(batch_timeout_micros, Int) + .ATTR(allowed_batch_sizes, ListInt, {}) + .REQUIRED_ATTR(grad_timeout_micros, Int) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(batching_queue, String, "") + .OP_END_FACTORY_REG(Batch) + +/** +*@brief Reverses the operation of Batch for a single output Tensor . \n + +*@par Inputs: +*Input "x_tensors" is a list or a dictionary of tensors. +* @li x_tensors: The list or dictionary of tensors to enqueue. +* @li index: The matching "batch_index" obtained from Batch. +* @li id: The "id" scalar emitted by Batch . \n + +*@par Attributes: +*@li timeout_micros: The unbatch processing timeout, in microseconds. +*@li container: If non-empty, this queue is placed in the given container. +Otherwise, a default container is used. +*@li shared_name: If set, this queue will be shared under the given name +across multiple sessions . \n + +*@par Outputs: +*y_tensor: A list or dictionary of tensors with the same types as "x_tensors" . \n + +*@attention Constraints: +*Unbatch runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Unbatch. 
+*/ + +REG_OP(Unbatch) + .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(index, TensorType({DT_INT64})) + .INPUT(id, TensorType({DT_INT64})) + .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .REQUIRED_ATTR(timeout_micros, Int) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(Unbatch) + +/** +*@brief Acts like Batch but using the given "batch_index" index of batching +things as they become available . \n + +*@par Inputs: +*Input "x_input" is a list or a dictionary of tensors. +* @li x_input: The input to the Unbatch operation. +* @li index: The batch_index given to the Unbatch operation. +* @li id: The "id" scalar emitted by Batch. +* @li grad: The downstream gradient . \n + +*@par Attributes: +*@li container: If non-empty, this queue is placed in the given container. +Otherwise, a default container is used. +*@li shared_name: If set, this queue will be shared under the given name +across multiple sessions . \n + +*@par Outputs: +*y_grad: The return value, either an empty tensor or the batched gradient . \n + +*@attention Constraints: +*UnbatchGrad runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UnbatchGrad. +*/ + +REG_OP(UnbatchGrad) + .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(index, TensorType({DT_INT64})) + .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(id, TensorType({DT_INT64})) + .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(UnbatchGrad) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/inc/ops/bitwise_ops.h new file mode 100644 index 00000000..39a28cf3 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/bitwise_ops.h @@ -0,0 +1,59 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file bitwise_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Element-wise computes the bitwise right-shift of x and y . \n + +*@par Inputs: +*Input "x" is a k-dimensional tensor. 
Inputs "num_lower" and "num_upper" +are 0D scalars. +* @li x: A Tensor. Must be one of the following types: int8, int16, int32, +int64, uint8, uint16, uint32, uint64. +* @li y: A Tensor. Has the same type as "x". \n + +*@par Outputs: +* z: A Tensor. Has the same type as "x". \n + +*@attention Constraints: +*Unique runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator RightShift. +*/ + +REG_OP(RightShift) + .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \ + DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64})) + .INPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \ + DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64})) + .OUTPUT(z, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \ + DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64})) + .OP_END_FACTORY_REG(RightShift) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/inc/ops/boosted_trees_ops.h new file mode 100644 index 00000000..08e54824 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/boosted_trees_ops.h @@ -0,0 +1,64 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file boosted_trees_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Bucketizes each feature based on bucket boundaries . \n + +*@par Inputs: +*Input "float_values" is a 1D tensor. Input "bucket_boundaries" is +a list of 1D tensors. It's a dynamic input. +* @li float_values: A list of rank 1 tensors each containing float +values for a single feature. +* @li bucket_boundaries: A list of rank 1 tensors each containing +the bucket boundaries for a single feature . It's a dynamic input. \n + +*@par Attributes: +*@li num_features: Number of features + +*@par Outputs: +*@li y: A list of rank 1 tensors each containing the bucketized values for +a single feature . \n + +*@attention Constraints: +*BoostedTreesBucketize runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BoostedTreesBucketize . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(BoostedTreesBucketize) + .DYNAMIC_INPUT(float_values, TensorType({DT_FLOAT})) + .DYNAMIC_INPUT(bucket_boundaries, TensorType({DT_FLOAT})) + .DYNAMIC_OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(num_features, Int) + .OP_END_FACTORY_REG(BoostedTreesBucketize) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/inc/ops/candidate_sampling_ops.h new file mode 100644 index 00000000..890c52ae --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/candidate_sampling_ops.h @@ -0,0 +1,415 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file candidate_sampling_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Generates labels for candidate sampling with +a learned unigram distribution. \n + +*@par Inputs: +*Input "true_classes" is a 2D matrix. +*true_classes: A "batch_size * num_true" matrix, in which each row contains +the IDs of the "num_true" "target_classes" in the corresponding original label. \n + +*@par Attributes: +*@li num_true: Number of true labels per context. +*@li num_sampled: Number of candidates to randomly sample. +*@li unique: If "unique" is true, samples with rejection, +so that all sampled candidates in a batch are unique. +*This requires some approximation to estimate the post-rejection +sampling probabilities. +*@li range_max: The sampler will sample integers from the interval +[0, range_max). +*@li seed: If either "seed" or "seed2" are set to be non-zero. +*@li seed2: A second seed to avoid seed collision. \n + +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", in which each +element is the ID of a sampled candidate. +*@li true_expected_count: A "batch_size * num_true" matrix, representing +the number of times each candidate is expected to occur in a batch of sampled +candidates. If "unique" is true, then this is a probability. +*@li sampled_expected_count: A vector of length "num_sampled", +for each sampled candidate. +*representing the number of times the candidate is expected to occur +in a batch of sampled candidates. +* If "unique" is true, then this is a probability. + +*@attention Constraints: +*ThreadUnsafeUnigramCandidateSampler runs on the Ascend AI CPU, +which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ThreadUnsafeUnigramCandidateSampler. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(ThreadUnsafeUnigramCandidateSampler) + .INPUT(true_classes, TensorType({ DT_INT64 })) + .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) + .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) + .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(num_true, Int) + .REQUIRED_ATTR(num_sampled, Int) + .REQUIRED_ATTR(unique, Bool) + .REQUIRED_ATTR(range_max, Int) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(ThreadUnsafeUnigramCandidateSampler) + +/** +*@brief Generates labels for candidate sampling with a learned +unigram distribution. \n + +*@par Inputs: +*true_classes: A "batch_size * num_true" matrix, in which each row contains +the IDs of the "num_true" "target_classes" in the corresponding original label. +*Input "true_classes" is a 2D matrix. \n + +*@par Attributes: +*@li num_true: Number of true labels per context. +*@li num_sampled: Number of candidates to randomly sample. +*@li unique: If "unique" is true, samples with rejection, +so that all sampled candidates in a batch are unique. +*This requires some approximation to estimate the post-rejection +sampling probabilities. +*@li range_max: The sampler will sample integers from the interval +[0, range_max). +*@li seed: If either "seed" or "seed2" are set to be non-zero. +*@li seed2: A second seed to avoid seed collision. \n + +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", +in which each element is the ID of a sampled candidate. +*@li true_expected_count: A "batch_size * num_true" matrix, representing the +number of times each candidate is expected to occur +in a batch of sampled candidates. +*If "unique" is true, then this is a probability. +*@li sampled_expected_count: A vector of length "num_sampled", for each +sampled candidate representing the number of times. +* the candidate is expected to occur in a batch of sampled candidates. +*If "unique" is true, then this is a probability. \n + +*@attention Constraints: +*UniformCandidateSampler runs on the Ascend AI CPU, +which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UniformCandidateSampler. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(UniformCandidateSampler) + .INPUT(true_classes, TensorType({ DT_INT64 })) + .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) + .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) + .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(num_true, Int) + .REQUIRED_ATTR(num_sampled, Int) + .REQUIRED_ATTR(unique, Bool) + .REQUIRED_ATTR(range_max, Int) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(UniformCandidateSampler) + +/** +*@brief Generates labels for candidate sampling with a learned +unigram distribution. \n + +*@par Inputs: +*true_classes: A "batch_size * num_true" matrix, in which each row contains +the IDs of the "num_true" "target_classes" in the corresponding original label. +* Input "true_classes" is a 2D matrix. \n + +*@par Attributes: +*@li num_true: Number of true labels per context. +*@li num_sampled: Number of candidates to randomly sample. +*@li unique: If "unique" is true, samples with rejection, +so that all sampled candidates in a batch are unique. This requires +some approximation to estimate the post-rejection sampling probabilities. +*@li range_max: The sampler will sample integers from the interval [0, range_max). 
+*@li vocab_file: Each valid line in this file (which should have a +CSV-like format) corresponds to a valid word ID. +*IDs are in sequential order, starting from num_reserved_ids. +*@li distortion: The distortion is used to skew the unigram probability +distribution. Each weight is first raised to the distortion's power before +adding to the internal unigram distribution. +*@li num_reserved_ids: Optionally some reserved IDs can be added in the range +[0, ..., num_reserved_ids) by the users. +* One use case is that a special unknown word token is used as ID 0. +*@li num_shards: A sampler can be used to sample from a subset of the +original range. in order to speed up the whole computation through parallelism. +*@li shard: A sampler can be used to sample from a subset of the original +range in order to speed up the whole computation through parallelism. +*@li unigrams: A list of unigram counts or probabilities, one per ID in +sequential order. +*@li seed: If either "seed" or "seed2" are set to be non-zero. +*@li seed2: A second seed to avoid seed collision. \n + +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", in which each +element is the ID of a sampled candidate. +*@li true_expected_count: A "batch_size * num_true" matrix, representing the +number of times each candidate is expected to occur in a batch of sampled +candidates. If "unique" is true, then this is a probability. +*@li sampled_expected_count: A vector of length "num_sampled", +for each sampled candidate representing the number of times the candidate is +expected to occur in a batch of sampled candidates. +If "unique" is true, then this is a probability. \n + +*@attention Constraints: +* FixedUnigramCandidateSampler runs on the Ascend AI CPU, +which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator FixedUnigramCandidateSampler. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(FixedUnigramCandidateSampler) + .INPUT(true_classes, TensorType({ DT_INT64 })) + .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) + .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) + .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) + .ATTR(num_true, Int, 0) + .ATTR(num_sampled, Int, 0) + .ATTR(unique, Bool, false) + .ATTR(range_max, Int, 0) + .ATTR(vocab_file, String, "") + .ATTR(distortion, Float, 1.0) + .ATTR(num_reserved_ids, Int, 0) + .ATTR(num_shards, Int, 1) + .ATTR(shard, Int, 0) + .REQUIRED_ATTR(unigrams, ListFloat) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(FixedUnigramCandidateSampler) + +/** +*@brief Generates labels for candidate sampling with a learned +unigram distribution. \n + +*@par Inputs: +*true_classes: A "batch_size * num_true" matrix, in which each row contains +the IDs of the "num_true" "target_classes" in the corresponding original label. +* Input "true_classes" is a 2D matrix. \n + +*@par Attributes: +*@li num_true: Number of true labels per context. +*@li num_sampled: Number of candidates to randomly sample. +*@li unique: If "unique" is true, samples with rejection, +so that all sampled candidates in a batch are unique. +*This requires some approximation to estimate the post-rejection +sampling probabilities. +*@li range_max: The sampler will sample integers from the interval +[0, range_max). +*@li seed: If either "seed" or "seed2" are set to be non-zero. +*@li seed2: A second seed to avoid seed collision. 
\n + +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", in which each +element is the ID of a sampled candidate. +*@li true_expected_count: A "batch_size * num_true" matrix, representing +the number of times each candidate is expected to occur in a batch of sampled candidates. +*If "unique" is true, then this is a probability. +*@li sampled_expected_count: A vector of length "num_sampled", for each +sampled candidate representing the number of times the candidate is expected +to occur in a batch of sampled candidates. +*If "unique" is true, then this is a probability. \n + +*@attention Constraints: +*LearnedUnigramCandidateSampler runs on the Ascend AI CPU, which delivers +poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator LearnedUnigramCandidateSampler. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LearnedUnigramCandidateSampler) + .INPUT(true_classes, TensorType({ DT_INT64 })) + .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) + .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) + .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(num_true, Int) + .REQUIRED_ATTR(num_sampled, Int) + .REQUIRED_ATTR(unique, Bool) + .REQUIRED_ATTR(range_max, Int) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(LearnedUnigramCandidateSampler) + +/** +*@brief Generates labels for candidate sampling with a log-uniform +distribution. \n + +*@par Inputs: +*true_classes: A "batch_size * num_true" matrix, in which each row contains +the IDs of the "num_true" "target_classes" in the corresponding original label. +* Input "true_classes" is a 2D matrix. \n + +*@par Attributes: +*@li num_true: Number of true labels per context. +*@li num_sampled: Number of candidates to randomly sample. +*@li unique: If "unique" is true, samples with rejection, so that all +sampled candidates in a batch are unique. This requires some approximation +to estimate the post-rejection sampling probabilities. +*@li range_max: The sampler will sample integers from the interval +[0, range_max). +*@li seed: If either "seed" or "seed2" are set to be non-zero. +*@li seed2: A second seed to avoid seed collision. \n + +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", in which each +element is the ID of a sampled candidate. +*@li true_expected_count: A "batch_size * num_true" matrix, representing +the number of times each candidate is expected to occur in a batch of sampled +candidates. If "unique" is true, then this is a probability. +*@li sampled_expected_count: A vector of length "num_sampled", for each +sampled candidate representing the number of times the candidate is expected +to occur in a batch of sampled candidates. +*If "unique" is true, then this is a probability. \n + +*@attention Constraints: +*LogUniformCandidateSampler runs on the Ascend AI CPU, which delivers +poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator LogUniformCandidateSampler. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
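+
+*@par Sampling distribution (informative note added for clarity; formula follows the TensorFlow log-uniform candidate sampler):
+*P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1), so smaller class IDs are sampled
+more often than larger ones.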
+*/ +REG_OP(LogUniformCandidateSampler) + .INPUT(true_classes, TensorType({ DT_INT64 })) + .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) + .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) + .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(num_true, Int) + .REQUIRED_ATTR(num_sampled, Int) + .REQUIRED_ATTR(unique, Bool) + .REQUIRED_ATTR(range_max, Int) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(LogUniformCandidateSampler) + +/** +*@brief Generates labels for candidate sampling with a learned +unigram distribution. \n + +*@par Inputs: +*true_classes: A "batch_size * num_true" matrix, in which each row contains +the IDs of the "num_true" "target_classes" in the corresponding original label. +* Input "true_classes" is a 2D matrix. \n + +*@par Attributes: +*@li num_true: Number of true labels per context. +*@li num_sampled: Number of candidates to randomly sample. +*@li unique: If "unique" is true, samples with rejection, +so that all sampled candidates in a batch are unique. This requires some +approximation to estimate the post-rejection sampling probabilities. +*@li seed: If either "seed" or "seed2" are set to be non-zero. +*@li seed2: A second seed to avoid seed collision. \n + +*@par Outputs: +*@li sampled_candidates: A vector of length "num_sampled", +in which each element is the ID of a sampled candidate. +*@li true_expected_count: A "batch_size * num_true" matrix, representing the +number of times each candidate is expected to occur in a batch of sampled candidates. +*If "unique" is true, then this is a probability. +*@li sampled_expected_count: A vector of length "num_sampled", for each +sampled candidate representing the number of times the candidate is expected +to occur in a batch of sampled candidates. If "unique" is true, then this is a probability. \n + +*@attention Constraints: +*AllCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AllCandidateSampler. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AllCandidateSampler) + .INPUT(true_classes, TensorType({ DT_INT64 })) + .OUTPUT(sampled_candidates, TensorType({ DT_INT64 })) + .OUTPUT(true_expected_count, TensorType({ DT_FLOAT })) + .OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(num_true, Int) + .REQUIRED_ATTR(num_sampled, Int) + .REQUIRED_ATTR(unique, Bool) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(AllCandidateSampler) + +/** +*@brief Computes the "ids" of the positions in "sampled_candidates" that +match "true_labels". \n + +*@par Inputs: +* @li Input "true_classes" is a 2D matrix. +* @li true_classes: The "true_classes" output of UnpackSparseLabels. +* @li sampled_candidates: The "sampled_candidates" output of CandidateSampler. \n + +*@par Attributes: +*@li num_true: Number of true labels per context. +*@li seed: If either "seed" or "seed2" are set to be non-zero. +*@li seed2: A second seed to avoid seed collision. \n + +*@par Outputs: +* @li indices: A vector of indices corresponding to rows of "true_candidates". +* @li ids: A vector of IDs of positions in "sampled_candidates" that match a +"true_label" for the row with the corresponding index in indices. +* @li weights: A vector of the same length as "indices" and "ids", in which +each element is -FLOAT_MAX. 
\n
+
+*@attention Constraints:
+*ComputeAccidentalHits runs on the Ascend AI CPU, which delivers poor performance.
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator ComputeAccidentalHits. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ComputeAccidentalHits)
+    .INPUT(true_classes, TensorType({ DT_INT64 }))
+    .INPUT(sampled_candidates, TensorType({ DT_INT64 }))
+    .OUTPUT(indices, TensorType({ DT_INT32 }))
+    .OUTPUT(ids, TensorType({ DT_INT64 }))
+    .OUTPUT(weights, TensorType({ DT_FLOAT }))
+    .REQUIRED_ATTR(num_true, Int)
+    .ATTR(seed, Int, 0)
+    .ATTR(seed2, Int, 0)
+    .OP_END_FACTORY_REG(ComputeAccidentalHits)
+
+}  // namespace ge
+
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_
diff --git a/third_party/fwkacllib/inc/inc/ops/condtake_ops.h b/third_party/fwkacllib/inc/inc/ops/condtake_ops.h
new file mode 100644
index 00000000..029cffbf
--- /dev/null
+++ b/third_party/fwkacllib/inc/inc/ops/condtake_ops.h
@@ -0,0 +1,59 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file condtake_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_
+
+#include "graph/operator_reg.h"
+#include "graph/operator.h"
+
+namespace ge {
+/**
+*@brief Takes elements from "data" where the specified condition on "mask" is satisfied. \n
+
+*@par Inputs:
+*@li data: Input tensor from which to take elements. A high-dimension input is
+first flattened.
+*@li mask: The condition tensor; must have the same shape as "data". \n
+
+*@par Attributes:
+*@li mode: A string specifying the condition mode used to compare "mask" with "val".
+*@li val: A float value that "mask" is compared against under "mode".
+*@li eps: A float tolerance used for floating-point comparison (default: 1e-06). \n
+
+*@par Outputs:
+*@li out_data: The elements taken from "data".
+*@li out_index: The indices corresponding to those elements.
+*@li valid_num: The number of valid elements; only the first "valid_num" entries of "out_data" and "out_index" are valid.
+*/
+
+REG_OP(CondTake)
+    .INPUT(data, TensorType({DT_FLOAT}))
+    .INPUT(mask, TensorType({DT_FLOAT}))
+    .OUTPUT(out_data, TensorType({DT_FLOAT}))
+    .OUTPUT(out_index, TensorType({DT_INT32}))
+    .OUTPUT(valid_num, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(mode, String)
+    .REQUIRED_ATTR(val, Float)
+    .ATTR(eps, Float, 1e-06)
+    .OP_END_FACTORY_REG(CondTake)
+}  // namespace ge
+
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_
diff --git a/third_party/fwkacllib/inc/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/inc/ops/control_flow_ops.h
new file mode 100644
index 00000000..c0b6ad72
--- /dev/null
+++ b/third_party/fwkacllib/inc/inc/ops/control_flow_ops.h
@@ -0,0 +1,407 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file control_flow_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** + *@brief Forwards the value of an available tensor from input "x" to output "y". + * Merge waits for at least one of the input tensors to become available. + * It is usually combined with Switch to implement branching. + * Merge forwards the first tensor to become available to output "y", + * and sets "value_index" the index of the tensor in inputs . \n + + *@par Inputs: + *x: The input tensors, one of which will become available. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . It's a dynamic input. \n + + *@par Outputs: + *@li y: The available tensor. Has the same type as "x". + *@li value_index: A scalar of type int32, for the index of the chosen input + * tensor . \n + + *@see Switch() + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator Merge. + */ +REG_OP(Merge) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(value_index, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(Merge) + +/** + *@brief Forwards the value of an available tensor from input "x" to output "y". + * Merge waits for at least one of the input tensors to become available. + * It is usually combined with Switch to implement branching. + * Merge forwards the first tensor to become available to output "y", + * and sets "value_index" the index of the tensor in inputs . \n + + *@par Inputs: + *x: The input tensors, one of which will become available. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . It's a dynamic input. \n + + *@par Outputs: + *@li y: The available tensor. Has the same type as "x". + *@li value_index: A scalar of type int32, for the index of the chosen input + * tensor . \n + + *@see Switch() | Merge() + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator RefMerge. + */ +REG_OP(RefMerge) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(value_index, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(RefMerge) + +/** + *@brief Forwards "data" to the output port determined by "pred". + * If "pred" is "true", the data input is forwarded to "output_true". + * Otherwise, the data is forwarded to "output_false" . 
\n + + *@par Inputs: + *@li data: The tensor to be forwarded. \ n + * Must be one of the following types: float16, float32, float64, + * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *@li pred: A boolean scalar. The output port that will receive data . \n + + *@par Outputs: + *@li output_false: If "pred" is "false", data will be forwarded to this output. + * Has the same type as "data". + *@li output_true: If "pred" is "true", data will be forwarded to this output. + * Has the same type as "data" . \n + + *@see Merge() + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator Switch. + */ +REG_OP(Switch) + .INPUT(data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .INPUT(pred, TensorType({DT_BOOL})) + .OUTPUT(output_false, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(output_true, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OP_END_FACTORY_REG(Switch) + +/** + *@brief Forwards "data" to the output port determined by "pred". + * If "pred" is "true", the data input is forwarded to "output_true". + * Otherwise, the data is forwarded to "output_false" . \n + + *@par Inputs: + *@li data: The ref tensor to be forwarded. + * Must be one of the following types: float16, float32, float64, + * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *@li pred: A boolean scalar. The output port that will receive data . \n + + *@par Outputs: + *@li output_false: If "pred" is "false", data will be forwarded to this output. + * Has the same type as "data". + *@li output_true: If "pred" is "true", data will be forwarded to this output. + * Has the same type as "data" . \n + + *@see Merge() | Switch() + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator RefSwitch. + */ +REG_OP(RefSwitch) + .INPUT(data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .INPUT(pred, TensorType({DT_BOOL})) + .OUTPUT(output_false, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(output_true, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OP_END_FACTORY_REG(RefSwitch) + +/** + *@brief Forwards "data" to the output port determined by "pred_value" . \n + + *@par Inputs: + *@li data: The tensor to be forwarded. \ n + * Must be one of the following types: float16, float32, float64, + * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool. + *@li pred_value: A int64 tensor which determines the output port that will receive data . \n + + *@par Outputs: + *output: The output tensors, one of which will become available. + * Has the same type as "data". 
+ */ +REG_OP(SwitchN) + .INPUT(data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .INPUT(pred_value, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OP_END_FACTORY_REG(SwitchN) + +/** + *@brief Creates or finds a child frame, and makes "x" available to the child + * frame. This op is used together with Exit to create loops in the graph. + * The Executor uses the unique "frame_name" to identify frames. + * If "is_constant" is "true", output "y" is a constant in the child + * frame; otherwise it may be changed in the child frame . \n + + *@par Inputs: + *x: The tensor to be made available to the child frame. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n + + *@par Attributes: + *@li frame_name: A required string. The name of the child frame. + *@li is_constant: A required bool. If true, the output is constant in + * the child frame . \n + + *@par Outputs: + *y: A Tensor. Has the same type as "x" . \n + + *@see Exit() + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator Enter. + */ +REG_OP(Enter) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .REQUIRED_ATTR(frame_name, String) + .REQUIRED_ATTR(is_constant, Bool) + .OP_END_FACTORY_REG(Enter) + +/** + *@brief Creates or finds a child frame, and makes "x" available to the child + * frame. This op is used together with Exit to create loops in the graph. + * The Executor uses the unique "frame_name" to identify frames. + * If "is_constant" is "true", output "y" is a constant in the child + * frame; otherwise it may be changed in the child frame . \n + + *@par Inputs: + *x: The tensor to be made available to the child frame. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n + + *@par Attributes: + *@li frame_name: A required string. The name of the child frame. + *@li is_constant: A required bool. If true, the output is constant in + * the child frame . \n + + *@par Outputs: + *y: A tensor. Has the same type as "x" . \n + + *@see Exit() | Enter() + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator RefEnter. + */ +REG_OP(RefEnter) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .REQUIRED_ATTR(frame_name, String) + .REQUIRED_ATTR(is_constant, Bool) + .OP_END_FACTORY_REG(RefEnter) + +/** + *@brief Forwards the input to the output. This op represents the loop + * termination condition . \n + + *@par Inputs: + *x: A boolean scalar. The condition of the Switch op . \n + + *@par Outputs: + *y: The tensor "x" . \n + + *@see Switch() + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator LoopCond. 
+ */ +REG_OP(LoopCond) + .INPUT(x, TensorType({DT_BOOL})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(LoopCond) + +/** + *@brief Makes the input available to the next iteration . \n + + *@par Inputs: + *x: The tensor to be made available to the next iteration. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n + + *@par Outputs: + *y: A Tensor. Has the same type as "x" . \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator NextIteration. + */ +REG_OP(NextIteration) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OP_END_FACTORY_REG(NextIteration) + +/** + *@brief Makes the input available to the next iteration . \n + + *@par Inputs: + *x: The tensor to be made available to the next iteration. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n + + *@par Outputs: + *y: A tensor. Has the same type as "x" . \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator RefNextIteration. + */ +REG_OP(RefNextIteration) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OP_END_FACTORY_REG(RefNextIteration) + +/** + *@brief Exits the current frame to its parent frame . \n + + *@par Inputs: + *x: The tensor to be made available to the parent frame. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n + + *@par Outputs: + *y: A Tensor. Has the same type as "x" . \n + + *@see Enter() + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator Exit. + */ +REG_OP(Exit) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OP_END_FACTORY_REG(Exit) + +/** + *@brief Exits the current frame to its parent frame . \n + + *@par Inputs: + *x: The tensor to be made available to the parent frame. + * Must be one of the following types: float16, float32, float64, int8, + * int16, int32, int64, uint8, uint16, uint32, uint64, bool . \n + + *@par Outputs: + *y: A tensor. Has the same type as "x" . \n + + *@see Enter() | Exit() + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator RefExit. + */ +REG_OP(RefExit) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32, + DT_UINT64, DT_BOOL})) + .OP_END_FACTORY_REG(RefExit) + +/** + *@brief Only useful as a placeholder for control edges. 
+ * It is similar to a no-op that always produces a live control output
+ * even when some control inputs are dead . \n
+
+ *@par Third-party framework compatibility
+ *@Compatible with the TensorFlow operator ControlTrigger.
+ */
+REG_OP(ControlTrigger)
+    .OP_END_FACTORY_REG(ControlTrigger)
+
+/**
+*@brief Returns the index of a shape in the map.
+
+*@par Inputs:
+* Three inputs, including:
+*@li x: A one-dimensional tensor of type int32, specifying the queried shape; max size is 8.
+*@li data_seq: A one-dimensional tensor of type int32, specifying the mapped table to be queried.
+*@li level_index: A one-dimensional tensor of type int32, specifying the secondary index. \n
+
+*@par Outputs:
+*@li y: A Tensor with shape [batch, 8], of type int32, specifying the index of the shape in the map.
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe.
+*/
+REG_OP(MapIndex)
+    .INPUT(x, TensorType({DT_INT32}))
+    .INPUT(data_seq, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(level_index, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(MapIndex)
+}  // namespace ge
+
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_
diff --git a/third_party/fwkacllib/inc/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/inc/ops/ctc_ops.h
new file mode 100644
index 00000000..c6a265cc
--- /dev/null
+++ b/third_party/fwkacllib/inc/inc/ops/ctc_ops.h
@@ -0,0 +1,142 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file ctc_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_
+
+#include "graph/operator.h"
+#include "graph/operator_reg.h"
+
+namespace ge {
+
+/**
+*@brief Calculates the CTC Loss (log probability) for each batch entry.
+Also calculates the gradient. \n
+
+*@par Inputs:
+*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+*@li labels_indices: The indices of a `SparseTensor`.
+`labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for
+`(batch b, time t)`.
+*@li labels_values: The values (labels) associated with the given batch and time.
+*@li sequence_length: A vector containing sequence lengths (batch). \n
+
+*@par Outputs:
+*@li loss: A vector (batch) containing log-probabilities.
+*@li gradient: The gradient of `loss`. 3-D, shape: `(max_time x
+batch_size x num_classes)`. \n
+
+*@par Attributes:
+*@li preprocess_collapse_repeated: Scalar, if true then repeated labels are collapsed prior to
+the CTC calculation. If not specified, defaults to false.
+*@li ctc_merge_repeated: Scalar. If set to false, *during* CTC calculation
+repeated non-blank labels will not be merged and are interpreted as
+individual labels. This is a simplified version of CTC.
+If not specified, defaults to true. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow CTCLoss operator.
+*/ +REG_OP(CTCLoss) + .INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(labels_indices, TensorType({DT_INT64})) + .INPUT(labels_values, TensorType({DT_INT32})) + .INPUT(sequence_length, TensorType({DT_INT32})) + .OUTPUT(loss, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(gradient, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(preprocess_collapse_repeated, Bool, false) + .ATTR(ctc_merge_repeated, Bool, true) + .ATTR(ignore_longer_outputs_than_inputs, Bool, false) + .OP_END_FACTORY_REG(CTCLoss) + +/** +*@brief Performs greedy decoding on the logits given in inputs. \n + +*@par Inputs: +*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. +*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. \n + +*@par Attributes: +*@li merge_repeated: If True, merge repeated classes in output. \n + +*@par Outputs: +*@li decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`, +of a `SparseTensor`. The rows store: [batch, time]. +*@li decoded_values: Values vector, size: `(total_decoded_outputs)`, +of a `SparseTensor`. The vector stores the decoded classes. +*@li decoded_shape: Shape vector, size `(2)`, of the decoded SparseTensor. +Values are: `[batch_size, max_decoded_length]`. +*@li log_probability: Matrix, size `(batch_size x 1)`, containing sequence +log-probabilities. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow CTCGreedyDecoder operator. +*/ +REG_OP(CTCGreedyDecoder) + .INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(sequence_length, TensorType({DT_INT32})) + .ATTR(merge_repeated, Bool, false) + .OUTPUT(decoded_indices, TensorType({DT_INT64})) + .OUTPUT(decoded_values, TensorType({DT_INT64})) + .OUTPUT(decoded_shape, TensorType({DT_INT64})) + .OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(CTCGreedyDecoder) + +/** +*@brief Performs beam search decoding on the logits given in input. \n + +*@par Inputs: +*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. +*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. \n + +*@par Attributes: +*@li merge_repeated: If True, merge repeated classes in output. \n + +*@par Outputs: +*@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j, +size `(total_decoded_outputs[j] x 2)`, has indices of a +`SparseTensor`. The rows store: [batch, time]. +*@li decoded_values: A list (length: top_paths) of values vectors. Vector j, +size `(length total_decoded_outputs[j])`, has the values of a +`SparseTensor`. The vector stores the decoded classes for beam j. +*@li decoded_shape: A list (length: top_paths) of shape vector. Vector j, +size `(2)`, stores the shape of the decoded `SparseTensor[j]`. +Its values are: `[batch_size, max_decoded_length[j]]`. +*@li log_probability: A matrix, shaped: `(batch_size x top_paths)`. The +sequence log-probabilities. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow CTCBeamSearchDecoder operator. 
+*/ +REG_OP(CTCBeamSearchDecoder) + .INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(sequence_length, TensorType({DT_INT32})) + .REQUIRED_ATTR(beam_width, Int) + .REQUIRED_ATTR(top_paths, Int) + .ATTR(merge_repeated, Bool, true) + .DYNAMIC_OUTPUT(decoded_indices, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(decoded_values, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(decoded_shape, TensorType({DT_INT64})) + .OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(CTCBeamSearchDecoder) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/inc/ops/data_flow_ops.h new file mode 100644 index 00000000..0043c027 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/data_flow_ops.h @@ -0,0 +1,2344 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file data_flow_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ + +#include +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief This operation returns true if the queue is closed and false if +the queue is open. \n + +*@par Inputs: +*The input handle must have the resource type. Inputs include: +*handle:A Tensor of type resource. The handle to a queue. \n + +*@par Outputs: +*is_closed:A Tensor of type bool. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow QueueIsClosed operator. +*/ + +REG_OP(QueueIsClosed) + .INPUT(handle, TensorType({DT_RESOURCE})) + .OUTPUT(is_closed, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(QueueIsClosed) + +/** +*@brief Computes the number of elements in the given queue. \n + +*@par Inputs: +*The input handle must have the resource type. Inputs include: +*handle:A Tensor of type mutable resource. The handle to a queue. \n + +*@par Outputs: +*size:A Tensor of type int32. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow QueueSize operator. +*/ + +REG_OP(QueueSize) + .INPUT(handle, TensorType({DT_RESOURCE})) + .OUTPUT(size, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(QueueSize) + +/** +*@brief A queue that produces elements in first-in first-out order. \n + +*@par Attributes: +*@li component_types: A list of DType objects. The length of component_types +must equal the number of tensors in each queue element. +*@li shapes:(Optional.) A list of fully-defined TensorShape objects with the +same length as dtypes, or None. +*@li capacity:An integer. The upper bound on the number of elements that may +be stored in this queue. +*@li container: An optional string. Defaults to "". If non-empty, this queue +is placed in the given container. Otherwise, a default container is used. +*@li shared_name:(Optional.) If non-empty, this queue will be shared under +the given name across multiple sessions. 
\n + +*@par Outputs: +*handle:A Tensor of type mutable resource. The handle to a queue. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow FIFOQueue operator. +*/ + +REG_OP(FIFOQueue) + .OUTPUT(handle, TensorType({DT_RESOURCE})) + .REQUIRED_ATTR(component_types, ListType) + .ATTR(shapes, ListListInt, {}) + .ATTR(capacity, Int, -1) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(FIFOQueue) + +/** +*@brief Enqueues a tuple of one or more tensors in the given queue. \n + +*@par Inputs: +*The input handle must have the resource type. Inputs include: +*@li handle:A Tensor of type mutable resource. The handle to a queue. +*@li components: A list of Tensor objects. One or more tensors from which +the enqueued tensors should be taken. It's a dynamic input. \n + +*@par Attributes: +*timeout_ms: An optional int. Defaults to -1. If the queue is full, this +operation will block for up to timeout_ms milliseconds. Note: This option +is not supported yet. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow QueueEnqueue operator. +*/ + +REG_OP(QueueEnqueue) + .INPUT(handle, TensorType({DT_RESOURCE})) + .DYNAMIC_INPUT(components, TensorType({DT_FLOAT, DT_FLOAT16, \ + DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, \ + DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE, DT_RESOURCE, \ + DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT16, DT_QUINT16, \ + DT_QINT8, DT_QUINT8, DT_QINT32})) + .ATTR(timeout_ms, Int, -1) + .OP_END_FACTORY_REG(QueueEnqueue) + +/** +*@brief Enqueues zero or more tuples of one or more tensors in the given queue. \n + +*@par Inputs: +*The input handle must have the resource type. Inputs include: +*@li handle:A Tensor of type mutable resource. The handle to a queue. +*@li components: A list of Tensor objects. One or more tensors from which +the enqueued tensors should be taken. It's a dynamic input. \n + +*@par Attributes: +*timeout_ms: An optional int. Defaults to -1. If the queue is full, this +operation will block for up to timeout_ms milliseconds. Note: This option +is not supported yet. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow QueueEnqueueMany operator. +*/ + +REG_OP(QueueEnqueueMany) + .INPUT(handle, TensorType({DT_RESOURCE})) + .DYNAMIC_INPUT(components, TensorType({DT_FLOAT, DT_FLOAT16, \ + DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, \ + DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE, DT_RESOURCE, \ + DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT16, DT_QUINT16, \ + DT_QINT8, DT_QUINT8, DT_QINT32})) + .ATTR(timeout_ms, Int, -1) + .OP_END_FACTORY_REG(QueueEnqueueMany) + +/** +*@brief Dequeues n tuples of one or more tensors from the given queue. \n + +*@par Inputs: +*The input handle must have the resource type. Inputs include: +*handle:A Tensor of type mutable resource. The handle to a queue. \n + +*@par Attributes: +*@li timeout_ms: An optional int. Defaults to -1. If the queue is empty, this +operation will block for up to timeout_ms milliseconds. Note: This option is +not supported yet. +*@li component_types: A list of DTypes that has length >= 1. The type of each +component in a tuple. \n + +*@par Outputs: +*components:A list of Tensor objects of type component_types. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow QueueDequeue operator. 
+*/ + +REG_OP(QueueDequeue) + .INPUT(handle, TensorType({DT_RESOURCE})) + .DYNAMIC_OUTPUT(components, TensorType({DT_FLOAT, DT_FLOAT16, \ + DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, \ + DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE, DT_RESOURCE, \ + DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT16, DT_QUINT16, \ + DT_QINT8, DT_QUINT8, DT_QINT32})) + .ATTR(timeout_ms, Int, -1) + .REQUIRED_ATTR(component_types, ListType) + .OP_END_FACTORY_REG(QueueDequeue) + +/** +*@brief Dequeues n tuples of one or more tensors from the given queue. \n + +*@par Inputs: +*The input handle must have the resource type. Inputs include: +*@li handle:A Tensor of type mutable resource. The handle to a queue. +*@li n: A Tensor of type int32. The number of tuples to dequeue. \n + +*@par Attributes: +*@li timeout_ms: An optional int. Defaults to -1. If the queue has fewer than +n elements, this operation will block for up to timeout_ms milliseconds. +Note: This option is not supported yet. +*@li component_types: A list of DTypes that has length >= 1. The type of each +component in a tuple. \n + +*@par Outputs: +*components:A list of Tensor objects of type component_types. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow QueueDequeueMany operator. +*/ + +REG_OP(QueueDequeueMany) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(n, TensorType({DT_INT32})) + .DYNAMIC_OUTPUT(components, TensorType({DT_FLOAT, DT_FLOAT16, \ + DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, \ + DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE, DT_RESOURCE, \ + DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT16, DT_QUINT16, \ + DT_QINT8, DT_QUINT8, DT_QINT32})) + .ATTR(timeout_ms, Int, -1) + .REQUIRED_ATTR(component_types, ListType) + .OP_END_FACTORY_REG(QueueDequeueMany) + +/** +*@brief Dequeues n tuples of one or more tensors from the given queue. \n + +*@par Inputs: +*The input handle must have the resource type. Inputs include: +*@li handle:A Tensor of type mutable resource. The handle to a queue. +*@li n: A Tensor of type int32. The number of tuples to dequeue. \n + +*@par Attributes: +*@li timeout_ms: An optional int. Defaults to -1. If the queue has fewer than +n elements, this operation will block for up to timeout_ms milliseconds. +Note: This option is not supported yet. +*@li component_types: A list of DTypes that has length >= 1. The type of each +component in a tuple. \n + +*@par Outputs: +*components:A list of Tensor objects of type component_types. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow QueueDequeueUpTo operator. +*/ + +REG_OP(QueueDequeueUpTo) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(n, TensorType({DT_INT32})) + .DYNAMIC_OUTPUT(components, TensorType({DT_FLOAT, DT_FLOAT16, \ + DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, \ + DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE, DT_RESOURCE, \ + DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT16, DT_QUINT16, \ + DT_QINT8, DT_QUINT8, DT_QINT32})) + .ATTR(timeout_ms, Int, -1) + .REQUIRED_ATTR(component_types, ListType) + .OP_END_FACTORY_REG(QueueDequeueUpTo) + +/** +*@brief Stage values similar to a lightweight Enqueue. \n + +*@par Inputs: +*The input values must be a list of Tensor objects. Inputs include: +*values: A list of Tensor objects. A list of data types that inserted values +should adhere to. It's a dynamic input. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to 0. Maximum number of +elements in the Staging Area. 
If > 0, inserts on the container will block +when the capacity is reached. +*@li memory_limit: An optional int that is >= 0. Defaults to 0. The maximum +number of bytes allowed for Tensors in the Staging Area. If > 0, inserts will +block until sufficient space is available. +*@li container: An optional string. Defaults to "". If non-empty, this queue +is placed in the given container. Otherwise, a default container is used. +*@li shared_name: An optional string. Defaults to "". It is necessary to +match this name to the matching Unstage Op. \n + +*@see Unstage + +*@par Third-party framework compatibility +*Compatible with tensorflow Stage operator. +*/ + +REG_OP(Stage) + .DYNAMIC_INPUT(values, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \ + DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, \ + DT_DOUBLE, DT_UINT32, DT_UINT64})) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(Stage) + +/** +*@brief Op removes all elements in the underlying container. \n + +*@par Attributes: +*@li capacity: A list of DTypes +*@li memory_limit: An optional int that is >= 0. Defaults to 0. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". +*@li dtypes: A list of DTypes. \n + +*@see Stage + +*@par Third-party framework compatibility +*Compatible with tensorflow StageClear operator. +*/ + +REG_OP(StageClear) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(dtypes, ListType, {}) + .OP_END_FACTORY_REG(StageClear) + +/** +*@brief Op peeks at the values at the specified index. If the underlying +container does not contain sufficient elements this op will block until it does. \n + +*@par Inputs: +*The input values must be type int32. Inputs include: +*values: A Tensor of type int32. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to 0. +*@li memory_limit: An optional int that is >= 0. Defaults to 0. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". +*@li dtypes: A list of DTypes that has length >= 1. \n + +*@par Outputs: +*y:A list of Tensor objects of type dtypes. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow StagePeek operator. +*/ + +REG_OP(StagePeek) + .INPUT(index, TensorType({DT_INT32})) + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, \ + DT_DOUBLE, DT_UINT32, DT_UINT64})) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(dtypes, ListType, {}) + .OP_END_FACTORY_REG(StagePeek) + +/** +*@brief Op returns the number of elements in the underlying container. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to 0. +*@li memory_limit: An optional int that is >= 0. Defaults to 0. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". +*@li dtypes: A list of DTypes that has length >= 1. \n + +*@par Outputs: +*size:A Tensor of type int32. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow StageSize operator. 
+*/ + +REG_OP(StageSize) + .OUTPUT(size, TensorType({DT_INT32})) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(dtypes, ListType, {}) + .OP_END_FACTORY_REG(StageSize) + +/** +*@brief Pop the element at the top of the stack. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*handle: A Tensor of type resource. The handle to a stack. \n + +*@par Attributes: +*elem_type: A DType. The type of the elem that is popped. \n + +*@par Outputs: +*element:A Tensor of type elem_type. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow StackPop operator. +*/ + +REG_OP(StackPop) + .INPUT(handle, TensorType({DT_RESOURCE})) + .OUTPUT(element, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, \ + DT_DOUBLE, DT_UINT32, DT_UINT64})) + .REQUIRED_ATTR(elem_type, Type) + .OP_END_FACTORY_REG(StackPop) + +/** +*@brief Push an element onto the stack. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*@li handle: A Tensor of type resource. The handle to a stack. +*@li elem: A Tensor. The tensor to be pushed onto the stack. \n + +*@par Attributes: +*swap_memory: An optional bool. Defaults to False. Swap elem to CPU. Default +to false. \n + +*@par Outputs: +*y:A Tensor. Has the same type as elem. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow StackPush operator. +*/ + +REG_OP(StackPush) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(element, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, \ + DT_DOUBLE, DT_UINT32, DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, \ + DT_DOUBLE, DT_UINT32, DT_UINT64})) + .ATTR(swap_memory, Bool, false) + .OP_END_FACTORY_REG(StackPush) + +/** +*@brief Close the stack. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*handle: A Tensor of type resource. The handle to a stack. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow StackClose operator. +*/ + +REG_OP(StackClose) + .INPUT(handle, TensorType({DT_RESOURCE})) + .OP_END_FACTORY_REG(StackClose) + +/** +*@brief Create a stack. \n + +*@par Inputs: +*The input max_size must be type int32. Inputs include: +*max_size: A Tensor of type int32. The number of elements of a stack. \n + +*@par Attributes: +*@li stack_name: An optional string. Defaults to "". +*@li elem_type: The elements type of the created Stack. \n + +*@par Outputs: +*handle: A Tensor of type resource. The handle to a stack. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow Stack operator. +*/ + +REG_OP(Stack) + .INPUT(max_size, TensorType({DT_INT32})) + .OUTPUT(handle, TensorType({DT_RESOURCE})) + .ATTR(stack_name, String, "") + .REQUIRED_ATTR(elem_type, Type) + .OP_END_FACTORY_REG(Stack) + +/** +*@brief Partitions "x" into "num_partitions" tensors using indices from "partitions". \n + +*@par Inputs: +*Including: +* @li x: The Tensor to be sliced. Must be one of the following types: +DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, +DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ +DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING. +* @li partitions: A Tensor of type DT_INT32, with any shape. The indices. \n + +*@par Attributes: +*num_partitions: The number of partitions to output. 
\n + +*@par Outputs: +*y: A list of tensors of type DT_INT32. \n + +*@attention Constraints: +*DynamicPartition runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator DynamicPartition. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicPartition) + .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .INPUT(partitions, TensorType({DT_INT32})) + .DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .ATTR(num_partitions, Int, 1) + .OP_END_FACTORY_REG(DynamicPartition) + +/** +*@brief Interleaves the values from the "x" tensors into a single tensor. \n + +*@par Inputs: +*Including: +* @li indices: A list of at least 1 Tensor objects with type DT_INT32. It's a dynamic input. +* @li x: A list with the same length as "indices" of Tensor objects. +Must be one of the following types: DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, +DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_QINT32, +DT_QUINT8, DT_QINT8, DT_STRING, DT_COMPLEX64, DT_COMPLEX128. It's a dynamic input. \n + +*@par Attributes: +*N: An int that is >= 1. Defaults to "1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". \n + +*@attention Constraints: +*DynamicStitch runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator DynamicStitch. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicStitch) + .DYNAMIC_INPUT(indices, TensorType({DT_INT32})) + .DYNAMIC_INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_QINT32, DT_QUINT8, DT_QINT8, DT_STRING, DT_COMPLEX64, \ + DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, \ + DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_QINT32, DT_QUINT8, DT_QINT8, DT_STRING, DT_COMPLEX64, \ + DT_COMPLEX128})) + .ATTR(N, Int, 1) + .OP_END_FACTORY_REG(DynamicStitch) + +/** +*@brief Interleaves the values from the "x" tensors into a single tensor. \n + +*@par Inputs: +*Including: +* @li indices: A list of at least 1 Tensor objects with type DT_INT32. It's a dynamic input. +* @li x: A list with the same length as "indices" of Tensor objects. It's a dynamic input. +Must be one of the following types: DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, +DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING, +DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT32. \n + +*@par Attributes: +*N: An int that is >= 1. Defaults to "1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". \n + +*@attention Constraints: +*ParallelDynamicStitch runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ParallelDynamicStitch. 
+*/ + +REG_OP(ParallelDynamicStitch) + .DYNAMIC_INPUT(indices, TensorType({DT_INT32})) + .DYNAMIC_INPUT(x, + TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, \ + DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \ + DT_QINT8, DT_QUINT8, DT_QINT32 })) + .OUTPUT(y, + TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, \ + DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \ + DT_QINT8, DT_QUINT8, DT_QINT32 })) + .ATTR(N, Int, 1) + .OP_END_FACTORY_REG(ParallelDynamicStitch) + +/** +*@brief Removes all elements in the underlying container. \n + +*@par Attributes:An optional int that is >= 0. Defaults to "0". +*@li capacity: An optional int that is >= 0. Defaults to "0". +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of tf.DTypes. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n + +*@attention Constraints: +*MapClear runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapClear. +*/ + +REG_OP(MapClear) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(MapClear) + +/** +*@brief Returns the number of incomplete elements in the underlying container. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of tf.DTypes. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n + +*@par Outputs: +*size: A Tensor of type DT_INT32. \n + +*@attention Constraints: +*MapIncompleteSize runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapIncompleteSize. +*/ + +REG_OP(MapIncompleteSize) + .OUTPUT(size, TensorType({DT_INT32})) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(MapIncompleteSize) + +/** +*@brief Unstage Op is similar to a lightweight Dequeue. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to 0. +*@li memory_limit: An optional int that is >= 0. Defaults to 0. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". +*@li dtypes: A list of DTypes that has length >= 1. \n + +*@par Outputs: +*y: A list of Tensor objects of type dtypes. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow Unstage operator. +*/ + +REG_OP(Unstage) + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, \ + DT_DOUBLE, DT_UINT32, DT_UINT64})) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .REQUIRED_ATTR(dtypes, ListType) + .OP_END_FACTORY_REG(Unstage) + +/** +*@brief Stage (key, values) in the underlying container which behaves like a hashtable. \n + +*@par Inputs: +*Including: +* @li key: A Tensor of type DT_INT64. +* @li indices: A Tensor of type DT_INT32. +* @li values: A list of Tensor objects for tensor dtypes. 
+A list of data types that inserted values should adhere to of. +Must be one of the following types: DT_FLOAT, DT_FLOAT16, +DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, +DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, +DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, +DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. +It's a dynamic input. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". +Maximum number of elements in the Staging Area. If > 0, +inserts on the container will block when the capacity is reached. +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of tf.DTypes. +*@li container: An optional string. Defaults to "". +If non-empty, this queue is placed in the given container. +Otherwise, a default container is used. +*@li shared_name: An optional string. Defaults to "". +It is necessary to match this name to the matching Unstage Op. \n + +*@attention Constraints: +*MapStage runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapStage. +*/ + +REG_OP(MapStage) + .INPUT(key, TensorType({DT_INT64})) + .INPUT(indices, TensorType({DT_INT32})) + .DYNAMIC_INPUT(values, + TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, \ + DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \ + DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32 })) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(MapStage) + +/** +*@brief Removes and returns the values associated with the key. \n + +*@par Inputs: +*Including: +* @li key: A Tensor of type DT_INT64. +* @li indices: A Tensor of type DT_INT32. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of DTypes that has length >= 1. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n + +*@par Outputs: +*values: A list of Tensor objects. Must be one of the following types: +DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, +DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, +DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, +DT_QINT16, DT_QUINT16, DT_QINT32. \n + +*@attention Constraints: +*MapUnstage runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapUnstage. +*/ + +REG_OP(MapUnstage) + .INPUT(key, TensorType({DT_INT64})) + .INPUT(indices, TensorType({DT_INT32})) + .DYNAMIC_OUTPUT(values, + TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, \ + DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \ + DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32 })) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(MapUnstage) + +/** +*@brief Removes and returns a random (key, value). \n + +*@par Inputs: +*Including: +*indices: A Tensor of type DT_INT32. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". 
+*@li memory_limit: An optional int that is >= 0. Defaults to "0".
+*@li dtypes: A list of DTypes that has length >= 1.
+*@li container: An optional string. Defaults to "".
+*@li shared_name: An optional string. Defaults to "". \n
+
+*@par Outputs:
+*@li key: A Tensor of type DT_INT64.
+*@li values: A list of Tensor objects.
+Must be one of the following types: DT_FLOAT, DT_FLOAT16, DT_INT8,
+DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE,
+DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128,
+DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. \n
+
+*@attention Constraints:
+*MapUnstageNoKey runs on the Ascend AI CPU, which delivers poor performance.
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator MapUnstageNoKey.
+*/
+
+REG_OP(MapUnstageNoKey)
+    .INPUT(indices, TensorType({DT_INT32}))
+    .OUTPUT(key, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(values,
+        TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \
+        DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, \
+        DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \
+        DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32 }))
+    .ATTR(capacity, Int, 0)
+    .ATTR(memory_limit, Int, 0)
+    .ATTR(dtypes, ListType, {})
+    .ATTR(container, String, "")
+    .ATTR(shared_name, String, "")
+    .OP_END_FACTORY_REG(MapUnstageNoKey)
+
+/**
+*@brief Peeks at the values at the specified key. \n
+
+*@par Inputs:
+*Including:
+* @li key: A Tensor of type DT_INT64.
+* @li indices: A Tensor of type DT_INT32. \n
+
+*@par Attributes:
+*@li capacity: An optional int that is >= 0. Defaults to "0".
+*@li memory_limit: An optional int that is >= 0. Defaults to "0".
+*@li dtypes: A list of tf.DTypes that has length >= 1.
+*@li container: An optional string. Defaults to "".
+*@li shared_name: An optional string. Defaults to "". \n
+
+*@par Outputs:
+*values: A list of Tensor objects of type "dtypes".
+Must be one of the following types: DT_FLOAT, DT_FLOAT16, DT_INT8,
+DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL,
+DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64,
+DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. \n
+
+*@attention Constraints:
+*MapPeek runs on the Ascend AI CPU, which delivers poor performance.
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator MapPeek.
+*/
+
+REG_OP(MapPeek)
+    .INPUT(key, TensorType({DT_INT64}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .DYNAMIC_OUTPUT(values,
+        TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \
+        DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, \
+        DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \
+        DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32 }))
+    .ATTR(capacity, Int, 0)
+    .ATTR(memory_limit, Int, 0)
+    .ATTR(dtypes, ListType, {})
+    .ATTR(container, String, "")
+    .ATTR(shared_name, String, "")
+    .OP_END_FACTORY_REG(MapPeek)
+
+/**
+*@brief Returns the number of elements in the underlying container. \n
+
+*@par Attributes:
+*@li capacity: An optional int that is >= 0. Defaults to "0".
+*@li memory_limit: An optional int that is >= 0. Defaults to "0".
+*@li dtypes: A list of tf.DTypes.
+*@li container: An optional string. Defaults to "".
+*@li shared_name: An optional string. Defaults to "". \n
+
+*@par Outputs:
+*size: A Tensor of type DT_INT32. \n
+
+*@attention Constraints:
+*MapSize runs on the Ascend AI CPU, which delivers poor performance.
+ +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapSize. +*/ + +REG_OP(MapSize) + .OUTPUT(size, TensorType({DT_INT32})) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(MapSize) + +/** +*@brief Class wrapping dynamic-sized, per-time-step, write-once Tensor arrays. \n + +*@par Inputs: +*The input size must be type int32. Inputs include: +*@li size: int32 scalar Tensor: the size of the TensorArray. Required if +handle is not provided. \n + +*@par Attributes: +*@li dtype: The data type of this TensorArray. +*@li element_shape: The TensorShape of elements in this TensorArray. +*@li dynamic_size: A boolean that determines whether writes to the +TensorArray are allowed to grow the size. +*@li clear_after_read: Boolean (optional, default: True). If True, clear +TensorArray values +after reading them. This disables read-many semantics, but allows early +release of memory. +*@li identical_element_shapes: If true (default is false), then all elements +in the TensorArray will be expected to have have identical shapes. +*@li tensor_array_name: String: the name of the TensorArray. \n + +*@par Outputs: +*@li handle: The handle to the TensorArray. +*@li flow: A scalar used to control gradient flow. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArray operator. +*/ + +REG_OP(TensorArray) + .INPUT(size, TensorType({DT_INT32})) + .OUTPUT(handle, TensorType({DT_RESOURCE})) + .OUTPUT(flow, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(dtype, Type) + .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) + .ATTR(dynamic_size, Bool, false) + .ATTR(clear_after_read, Bool, true) + .ATTR(identical_element_shapes, Bool, false) + .ATTR(tensor_array_name, String, "") + .OP_END_FACTORY_REG(TensorArray) + +/** +*@brief Delete the TensorArray from its resource container. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*handle: A Tensor of type resource. The handle to a TensorArray +(output of TensorArray or TensorArrayGrad). \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayClose operator. +*/ + +REG_OP(TensorArrayClose) + .INPUT(handle, TensorType({DT_RESOURCE})) + .OP_END_FACTORY_REG(TensorArrayClose) + +/** +*@brief Concat the elements from the TensorArray into value value. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*@li handle: The handle to a TensorArray. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n + +*@par Attributes: +*@li dtype: The type of the elem that is returned. +*@li element_shape_except0: The expected shape of an element, if known, +excluding the first dimension. \n + +*@par Outputs: +*@li value: All of the elements in the TensorArray, concatenated along +the first axis. +*@li lengths: A vector of the row sizes of the original T elements in the +value output. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayConcat operator. 
+*/ + +REG_OP(TensorArrayConcat) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(flow_in, TensorType({DT_FLOAT})) + .OUTPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, + DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, + DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, + DT_QUINT8, DT_QINT32})) + .OUTPUT(lengths, TensorType({DT_INT64})) + .REQUIRED_ATTR(dtype, Type) + .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_RANK) + .OP_END_FACTORY_REG(TensorArrayConcat) + +/** +*@brief All elements selected by indices must have the same shape. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*@li handle: The handle to a TensorArray. +*@li indices: The locations in the TensorArray from which to read tensor +elements. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n + +*@par Attributes: +*@li dtype: The type of the elem that is returned. +*@li element_shape: The expected shape of an element, if known. Used to +validate the shapes of TensorArray elements. If this shape is not fully +specified, gathering zero-size TensorArrays is an error. \n + +*@par Outputs: +*value: All of the elements in the TensorArray, concatenated along a new +axis (the new dimension 0). \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayGather operator. +*/ + +REG_OP(TensorArrayGather) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(flow_in, TensorType({DT_FLOAT})) + .OUTPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, + DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, + DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, + DT_QUINT8, DT_QINT32})) + .REQUIRED_ATTR(dtype, Type) + .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) + .OP_END_FACTORY_REG(TensorArrayGather) + +/** +*@brief Creates a TensorArray for storing the gradients of values in the +given handle. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*@li handle: The handle to a TensorArray. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n + +*@par Attributes: +*source: The gradient source string, used to decide which gradient +TensorArray to return. \n + +*@par Outputs: +*@li grad_handle: A Tensor of type resource. +*@li flow_out: A Tensor of type float. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayGrad operator. +*/ + +REG_OP(TensorArrayGrad) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(flow_in, TensorType({DT_FLOAT})) + .OUTPUT(grad_handle, TensorType({DT_RESOURCE})) + .OUTPUT(flow_out, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(source, String) + .OP_END_FACTORY_REG(TensorArrayGrad) + +/** +*@brief Push an element onto the tensor_array. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*@li handle: The handle to a TensorArray. +*@li index: The position to write to inside the TensorArray. +*@li value: The tensor to write to the TensorArray. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n + +*@par Outputs: +*flow_out: A float scalar that enforces proper chaining of operations. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayWrite operator. 
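+*
+*@par Example
+*An illustrative sketch added by the editor (not part of the original
+*TensorFlow or GE documentation). It assumes the REG_OP-generated classes
+*are exposed as ge::op::TensorArray, ge::op::TensorArrayWrite and
+*ge::op::TensorArrayRead with the usual set_input_* / set_attr_* setters,
+*and that "size_c", "index_c" and "value_c" are previously built operators:
+* @code
+*   ge::op::TensorArray ta("ta");
+*   ta.set_input_size(size_c);              // int32 scalar: array length
+*   ta.set_attr_dtype(ge::DT_FLOAT);        // element type of the array
+*
+*   ge::op::TensorArrayWrite wr("ta_write");
+*   wr.set_input_handle(ta, "handle");      // resource handle from TensorArray
+*   wr.set_input_index(index_c);            // position to write
+*   wr.set_input_value(value_c);            // tensor to store
+*   wr.set_input_flow_in(ta, "flow");       // chain on the flow scalar
+*
+*   ge::op::TensorArrayRead rd("ta_read");
+*   rd.set_input_handle(ta, "handle");
+*   rd.set_input_index(index_c);
+*   rd.set_input_flow_in(wr, "flow_out");   // read is ordered after the write
+*   rd.set_attr_dtype(ge::DT_FLOAT);        // required output dtype
+* @endcode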
+*/ + +REG_OP(TensorArrayWrite) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(index, TensorType({DT_INT32})) + .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, + DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, + DT_STRING, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(flow_in, TensorType({DT_FLOAT})) + .OUTPUT(flow_out, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(TensorArrayWrite) + +/** +*@brief Creates a TensorArray for storing multiple gradients of values in +the given handle. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*@li handle: A Tensor of type resource. The handle to the forward TensorArray. +*@li flow_in: A Tensor of type float. A float scalar that enforces proper +chaining of operations. +*@li shape_to_prepend: A Tensor of type int32. An int32 vector representing +a shape. \n + +*@par Attributes: +*source: A string. The gradient source string, used to decide which gradient +TensorArray to return. \n + +*@par Outputs: +*@li grad_handle: A Tensor of type resource. +*@li flow_out: A Tensor of type float. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayGradWithShape operator. +*/ + +REG_OP(TensorArrayGradWithShape) + .INPUT(handle, TensorType({ DT_RESOURCE })) + .INPUT(flow_in, TensorType({ DT_FLOAT })) + .INPUT(shape_to_prepend, TensorType({ DT_INT32 })) + .OUTPUT(grad_handle, TensorType({ DT_RESOURCE })) + .OUTPUT(flow_out, TensorType({ DT_FLOAT })) + .ATTR(source, String, "") + .OP_END_FACTORY_REG(TensorArrayGradWithShape) + +/** +*@brief Read an element from the TensorArray into output value. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*@li handle: A Tensor of type resource. The handle to a TensorArray. +*@li index: A Tensor of type int32. +*@li flow_in: A Tensor of type float. \n + +*@par Attributes: +*dtype: A DType. \n + +*@par Outputs: +*y: A Tensor of type dtype. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayRead operator. +*/ + +REG_OP(TensorArrayRead) + .INPUT(handle, TensorType({ DT_RESOURCE })) + .INPUT(index, TensorType({ DT_INT32 })) + .INPUT(flow_in, TensorType({ DT_FLOAT })) + .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, + DT_STRING, DT_COMPLEX64, DT_COMPLEX128})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(TensorArrayRead) + +/** +*@brief Scatter the data from the input value into specific TensorArray +elements. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*@li handle: The handle to a TensorArray. +*@li indices: The locations at which to write the tensor elements. +*@li value: The concatenated tensor to write to the TensorArray. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n + +*@par Outputs: +*flow_out: A float scalar that enforces proper chaining of operations. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayScatter operator. 
+*/ + +REG_OP(TensorArrayScatter) + .INPUT(handle, TensorType({ DT_RESOURCE })) + .INPUT(indices, TensorType({ DT_INT32 })) + .INPUT(value, TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, + DT_STRING, DT_COMPLEX64, DT_COMPLEX128 })) + .INPUT(flow_in, TensorType({ DT_FLOAT })) + .OUTPUT(flow_out, TensorType({ DT_FLOAT })) + .OP_END_FACTORY_REG(TensorArrayScatter) + +/** +*@brief Split the data from the input value into TensorArray elements. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*@li handle: The handle to a TensorArray. +*@li value: The concatenated tensor to write to the TensorArray. +*@li lengths: The vector of lengths, how to split the rows of value into +the TensorArray. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n + +*@par Outputs: +*flow_out: A float scalar that enforces proper chaining of operations. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArraySplit operator. +*/ + +REG_OP(TensorArraySplit) + .INPUT(handle, TensorType({ DT_RESOURCE })) + .INPUT(value, TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, + DT_STRING, DT_COMPLEX64, DT_COMPLEX128 })) + .INPUT(lengths, TensorType({ DT_INT64 })) + .INPUT(flow_in, TensorType({ DT_FLOAT })) + .OUTPUT(flow_out, TensorType({ DT_FLOAT })) + .OP_END_FACTORY_REG(TensorArraySplit) + +/** +*@brief Return the number of elements in a TensorArray. \n + +*@par Inputs: +*The input handle must be type resource. Inputs include: +*@li handle: The handle to a TensorArray. +*@li flow_in: A float scalar that enforces proper chaining of operations. \n + +*@par Outputs: +*size: The number of elements in a TensorArray.. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArraySize operator. +*/ + +REG_OP(TensorArraySize) + .INPUT(handle, TensorType({ DT_RESOURCE })) + .INPUT(flow_in, TensorType({ DT_FLOAT })) + .OUTPUT(size, TensorType({ DT_INT32 })) + .OP_END_FACTORY_REG(TensorArraySize) + +/** +*@brief A queue implementation that dequeues elements in a random order. \n + +*@par Attributes: +*@li shapes: (Optional.) A list of fully-defined TensorShape objects with +the same length as dtypes, or None. +*@li capacity: An integer. The upper bound on the number of elements that may +be stored in this queue. +*@li min_after_dequeue: An integer (described above). +*@li seed: An integer. Used to create a random seed. +*@li seed2: An integer. Used to create a random seed. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n + +*@par Outputs: +*handle: A Tensor of type resource. The handle to a stack. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow RandomShuffleQueue operator. +*/ + +REG_OP(RandomShuffleQueue) + .OUTPUT(handle, TensorType({DT_RESOURCE})) + .REQUIRED_ATTR(component_types, ListType) + .ATTR(shapes, ListListInt, {}) + .ATTR(capacity, Int, -1) + .ATTR(min_after_dequeue, Int, 0) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(RandomShuffleQueue) + +/** +*@brief A queue that produces elements in first-in first-out order. \n + +*@par Attributes: +*@li shapes: An optional list of shapes for each component of +a queue element. Defaults to {}. 
The length of this attr must be
+either 0 or the same as the length of "component_types". Shapes of fixed
+rank but variable size are allowed by setting any shape dimension to "-1".
+In this case, the inputs' shape may vary along the given dimension,
+and DequeueMany will pad the given dimension with zeros up to the maximum
+shape of all elements in the given batch. If the length of this attr is "0",
+different queue elements may have different ranks and shapes, but only one
+element may be dequeued at a time.
+*@li capacity: An optional int. Defaults to "-1". The upper bound on the number
+of elements in this queue. Negative numbers mean no limit.
+*@li container: An optional string. Defaults to "". If non-empty, this queue
+is placed in the given container. Otherwise, a default container is used.
+*@li shared_name: An optional string. Defaults to "". If non-empty, this queue
+will be shared under the given name across multiple sessions. \n
+
+*@par Outputs:
+*handle: A Tensor of type DT_RESOURCE. \n
+
+*@attention Constraints:
+*PaddingFIFOQueue runs on the Ascend AI CPU, which delivers poor performance.
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator PaddingFIFOQueue.
+*/
+
+REG_OP(PaddingFIFOQueue)
+    .OUTPUT(handle, TensorType({DT_RESOURCE}))
+    .REQUIRED_ATTR(component_types, ListType)
+    .ATTR(shapes, ListListInt, {})
+    .ATTR(capacity, Int, -1)
+    .ATTR(container, String, "")
+    .ATTR(shared_name, String, "")
+    .OP_END_FACTORY_REG(PaddingFIFOQueue)
+
+/**
+*@brief A queue that produces elements sorted by the first component value. \n
+
+*@par Attributes:
+*@li component_types: An optional list of tf.DTypes. Defaults to {}.
+The type of each component in a value.
+*@li shapes: A list of shapes for each component of a queue element.
+The length of this attr must be either 0 or the same as the length of
+"component_types". If the length of this attr is 0, the shapes of queue
+elements are not constrained, and only one element may be dequeued at a time.
+*@li container: An optional string. Defaults to "". If non-empty, this queue
+is placed in the given container. Otherwise, a default container is used.
+*@li shared_name: An optional string. Defaults to "". If non-empty, this
+queue will be shared under the given name across multiple sessions. \n
+
+*@par Outputs:
+*handle: A Tensor of type DT_RESOURCE. \n
+
+*@attention Constraints:
+*PriorityQueue runs on the Ascend AI CPU, which delivers poor performance.
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator PriorityQueue.
+*/
+
+REG_OP(PriorityQueue)
+    .OUTPUT(handle, TensorType({DT_RESOURCE}))
+    .ATTR(component_types, ListType, {})
+    .ATTR(shapes, ListListInt, {})
+    .ATTR(capacity, Int, -1)
+    .ATTR(container, String, "")
+    .ATTR(shared_name, String, "")
+    .OP_END_FACTORY_REG(PriorityQueue)
+
+/**
+*@brief Closes the given queue. \n
+
+*@par Inputs:
+*Including:
+*handle: A Tensor of type DT_RESOURCE. The handle to a queue. \n
+
+*@par Attributes:
+*cancel_pending_enqueues: An optional bool. Defaults to "False".
+If true, all pending enqueue requests that are blocked on
+the given queue will be canceled. \n
+
+*@attention Constraints:
+*QueueClose runs on the Ascend AI CPU, which delivers poor performance.
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator QueueClose.
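+*
+*@par Example
+*An illustrative sketch added by the editor (not part of the original
+*TensorFlow or GE documentation). It assumes the REG_OP-generated classes
+*ge::op::PaddingFIFOQueue and ge::op::QueueClose expose the usual
+*set_input_* / set_attr_* setters:
+* @code
+*   ge::op::PaddingFIFOQueue queue("fifo_queue");
+*   queue.set_attr_component_types({ge::DT_FLOAT});    // one float component
+*   queue.set_attr_capacity(64);                        // at most 64 elements
+*
+*   ge::op::QueueClose close_op("queue_close");
+*   close_op.set_input_handle(queue, "handle");         // queue resource to close
+*   close_op.set_attr_cancel_pending_enqueues(true);    // abort blocked enqueues
+* @endcode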
+*/ + +REG_OP(QueueClose) + .INPUT(handle, TensorType({DT_RESOURCE})) + .ATTR(cancel_pending_enqueues, Bool, false) + .OP_END_FACTORY_REG(QueueClose) + +/** +*@brief Stage (key, values) in the underlying container which behaves like an ordered associative container. \n + +*@par Inputs: +*Including: +* @li key: A Tensor of type DT_INT64. +* @li indices: A Tensor of type DT_INT32. +* @li values: A list of Must be one of the following types: +DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, +DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, +DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, +DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32 that inserted +values should adhere to. It's a dynamic input. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". +Maximum number of elements in the Staging Area. +If > 0, inserts on the container will block +when the capacity is reached. +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of DTypes. +*@li container: An optional string. Defaults to "". +If non-empty, this queue is placed in the given container. +Otherwise, a default container is used. +*@li shared_name: An optional string. Defaults to "". +It is necessary to match this name to the matching Unstage Op. \n + +*@attention Constraints: +*OrderedMapStage runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapStage. +*/ + +REG_OP(OrderedMapStage) + .INPUT(key, TensorType({DT_INT64})) + .INPUT(indices, TensorType({DT_INT32})) + .DYNAMIC_INPUT(values, + TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, \ + DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \ + DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32 })) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(OrderedMapStage) + +/** +*@brief Returns the number of elements in the underlying container. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of DTypes. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n + +*@par Outputs: +*size: A Tensor of type DT_INT32. \n + +*@attention Constraints: +*OrderedMapSize runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapSize. +*/ + +REG_OP(OrderedMapSize) + .OUTPUT(size, TensorType({DT_INT32})) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(OrderedMapSize) + +/** +*@brief Removes all elements in the underlying container. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of DTypes. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n + +*@attention Constraints: +*OrderedMapClear runs on the Ascend AI CPU, which delivers poor performance. 
+ +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapClear. +*/ + +REG_OP(OrderedMapClear) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(OrderedMapClear) + +/** +*@brief FakeQueue, support tf api FixedLengthRecordReader. \n + +*@par Inputs: +*Including: +* @li resource: A Tensor of type DT_RESOURCE. + +*@par Outputs: +*handle: A Tensor of type DT_STRING ref. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator FakeQueue. +*/ +REG_OP(FakeQueue) + .INPUT(resource, TensorType({DT_RESOURCE})) + .OUTPUT(handle, TensorType({DT_STRING})) + .OP_END_FACTORY_REG(FakeQueue) + +/** +*@brief Returns the number of incomplete elements in the underlying container. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of DTypes. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n + +*@par Outputs: +*size: A Tensor of type DT_INT32. \n + +*@attention Constraints: +*OrderedMapIncompleteSize runs on the Ascend AI CPU, +which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapIncompleteSize. +*/ + +REG_OP(OrderedMapIncompleteSize) + .OUTPUT(size, TensorType({DT_INT32})) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(OrderedMapIncompleteSize) + +/** +*@brief Peeks at the values at the specified key. \n + +*@par Inputs: +*Including: +* @li key: A Tensor of type DT_INT64. +* @li indices: A Tensor of type DT_INT32. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of DTypes that has length >= 1. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n + +*@par Outputs: +*values: A list of Tensor objects. Must be one of the following types: +DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, +DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, +DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. \n + +*@attention Constraints: +*OrderedMapPeek runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapPeek. +*/ + +REG_OP(OrderedMapPeek) + .INPUT(key, TensorType({DT_INT64})) + .INPUT(indices, TensorType({DT_INT32})) + .DYNAMIC_OUTPUT(values, + TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, \ + DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \ + DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32 })) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(OrderedMapPeek) + +/** +*@brief Removes and returns the (key, value) element with the smallest. \n + +*@par Inputs: +*Including: +* @li indices: A Tensor of type DT_INT32. 
\n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of DTypes that has length >= 1. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n + +*@par Outputs: +*@li key: A Tensor of type DT_INT64. +*@li values: A list of Tensor objects. Must be one of the following types: +DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, +DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, DT_UINT64, DT_RESOURCE, DT_STRING, +DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. \n + +*@attention Constraints: +*OrderedMapUnstageNoKey runs on the Ascend AI CPU, +which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapUnstageNoKey. +*/ + +REG_OP(OrderedMapUnstageNoKey) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(key, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(values, + TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_UINT32, \ + DT_UINT64, DT_RESOURCE, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, \ + DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32 })) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(OrderedMapUnstageNoKey) + +/** +*@brief Removes and returns the values associated with the key. \n + +*@par Inputs: +*Including: +* @li key: A Tensor of type DT_INT64. +* @li indices: A Tensor of type DT_INT32. \n + +*@par Attributes: +*@li capacity: An optional int that is >= 0. Defaults to "0". +*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of tf.DTypes that has length >= 1. +*@li container: An optional string. Defaults to "". +*@li shared_name: An optional string. Defaults to "". \n + +*@par Outputs: +*values: A list of Tensor objects. Must be one of the following types: +DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, +DT_FLOAT16, DT_DOUBLE, DT_BOOL, DT_UINT32, DT_UINT64. \n + +*@attention Constraints: +*OrderedMapUnstage runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapUnstage. +*/ + +REG_OP(OrderedMapUnstage) + .INPUT(key, TensorType({DT_INT64})) + .INPUT(indices, TensorType({DT_INT32})) + .DYNAMIC_OUTPUT(values, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, + DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, + DT_DOUBLE, DT_BOOL, DT_UINT32, DT_UINT64})) + .ATTR(capacity, Int, 0) + .ATTR(memory_limit, Int, 0) + .ATTR(dtypes, ListType, {}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(OrderedMapUnstage) + +/** +*@brief A barrier represents a key-value map, where each key is a string, +and each value is a tuple of tensors. \n + +*@par Attributes: +*@li component_types: The type of each component in a value. +*@li shapes: A list of shapes for each component of a queue element. +Each shape must be 1 in the first dimension. +The length of this attr must be the same as +the length of "component_types". +*@li capacity: The capacity of the barrier. +The default capacity is MAX_INT32, +which is the largest capacity of the underlying queue. +*@li container: If non-empty, this barrier is placed in the given container. 
+Otherwise, a default container is used. +*@li shared_name: If non-empty, this barrier will be shared under +the given name across multiple sessions. \n + +*@par Outputs: +*handle: A Tensor of type DT_STRING_REF. The handle to the barrier. \n + +*@attention Constraints: +*Barrier runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Barrier. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(Barrier) + .OUTPUT(handle, TensorType({DT_STRING_REF})) + .REQUIRED_ATTR(component_types, ListType) + .ATTR(shapes, ListListInt, {}) + .ATTR(capacity, Int, -1) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(Barrier) + +/** +*@brief For each key, assigns the respective value to the specified component. \n + +*@par Inputs: +*Including: +* @li handle: A Tensor of type DT_STRING_REF. The handle to a barrier. +* @li keys: A Tensor of type DT_STRING. A 1D tensor of keys. +* @li values: An any-dimensional tensor of values, which are associated +with the respective keys. The 0th dimension must have length n +Must be one of the following types: DT_FLOAT, DT_FLOAT16, DT_INT8, +DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, +DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING. \n + +*@par Attributes: +*component_index: The component of the barrier elements that is being assigned. \n + +*@attention Constraints: +*BarrierInsertMany runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BarrierInsertMany. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(BarrierInsertMany) + .INPUT(handle, TensorType({DT_STRING_REF})) + .INPUT(keys, TensorType({DT_STRING})) + .INPUT(values, + TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .REQUIRED_ATTR(component_index, Int) + .OP_END_FACTORY_REG(BarrierInsertMany) + +/** +*@brief Takes the given number of completed elements from a barrier. \n + +*@par Inputs: +*Including: +* @li handle: A Tensor of type DT_STRING_REF. The handle to a barrier. +* @li num_elements: A Tensor of type DT_INT32. +A single-element tensor containing the number of elements to take. \n + +*@par Attributes: +*@li component_types: The type of each component in a value. +*@li allow_small_batch: Allow to return less than "num_elements" +items if barrier is already closed. +*@li wait_for_incomplete: An any-dimensional tensor +for each component in the barrier element. +*@li timeout_ms: If the queue is empty, this operation will block for up to +"timeout_ms" milliseconds. Note: This option is not supported yet. \n + +*@par Outputs: +*@li indices: A 1D tensor of type DT_INT64. The indices, with length "num_elems". +These indices refer to the batch in which the values were +placed into the barrier. +*@li keys: A 1D tensor of keys, +with length "num_elements" of type DT_STRING. +*@li values: A 1D tensor per component in a barrier element. +All values have length "num_elements" along the 0th dimension. +Must be one of the following types: +DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, +DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, +DT_RESOURCE, DT_STRING. 
\n + +*@attention Constraints: +*BarrierTakeMany runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BarrierTakeMany. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(BarrierTakeMany) + .INPUT(handle, TensorType({DT_STRING_REF})) + .INPUT(num_elements, TensorType(DT_INT32)) + .OUTPUT(indices, TensorType({DT_INT64})) + .OUTPUT(keys, TensorType({DT_STRING})) + .DYNAMIC_OUTPUT(values, + TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .REQUIRED_ATTR(component_types, ListType) + .ATTR(allow_small_batch, Bool, false) + .ATTR(wait_for_incomplete, Bool, false) + .ATTR(timeout_ms, Int, -1) + .OP_END_FACTORY_REG(BarrierTakeMany) + +/** +*@brief Closes the given barrier. \n + +*@par Inputs: +*Including: +*handle: A Tensor of type DT_STRING_REF. The handle to a barrier. \n + +*@par Attributes: +*cancel_pending_enqueues: If true, all pending enqueue requests +that are blocked on the barrier's queue will +be canceled. InsertMany will fail, +even if no new key is introduced. \n + +*@attention Constraints: +*BarrierClose runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BarrierClose. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(BarrierClose) + .INPUT(handle, TensorType({DT_STRING_REF})) + .ATTR(cancel_pending_enqueues, Bool, false) + .OP_END_FACTORY_REG(BarrierClose) + +/** +*@brief Computes the number of complete elements in the given barrier. \n + +*@par Inputs: +*Including: +*handle: A Tensor of type DT_STRING_REF. The handle to a barrier. \n + +*@par Outputs: +*size: A Tensor of type DT_INT32. The number of complete elements. \n + +*@attention Constraints: +*BarrierReadySize runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BarrierReadySize. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(BarrierReadySize) + .INPUT(handle, TensorType({DT_STRING_REF})) + .OUTPUT(size, TensorType(DT_INT32)) + .OP_END_FACTORY_REG(BarrierReadySize) + +/** +*@brief Computes the number of incomplete elements in the given barrier. \n + +*@par Inputs: +*Including: +*handle: A Tensor of type DT_STRING_REF. The handle to a barrier. \n + +*@par Outputs: +*size: A Tensor of type DT_INT32. The number of incomplete elements in the barrier. \n + +*@attention Constraints: +*BarrierIncompleteSize runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BarrierIncompleteSize. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(BarrierIncompleteSize) + .INPUT(handle, TensorType({DT_STRING_REF})) + .OUTPUT(size, TensorType(DT_INT32)) + .OP_END_FACTORY_REG(BarrierIncompleteSize) + +/** +*@brief Emits randomized records. \n + +*@par Attributes: +*@li file_pattern: A string. Glob pattern for the data files. +*@li file_random_seed: An optional int. Defaults to 301. Random seeds used to +produce randomized records. +*@li file_shuffle_shift_ratio: An optional float. Defaults to 0. Shifts the +list of files after the list is randomly shuffled. 
+*@li file_buffer_size: An optional int. Defaults to 10000. The randomization +shuffling buffer. +*@li file_parallelism: An optional int. Defaults to 16. How many sstables are +opened and concurrently iterated over. +*@li batch_size: An optional int. Defaults to 32. The batch size. +*@li compression_type: An optional string. Defaults to "". The type of +compression for the file. Currently ZLIB and GZIP are supported. \n + +*@par Outputs: +*records: A Tensor of type string. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow RecordInput operator. +*/ + +REG_OP(RecordInput) + .OUTPUT(records, TensorType({DT_STRING})) + .REQUIRED_ATTR(file_pattern, String) + .ATTR(file_random_seed, Int, 301) + .ATTR(file_shuffle_shift_ratio, Float, 0) + .ATTR(file_buffer_size, Int, 10000) + .ATTR(file_parallelism, Int, 16) + .ATTR(batch_size, Int, 32) + .ATTR(compression_type, String, "") + .OP_END_FACTORY_REG(RecordInput) + +/** +*@brief A conditional accumulator for aggregating gradients. \n + +*@par Attributes: +*@li dtype: The type of the value being accumulated. +*@li shape: The shape of the values, can be [], in which case shape is unknown. +*@li container: If non-empty, this accumulator is placed in the given container. +Otherwise, a default container is used. +*@li shared_name: If non-empty, this accumulator will be shared under the given +name across multiple sessions. +*@li reduction_type: reduction operator type, default "MEAN". \n + +*@par Outputs: +*handle: A Tensor of type DT_STRING_REF. The handle to the accumulator. \n + +*@attention Constraints: +*ConditionalAccumulator runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ConditionalAccumulator. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ConditionalAccumulator) + .OUTPUT(handle, TensorType({DT_STRING_REF})) + .REQUIRED_ATTR(dtype, Type) + .REQUIRED_ATTR(shape, ListInt) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(reduction_type, String, "MEAN") + .OP_END_FACTORY_REG(ConditionalAccumulator) + +/** +*@brief Applies a gradient to a given accumulator. \n + +*@par Inputs: +*Does not add if "local_step" is lesser than the accumulator's "global_step". +* @li handle: A Tensor of type DT_STRING_REF. The handle to an accumulator. +* @li local_step: A Tensor of type DT_INT64. +The "local_step" value at which the gradient was computed. \n + +* @li gradient: A tensor of the gradient to be accumulated. +Must be one of the following types: +DT_FLOAT16, DT_FLOAT, DT_DOUBLE + +*@par Attributes: +*dtype: Must be one of the following types: +DT_FLOAT16, DT_FLOAT, DT_DOUBLE + +*@attention Constraints: +*AccumulatorApplyGradient runs on the Ascend AI CPU, +which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AccumulatorApplyGradient. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AccumulatorApplyGradient) + .INPUT(handle, TensorType({DT_STRING_REF})) + .INPUT(local_step, TensorType({DT_INT64})) + .INPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(AccumulatorApplyGradient) + +/** +*@brief Returns the number of gradients aggregated in the given accumulators. \n + +*@par Inputs: +*Including: +*handle: A Tensor of type DT_STRING_REF. The handle to an accumulator. 
\n + +*@par Outputs: +*y: A Tensor of type DT_INT32. The number of gradients aggregated +in the given accumulator. \n + +*@attention Constraints: +*AccumulatorNumAccumulated runs on the Ascend AI CPU, +which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AccumulatorNumAccumulated. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AccumulatorNumAccumulated) + .INPUT(handle, TensorType({DT_STRING_REF})) + .OUTPUT(y, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(AccumulatorNumAccumulated) + +/** +*@brief Updates the accumulator with a new value for "global_step". \n + +*@par Inputs: +*Input "new_global_step" is a scalar. +* @li handle: A Tensor of type DT_STRING_REF. The handle to an accumulator. +* @li new_global_step: The new "global_step" value to set A Tensor of type DT_INT64. \n + +*@attention Constraints: +*AccumulatorSetGlobalStep runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AccumulatorSetGlobalStep. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AccumulatorSetGlobalStep) + .INPUT(handle, TensorType({DT_STRING_REF})) + .INPUT(new_global_step, TensorType({DT_INT64})) + .OP_END_FACTORY_REG(AccumulatorSetGlobalStep) + +/** +*@brief Extracts the average gradient in the given ConditionalAccumulator. \n + +*@par Inputs: +* Input "num_required" is a scalar. +* @li handle: A Tensor of type DT_STRING_REF. The handle to an accumulator. +* @li num_required: A Tensor of type DT_INT32. +Number of gradients required before an aggregate is returned. \n + +*@par Attributes: +*dtype: The data type of accumulated gradients. +Needs to correspond to the type of the accumulator. \n + +*@par Outputs: +*y: The average of the accumulated gradients. +Must be one of the following types: +DT_FLOAT16, DT_FLOAT, DT_DOUBLE. \n + +*@attention Constraints: +*AccumulatorTakeGradient runs on the Ascend AI CPU, + which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AccumulatorTakeGradient. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AccumulatorTakeGradient) + .INPUT(handle, TensorType({DT_STRING_REF})) + .INPUT(num_required, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(AccumulatorTakeGradient) + +/** +*@brief A conditional accumulator for aggregating sparse gradients. \n + +*@par Attributes: +*@li shape: The shape of the values. +*@li dtype: The type of the value being accumulated. +*@li container: If non-empty, this accumulator is placed in the given +container. Otherwise, a default container is used. +*@li shared_name: If non-empty, this accumulator will be shared under the +given name across multiple sessions. +*@li reduction_type: The reduction method whose type is string, +default is "MEAN". \n + +*@par Outputs: +*handle: The handle to the accumulator. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SparseConditionalAccumulator operator. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(SparseConditionalAccumulator) + .OUTPUT(handle, TensorType({DT_STRING_REF})) + .REQUIRED_ATTR(shape, ListInt) + .REQUIRED_ATTR(dtype, Type) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(reduction_type, String, "MEAN") + .OP_END_FACTORY_REG(SparseConditionalAccumulator) + +/** +*@brief Applies a sparse gradient to a given accumulator. \n + +*@par Inputs: +*The input handle must be type string_ref. Inputs include: +*@li handle: A Tensor of type mutable string. The handle to a accumulator. +*@li local_step: A Tensor of type int64. The local_step value at which the +sparse gradient was computed. +*@li indices: A Tensor of type int64. Indices of the sparse gradient to be +accumulated. Must be a vector. +*@li values: A Tensor. Values are the non-zero slices of the gradient, +and must have the same first dimension as indices, i.e., the nnz represented +by indices and values must be consistent. +*@li shape: A Tensor of type int64. \n + +*@par Attributes: +*@li has_known_shape: A bool. Boolean indicating whether gradient_shape is +unknown, in which case the input is ignored during validation. +*@li dtype: The data type of accumulated gradients. Needs to correspond to +the type of the accumulator. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SparseAccumulatorApplyGradient operator. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(SparseAccumulatorApplyGradient) + .INPUT(handle, TensorType({DT_STRING_REF})) + .INPUT(local_step, TensorType({DT_INT64})) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_UINT32, \ + DT_UINT64, DT_COMPLEX64, DT_COMPLEX128, DT_QINT16, DT_QUINT16, \ + DT_QINT8, DT_QUINT8, DT_QINT32})) + .INPUT(shape, TensorType({DT_INT64})) + .REQUIRED_ATTR(has_known_shape, Bool) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(SparseAccumulatorApplyGradient) + +/** +*@brief Extracts the average sparse gradient in a SparseConditionalAccumulator. \n + +*@par Inputs: +*The input handle must be type string_ref. Inputs include: +*@li handle: The handle to a SparseConditionalAccumulator. +*@li num_required: Number of gradients required before we return an aggregate. \n + +*@par Attributes: +*dtype: The data type of accumulated gradients. Needs to correspond to the +type of the accumulator. \n + +*@par Outputs: +*@li indices: Indices of the average of the accumulated sparse gradients. +*@li values: Values of the average of the accumulated sparse gradients. +*@li shape: Shape of the average of the accumulated sparse gradients. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SparseAccumulatorTakeGradient operator. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(SparseAccumulatorTakeGradient) + .INPUT(handle, TensorType({DT_STRING_REF})) + .INPUT(num_required, TensorType({DT_INT32})) + .OUTPUT(indices, TensorType({DT_INT64})) + .OUTPUT(values, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT})) + .OUTPUT(shape, TensorType({DT_INT64})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(SparseAccumulatorTakeGradient) + +/** +*@brief A conditional accumulator for aggregating gradients. \n + +*@par Attributes: +* @li dtype: The type of the value being accumulated. 
+* @li shape: The shape of the values, can be [], in which case shape is unknown. +* @li container: If non-empty, this accumulator is placed in the given container. +Otherwise, a default container is used. +* @li shared_name: If non-empty, this accumulator will be shared under the given +name across multiple sessions. +* @li reduction_type: reduction operator type, default "MEAN". \n + +*@par Outputs: +*handle: A Tensor of type DT_RESOURCE. The handle to the accumulator. \n + +*@attention Constraints: +*ResourceConditionalAccumulator runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ResourceConditionalAccumulator. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ResourceConditionalAccumulator) + .OUTPUT(handle, TensorType({DT_RESOURCE})) + .REQUIRED_ATTR(dtype, Type) + .REQUIRED_ATTR(shape, ListInt) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(reduction_type, String, "MEAN") + .OP_END_FACTORY_REG(ResourceConditionalAccumulator) + +/** +*@brief Applies a gradient to a given accumulator. +Does not add if "local_step" is lesser than the accumulator's "global_step". \n + +*@par Inputs: +* @li handle: The handle to an accumulator. +* @li local_step: The "local_step" value at which the gradient was computed. +* @li gradient: A tensor of the gradient to be accumulated. +Must be one of the following types: +DT_FLOAT16, DT_FLOAT, DT_DOUBLE + +*@attention Constraints: +*ResourceAccumulatorApplyGradient runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ResourceAccumulatorApplyGradient. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ResourceAccumulatorApplyGradient) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(local_step, TensorType({DT_INT64})) + .INPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(ResourceAccumulatorApplyGradient) + +/** +*@brief Returns the number of gradients aggregated in the given accumulators. \n + +*@par Inputs: +*handle: The handle to an accumulator. \n + +*@par Outputs: +*num_accumulated: The number of gradients aggregated in the given accumulator. \n + +*@attention Constraints: +*ResourceAccumulatorNumAccumulated runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ResourceAccumulatorNumAccumulated. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ResourceAccumulatorNumAccumulated) + .INPUT(handle, TensorType({DT_RESOURCE})) + .OUTPUT(num_accumulated, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(ResourceAccumulatorNumAccumulated) + +/** +*@brief Updates the accumulator with a new value for "global_step". \n + +*@par Inputs: +* @li handle: The handle to an accumulator. +* @li new_global_step: The new "global_step" value to set. \n + +*@attention Constraints: +*ResourceAccumulatorSetGlobalStep runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ResourceAccumulatorSetGlobalStep. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/
+REG_OP(ResourceAccumulatorSetGlobalStep)
+    .INPUT(handle, TensorType({DT_RESOURCE}))
+    .INPUT(new_global_step, TensorType({DT_INT64}))
+    .OP_END_FACTORY_REG(ResourceAccumulatorSetGlobalStep)
+
+/**
+*@brief Extracts the average gradient in the given ConditionalAccumulator. \n
+
+*@par Inputs:
+* @li handle: The handle to an accumulator.
+* @li num_required: Number of gradients required before an aggregate is returned. \n
+
+*@par Attributes:
+*dtype: The data type of accumulated gradients.
+Needs to correspond to the type of the accumulator. \n
+
+*@par Outputs:
+*average: The average of the accumulated gradients.
+Must be one of the following types:
+DT_FLOAT16, DT_FLOAT, DT_DOUBLE. \n
+
+*@attention Constraints:
+*ResourceAccumulatorTakeGradient runs on the Ascend AI CPU, which delivers poor performance. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator ResourceAccumulatorTakeGradient. \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(ResourceAccumulatorTakeGradient)
+    .INPUT(handle, TensorType({DT_RESOURCE}))
+    .INPUT(num_required, TensorType({DT_INT32}))
+    .OUTPUT(average, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(dtype, Type)
+    .OP_END_FACTORY_REG(ResourceAccumulatorTakeGradient)
+
+/**
+*@brief Enqueues a Tensor on the computation outfeed. \n
+
+*@par Inputs:
+*Inputs include:
+*x: A Tensor. Must be one of the following types: float16, float32,
+float64, int8, int16, uint16, uint8, int32, int64, uint32, uint64,
+bool, double, string. It's a dynamic input. \n
+
+*@par Attributes:
+*channel_name: Name of the operator channel. Defaults to "". \n
+
+*@attention Constraints:
+*OutfeedEnqueueOp runs on the Ascend AI CPU, which delivers poor performance.
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator OutfeedEnqueueOp.
+*/
+REG_OP(OutfeedEnqueueOp)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8,
+        DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32,
+        DT_UINT64, DT_BOOL, DT_DOUBLE, DT_STRING}))
+    .ATTR(channel_name, String, "")
+    .OP_END_FACTORY_REG(OutfeedEnqueueOp)
+
+/**
+*@brief LruCache, creates a cache resource.
+*@par Inputs:
+*No input.
+*@par Attributes:
+*@li cache_size: An optional "int64". Defaults to "100000". The size of the cache.
+*@li load_factor: An optional "float". Defaults to "1". The occupancy ratio at which the cache is considered full.
+*@par Outputs:
+*cache: The created cache resource.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(LruCache)
+    .OUTPUT(cache, TensorType({DT_RESOURCE}))
+    .ATTR(container, String, "")
+    .ATTR(shared_name, String, "LruCache")
+    .ATTR(cache_size, Int, 100000)
+    .ATTR(load_factor, Float, 1)
+    .REQUIRED_ATTR(dtype, Type)
+    .OP_END_FACTORY_REG(LruCache)
+
+/**
+*@brief CacheAdd, inserts ids into the cache and reports which ids are swapped in and out.
+*@par Inputs:
+*@li cache: The cache resource.
+*@li ids: A tensor of ids to insert into the cache.
+*@par Outputs:
+*@li swap_in_id: Ids that are swapped into the cache.
+*@li swap_in_idx: Cache indices of the swapped-in ids.
+*@li swap_out_id: Ids that are swapped out of the cache.
+*@li swap_out_idx: Cache indices of the swapped-out ids.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(CacheAdd)
+    .INPUT(cache, TensorType({DT_RESOURCE}))
+    .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+    .OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+    .OUTPUT(swap_in_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+    .OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+    .OUTPUT(swap_out_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+    .OP_END_FACTORY_REG(CacheAdd)
+
+/**
+*@brief CacheRemoteIndexToLocal, maps ids to their local indices in the cache.
+*@par Inputs:
+*@li cache: The cache resource.
+*@li ids: A tensor of ids to look up in the cache.
+*@par Outputs:
+*local_idx: The local indices of the ids in the cache.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(CacheRemoteIndexToLocal)
+    .INPUT(cache, TensorType({DT_RESOURCE}))
+    .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+    .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+    .OP_END_FACTORY_REG(CacheRemoteIndexToLocal)
+
+/**
+*@brief CacheAllIndexToLocal, returns the local indices of all ids currently in the cache.
+*@par Inputs:
+*cache: The cache resource.
+*@par Outputs:
+*local_idx: The local indices of all ids in the cache.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(CacheAllIndexToLocal)
+    .INPUT(cache, TensorType({DT_RESOURCE}))
+    .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+    .REQUIRED_ATTR(dtype, Type)
+    .OP_END_FACTORY_REG(CacheAllIndexToLocal)
+
+REG_OP(DynamicGetNext)
+    .INPUT(x, TensorType::ALL())
+    .DYNAMIC_OUTPUT(y, TensorType::ALL())
+    .ATTR(output_types, ListType, {})
+    .ATTR(output_shapes, ListListInt, {{}, {}})
+    .ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile")
+    .ATTR(_getnext_inputs_shape_range, String, "")
+    .OP_END_FACTORY_REG(DynamicGetNext)
+} // namespace ge
+
+#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_
diff --git a/third_party/fwkacllib/inc/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/inc/ops/elewise_calculation_ops.h
new file mode 100644
index 00000000..9f981d12
--- /dev/null
+++ b/third_party/fwkacllib/inc/inc/ops/elewise_calculation_ops.h
@@ -0,0 +1,3735 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file elewise_calculation_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_
+#include "graph/operator_reg.h"
+
+namespace ge {
+/**
+*@brief Adds all input tensors element-wise. \n
+
+*@par Inputs:
+*Dynamic inputs, including:
+* @li x: A list of Tensor objects, each with the same shape and type. The supported types are:
+* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
+* qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same shape and type as the elements of "x". \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator AddN.
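+*
+*@par Example
+*An illustrative sketch added by the editor (not part of the original
+*TensorFlow or GE documentation). It assumes the REG_OP-generated class
+*ge::op::AddN provides the usual dynamic-input helpers
+*(create_dynamic_input_x / set_dynamic_input_x) and that "a", "b" and "c"
+*are previously built operators producing tensors of the same shape and type:
+* @code
+*   ge::op::AddN add_n("add_n");
+*   add_n.create_dynamic_input_x(3);   // reserve three dynamic inputs
+*   add_n.set_dynamic_input_x(0, a);
+*   add_n.set_dynamic_input_x(1, b);
+*   add_n.set_dynamic_input_x(2, c);
+*   add_n.set_attr_N(3);               // N must match the number of inputs
+* @endcode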
+*/
+REG_OP(AddN)
+    .DYNAMIC_INPUT(x, TensorType::NumberType())
+    .OUTPUT(y, TensorType::NumberType())
+    .REQUIRED_ATTR(N, Int)
+    .OP_END_FACTORY_REG(AddN)
+
+/**
+*@brief Calculates the reversed outputs of the function "maximum"
+
+*@par Inputs:
+*Three inputs, including:
+* @li grads: A mutable Tensor. Must be one of the following types:
+* float16, float32, int32.
+* @li x1: A mutable Tensor of the same type as "grads".
+* @li x2: A mutable Tensor of the same type as "grads". \n
+
+*@par Attributes:
+*@li grad_x: An optional bool. Defaults to "True".
+* If "True", "y1" will be output.
+* If "False", "y1" will not be output. \n
+
+*@li grad_y: An optional bool. Defaults to "True".
+* If "True", "y2" will be output.
+* If "False", "y2" will not be output. \n
+
+*@par Outputs:
+* @li y1: A mutable Tensor. Has the same type as "grads".
+* @li y2: A mutable Tensor. Has the same type as "grads". \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator MaximumGrad.
+*/
+REG_OP(MaximumGrad)
+    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .ATTR(grad_x, Bool, true)
+    .ATTR(grad_y, Bool, true)
+    .OP_END_FACTORY_REG(MaximumGrad)
+
+/**
+*@brief Calculates the reversed outputs of the function "minimum"
+
+*@par Inputs:
+*Three inputs, including:
+* @li grads: A mutable Tensor. Must be one of the following types:
+* float16, float32, int32.
+* @li x1: A mutable Tensor of the same type as "grads".
+* @li x2: A mutable Tensor of the same type as "grads". \n
+
+*@par Attributes:
+*@li grad_x: An optional bool. Defaults to "True".
+* If "True", "y1" will be output.
+* If "False", "y1" will not be output. \n
+
+*@li grad_y: An optional bool. Defaults to "True".
+* If "True", "y2" will be output.
+* If "False", "y2" will not be output. \n
+
+*@par Outputs:
+* @li y1: A mutable Tensor. Has the same type as "grads".
+* @li y2: A mutable Tensor. Has the same type as "grads". \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator MinimumGrad.
+*/
+REG_OP(MinimumGrad)
+    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .ATTR(grad_x, Bool, true)
+    .ATTR(grad_y, Bool, true)
+    .OP_END_FACTORY_REG(MinimumGrad)
+
+/**
+*@brief Casts a tensor from the src data type to the dst data type. \n
+
+*@par Inputs:
+*One input:
+*x: A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8,
+ int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
+ For float32 type, the actual calculation on the chip is based on float16. \n
+
+*@par Attributes:
+*dst_type: A required attribute of type int32, specifying the dst data type. \n
+
+*@par Outputs:
+*y: A Tensor with the same shape as "x", of the type specified by "dst_type".
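+*
+*@par Example
+*An illustrative sketch added by the editor (not part of the original
+*TensorFlow or GE documentation). It assumes the REG_OP-generated class
+*ge::op::Cast exposes the usual set_input_* / set_attr_* setters and that
+*"x_data" is a previously built operator producing a float16 tensor; the
+*"dst_type" attribute takes the numeric value of the target ge::DataType:
+* @code
+*   ge::op::Cast cast_op("cast_to_int32");
+*   cast_op.set_input_x(x_data);
+*   cast_op.set_attr_dst_type(ge::DT_INT32);   // required: target data type
+* @endcode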
+*/
+REG_OP(Cast)
+    .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8,
+                          DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64,
+                          DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32}))
+    .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8,
+                           DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64,
+                           DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32}))
+    .REQUIRED_ATTR(dst_type, Int)
+    .OP_END_FACTORY_REG(Cast)
+
+/**
+*@brief Returns the truth value of (x1 >= x2) element-wise. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A Tensor. Must be one of the following types: float16, float32,
+* double, int32, int8, uint8, int64, uint16, uint32, uint64.
+* @li x2: A Tensor of the same type as "x1". \n
+
+*@par Outputs:
+*y: A Tensor of type bool. \n
+
+*@par Third-party framework compatibility:
+* Compatible with the TensorFlow operator GreaterEqual.
+*/
+REG_OP(GreaterEqual)
+    .INPUT(x1, TensorType::RealNumberType())
+    .INPUT(x2, TensorType::RealNumberType())
+    .OUTPUT(y, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(GreaterEqual)
+
+/**
+*@brief Returns the truth value of (x1 < x2) element-wise. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A Tensor. Must be one of the following types: float16, float32, double, int32,
+* uint8, int16, int8, int64, uint16, uint32, uint64.
+* @li x2: A Tensor with the same type as "x1". \n
+
+*@par Outputs:
+*y: A Tensor of type bool. \n
+
+*@par Third-party framework compatibility:
+* Compatible with TensorFlow operator Less.
+*/
+REG_OP(Less)
+    .INPUT(x1, TensorType::RealNumberType())
+    .INPUT(x2, TensorType::RealNumberType())
+    .OUTPUT(y, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(Less)
+
+/**
+*@brief Returns x1/x2 element-wise for real types. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li x1: A Tensor. Must be one of the following types: float16, float32, double, uint16,
+  int8, uint8, int16, int32, int64, complex64, complex128.
+*@li x2: A Tensor. Must be one of the following types: float16, float32, double, uint16,
+  int8, uint8, int16, int32, int64, complex64, complex128. \n
+
+*@par Outputs:
+* y: A Tensor. Has the same type and format as input "x1". \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator RealDiv.
+*/
+REG_OP(RealDiv)
+    .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8,
+                           DT_UINT16, DT_INT16, DT_INT32, DT_INT64,
+                           DT_COMPLEX64, DT_COMPLEX128}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_UINT8, DT_INT8,
+                           DT_UINT16, DT_INT16, DT_INT32, DT_INT64,
+                           DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_UINT8, DT_INT8,
+                           DT_UINT16, DT_INT16, DT_INT32, DT_INT64,
+                           DT_COMPLEX64, DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(RealDiv)
+
+/**
+*@brief Computes square root of x element-wise. \n
+
+*@par Inputs:
+* x: A Tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x".
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Sqrt.
+*/
+REG_OP(Sqrt)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(Sqrt)
+
+/**
+*@brief Returns the max of "x1" and "x2" (i.e. x1 > x2 ? x1 : x2) element-wise. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A Tensor. Must be one of the following types: float16, float32, double, int32, int64.
+* @li x2: A Tensor of the same type as "x1". \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x1". \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Maximum.
+*/
+REG_OP(Maximum)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32,
+                           DT_INT64}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32,
+                           DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32,
+                           DT_INT64}))
+    .OP_END_FACTORY_REG(Maximum)
+
+/**
+*@brief Returns the min of "x1" and "x2" (i.e. x1 < x2 ? x1 : x2) element-wise. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A Tensor. Must be one of the following types: float32, float16, double, int32, int64.
+* @li x2: A Tensor of the same type as "x1". \n
+
+*@par Outputs:
+*y: A Tensor of the same type as "x1". \n
+
+*@par Third-party framework compatibility:
+* Compatible with the TensorFlow operator Minimum.
+*/
+REG_OP(Minimum)
+    .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32,
+                           DT_INT64}))
+    .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32,
+                           DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32,
+                           DT_INT64}))
+    .OP_END_FACTORY_REG(Minimum)
+
+/**
+*@brief: Computes the reciprocal of "x". \n
+
+*@par Inputs:
+*One input, including:
+*x: A Tensor of type float16, float32, int32, int64, double,
+* complex64, complex128. The format can be [NCHW, NC1HWC0, NHWC, ND].
+
+*@par Outputs:
+*y: A Tensor with the same type as "x". \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Reciprocal.
+*/
+REG_OP(Reciprocal)
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_FLOAT16,
+                          DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_FLOAT16,
+                           DT_COMPLEX64, DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(Reciprocal)
+
+/**
+*@brief Returns x1 - x2 element-wise.
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, float64,
+* float16, float32, complex128, complex64, uint16.
+* @li x2: A Tensor of the same type as "x1". \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x1".
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Subtract.
+*/
+REG_OP(Sub)
+    .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8,
+                           DT_UINT16, DT_INT16, DT_INT32, DT_INT64,
+                           DT_COMPLEX64, DT_COMPLEX128}))
+    .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8,
+                           DT_UINT16, DT_INT16, DT_INT32, DT_INT64,
+                           DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8,
+                           DT_UINT16, DT_INT16, DT_INT32, DT_INT64,
+                           DT_COMPLEX64, DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(Sub)
+
+/**
+*@brief Computes the absolute value of a tensor. \n
+
+*@par Inputs:
+*One input, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x". \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Abs.
+*/ +REG_OP(Abs) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(Abs) + +/** +*@brief Computes gradients for absolute operation. \n + +* +*@par Inputs: +*@li y: A tensor of type float16 or float32. +*@li dy: A tensor of the same type as "y". +* +*@attention Constraints: +* "dy" has the same type as "y". +* +*@par Outputs: +* z: A tensor. Has the same type as "y". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AbsGrad. +* +*/ +REG_OP(AbsGrad) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(AbsGrad) + +/** +*@brief: Computes the sign of "x". \n + +*@par Inputs: +*x:An ND Tensor of type float16, float32, int32, int64, double, +* complex64, complex128. \n + +*@par Outputs: +*y:An ND Tensor with same type as "x". \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Sign. +*/ +REG_OP(Sign) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT, DT_DOUBLE, DT_INT32, + DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, + DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(Sign) + +/** +*@brief Returns (x1 - x2)(x1 - x2) element-wise. \n + +*@par Inputs: +*Two inputs, including: \n +*@li x1: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64,complex128 +*@li x2: A Tensor. Has the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator SquaredDifference. +*/ +REG_OP(SquaredDifference) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, + DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, + DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, + DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(SquaredDifference) + +/** +*@brief Computes cosine of "x" element-wise. \n + +*@par Inputs: +*x: A Tensor of type float16, float32, double, complex64, complex128. +* the format can be [NCHW,NC1HWC0,NHWC,ND] + +*@par Outputs: +*y: A Tensor of the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cos. \n + +*/ +REG_OP(Cos) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Cos) + +/** +*@brief Returns x1/x2 element-wise. \n + +*@par Inputs: +* Two inputs, including: +*@li x1: A Tensor. Must be one of the following types: +* float16, float32, int32, int8, uint8, float64, int64, uint16, int16, +* complex64, complex128, the format can be [NCHW,NC1HWC0,NHWC,ND]. +*@li x2: A Tensor. Has the same type and format as input "x1". \n + +*@par Outputs: +* y: A Tensor. Has the same type and format as input "x1". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Div. 
+*/ +REG_OP(Div) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT32, + DT_DOUBLE, DT_INT64, DT_UINT16, DT_INT16, + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT32, + DT_DOUBLE, DT_INT64, DT_UINT16, DT_INT16, + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT32, + DT_DOUBLE, DT_INT64, DT_UINT16, DT_INT16, + DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(Div) + +/** +*@brief: Returns the truth value of (x = y) element-wise. \n + +*@par Inputs: +* Two inputs, including: +*@li x1: A Tensor. Must be one of the following types: +* float16, float32, int32, int8, uint8, double, int16, int64, complex64, +* complex128, quint8, qint8, qint32, string, bool. the format can be +* [NCHW, NC1HWC0, NHWC, ND] +*@li x2: A Tensor of the same type and format as "x1". \n + +*@par Outputs: +*y: A Tensor of type bool. \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Equal. +*/ +REG_OP(Equal) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8, + DT_DOUBLE, DT_INT16, DT_INT64, DT_COMPLEX64, + DT_COMPLEX128, DT_QUINT8, DT_QINT8, DT_QINT32, + DT_STRING, DT_BOOL})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8, + DT_DOUBLE, DT_INT16, DT_INT64, DT_COMPLEX64, + DT_COMPLEX128, DT_QUINT8, DT_QINT8, DT_QINT32, + DT_STRING, DT_BOOL})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(Equal) + +/** +*@brief Computes the exponential of "x" element-wise. \n + +*@par Inputs: +*One input:\n +*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n + +*@par Attributes: +*@li base: An optional attribute of type float32, specifying the base gamma. Defaults to "-1.0". +*@li scale: An optional attribute of type float32, specifying the scale alpha. Defaults to "1.0". +*@li shift: An optional attribute of type float32, specifying the shift beta. Defaults to "0.0". \n + +*@par Outputs: +*y: A Tensor of the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Exp. +*/ +REG_OP(Exp) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .ATTR(base, Float, -1.0) + .ATTR(scale, Float, 1.0) + .ATTR(shift, Float, 0.0) + .OP_END_FACTORY_REG(Exp) + +/** +*@brief Computes the exp(x) - 1 element-wise, y = e^x - 1. \n + +*@par Inputs: +*One input: +*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n + +*@par Outputs: +*y: A Tensor of the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Expm1. +*/ +REG_OP(Expm1) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Expm1) + +/** +*@brief: Computes the reciprocal of "x". \n + +*@par Inputs:\n +*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Inv. 
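+*
+*@par Example:
+* A scalar sketch of Inv and of the backprop rule documented for InvGrad below
+* (dx = -1*dy*y*y with y = 1/x); illustrative only, not part of this interface:
+*     float inv(float x)                { return 1.0f / x; }
+*     float inv_grad(float y, float dy) { return -1.0f * dy * y * y; }   // y = 1/x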
+*/ +REG_OP(Inv) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .OP_END_FACTORY_REG(Inv) + +/** +*@brief: Computes "x" reciprocal grad, dx = -1*dy*y*y, where, "y = 1/x", and "dy" + is the corresponding input gradient. \n + +*@par Inputs: +* Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, int32, int8. +* @li grad: A Tensor. Has the same type as "x". \n + +*@par Outputs: +*y: A Tensor, Has the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator InvGrad. +*/ +REG_OP(InvGrad) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .OP_END_FACTORY_REG(InvGrad) + +/** +*@brief: Returns the truth value of (x <= y) element-wise. \n + +*@par Inputs: +* Two inputs, including: +*@li x1: A Tensor. Must be one of the following types: float32, float64, +* int32, uint8, int16, int8, int64, qint8, quint8, qint32, uint16, +* float16, uint32, uint64. +*@li x2: A Tensor of the same type as "x1". \n + +*@par Outputs: +*y: A Tensor of type bool. \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator LessEqual. +*/ +REG_OP(LessEqual) + .INPUT(x1, TensorType::RealNumberType()) + .INPUT(x2, TensorType::RealNumberType()) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(LessEqual) + +/** +*@brief Computes the logarithm of (x + 1) element-wise, y = ln(x + 1). \n + +*@par Inputs: +*One input:\n +*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n + +*@par Outputs: +*y: A Tensor of the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Log1p. +*/ +REG_OP(Log1p) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Log1p) + +/** +*@brief Returns element-wise remainder of division. +*@par Inputs: +*Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: float16, float32, + * int32, int64, int8, uint8, double. +* @li x2: A Tensor of the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". + +*@attention Constraints: +*@li x2: The input data does not support 0 +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*requirement of double thousandths in the mini form +*@li Due to different architectures, the calculation results of this operator +*on NPU and CPU may be inconsistent +*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Mod. +*/ +REG_OP(Mod) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, + DT_INT64, DT_DOUBLE})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, + DT_INT64, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, + DT_INT64, DT_DOUBLE})) + .OP_END_FACTORY_REG(Mod) + +/** +*@brief: Returns the truth value of (x != y) element-wise. \n + +*@par Inputs: +* Two inputs, including: +*@li x1: A Tensor. Must be one of the following types: float16, float32, int32, + * int8, uint8, double, int16, int64, uint16, half, uint32, uint64 +*@li x2: A Tensor of the same type as "x1". 
\n + +*@par Outputs: +*y: A Tensor of type bool. \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator NotEqual. +*/ +REG_OP(NotEqual) + .INPUT(x1, TensorType::RealNumberType()) + .INPUT(x2, TensorType::RealNumberType()) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(NotEqual) + +/** +*@brief Computes numerical negative value element-wise (y = -x) + +*@par Inputs: +* One input: +*x: A Tensor. Must be one of the following types: float16, float32, int32, + * int64, complex64, complex128. \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Neg. +*/ +REG_OP(Neg) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(Neg) + +/** +*@brief Returns x1/x2 element-wise for integer types. \n + +*@par Inputs: +*@li x1: A Tensor. Must be one of the following types: +* float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, +* complex128, float16, uint32, uint64, complex64, complex128. +*@li x2: A Tensor of the same data type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". + +*@attention Constraints: +* Broadcasting is supported. \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator TruncateDiv. \n + +*/ +REG_OP(TruncateDiv) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT32, + DT_DOUBLE, DT_UINT16, DT_INT16, DT_INT64, + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT32, + DT_DOUBLE, DT_UINT16, DT_INT16, DT_INT64, + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT32, + DT_DOUBLE, DT_UINT16, DT_INT16, DT_INT64, + DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(TruncateDiv) + +/** +*@brief Computes x1/x2 element-wise, if x1 == 0, return 0. + +*@par Inputs: +* Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: float16, float32, +* double, complex64, complex128. +* @li x2: A Tensor. Has the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Xdivy. +*/ +REG_OP(Xdivy) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128})) + .OP_END_FACTORY_REG(Xdivy) + +/** +*@brief Computes "x" multiplied by the logarithm of y element-wise, +* if "x" == 0, return "0". \n + +*@par Inputs: +* Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: float16, float32, +* double, complex64, complex128. +* @li x2: A Tensor. Has the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Xlogy. 
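+*
+*@par Example:
+* A scalar sketch of the zero-handling documented for Xlogy and for Xdivy above
+* (illustrative only, not part of this interface):
+*     // requires <cmath>
+*     float xdivy(float x1, float x2) { return x1 == 0.0f ? 0.0f : x1 / x2; }
+*     float xlogy(float x1, float x2) { return x1 == 0.0f ? 0.0f : x1 * std::log(x2); }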
+*/ +REG_OP(Xlogy) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128})) + .OP_END_FACTORY_REG(Xlogy) + +/** +*@brief Computes square of "x" element-wise. \n + +*@par Inputs: +*One input: \n +*x: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128 + +*@par Outputs: +*y: A Tensor. Has the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Square. +*/ +REG_OP(Square) + .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT16, DT_FLOAT, + DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_DOUBLE, DT_FLOAT16, DT_FLOAT, + DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(Square) + + +/** +*@brief Computes reciprocal of square root of "x" element-wise: y = 1/sqrt{x}. \n + +* +*@par Inputs: +* x: An ND or 5HD tensor. Must be one of the following types: float, double, half, + * complex64, complex128. +* +*@par Outputs: +* y: An ND or 5HD tensor. Has the same type as "x". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Rsqrt. +* +*/ +REG_OP(Rsqrt) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Rsqrt) + +/** +*@brief Computes the trignometric inverse sine of "x" element-wise. \n + +* +*@par Inputs: +* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128. +* +*@par Outputs: +* y: A tensor. Has the same type as "x". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Asin. +* +*/ +REG_OP(Asin) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(Asin) + +/** +*@brief Computes gradients for Asin operation. \n + +* +*@par Inputs: +*@li y: A tensor of type float16, float32, float64, int32, int64, complex64, complex128. +*@li dy: A tensor of the same type as "y". +* +*@attention Constraints: +* "dy" has the same type as "y". +* +*@par Outputs: +* z: A tensor. Has the same type as "y". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AsinGrad. +* +*/ +REG_OP(AsinGrad) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(AsinGrad) + +/** +*@brief Computes acos of x element-wise. \n + +* +*@par Inputs: +* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128. +* +*@par Outputs: +* y: A tensor. Has the same type as "x". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Acos. 
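+*
+*@par Example:
+* For reference, the usual principal-value convention applies here, as in the C++ standard
+* library: std::asin(x) lies in [-pi/2, pi/2] and std::acos(x) lies in [0, pi], both defined for
+* x in [-1, 1] (the documentation above does not spell out the output range):
+*     double a = std::acos(0.5);   // ~1.0472 (pi/3), in [0, pi]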
+* +*/ +REG_OP(Acos) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, + DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(Acos) + +/** +*@brief Computes gradients for Acos operation. \n + +* +*@par Inputs: +*@li y: A tensor of type float16 or float32. +*@li dy: A tensor of the same type as "y". +* +*@attention Constraints: +* "dy" has the same shape as "y". +* +*@par Outputs: +* z: A tensor. Has the same type as "y". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AcosGrad. +* +*/ +REG_OP(AcosGrad) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(AcosGrad) + +/** +*@brief Computes inverse hyperbolic cosine of x element-wise. \n + +* +*@par Inputs: +* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. +* +*@attention Constraints: +* x Given an input tensor, the function computes inverse hyperbolic cosine of every element.\n +* Input range is [1, inf]. +* +*@par Outputs: +* y: A tensor. Has the same type as "x". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Acosh. +* +*/ +REG_OP(Acosh) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Acosh) + +/** +*@brief Computes gradients for Acosh operation. \n + +* +*@par Inputs: +*@li y: A tensor of type float16 or float32. +*@li dy: A tensor of the same type as "y". +* +*@attention Constraints: +* "dy" has the same type as "y". +* +*@par Outputs: +* z: A tensor. Has the same type as "y". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AcoshGrad. +* +*/ +REG_OP(AcoshGrad) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(AcoshGrad) + +/** +*@brief Returns the truth value of x1 OR x2 element-wise. \n + +* +*@par Inputs: +*@li x1: A tensor of type bool. +*@li x2: A tensor of the same type as "x1". +* +*@attention Constraints: +* LogicalOr supports broadcasting. +* +*@par Outputs: +* y: A tensor of the same type as "x1". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator LogicalOr. +* +*/ +REG_OP(LogicalOr) + .INPUT(x1, TensorType({DT_BOOL})) + .INPUT(x2, TensorType({DT_BOOL})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(LogicalOr) + +/** +*@brief Returns the truth value of x1 AND x2 element-wise. \n + +* +*@par Inputs: +*@li x1: A tensor of type bool. +*@li x2: A tensor of the same type as "x1". +* +*@attention Constraints: +* LogicalAnd supports broadcasting. +* +*@par Outputs: +* y: A tensor of the same type as "x1". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator LogicalAnd. +* +*/ +REG_OP(LogicalAnd) + .INPUT(x1, TensorType({DT_BOOL})) + .INPUT(x2, TensorType({DT_BOOL})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(LogicalAnd) + +/** +*@brief Computes the Bessel i0e function of "x" element-wise. +* Exponentially scaled modified Bessel function of order 0 +* defined as: bessel_i0e(x) = exp(-abs(x)) bessel_i0(x). +* This function is faster and numerically stabler than "bessel_i0(x)". +* +*@par Inputs: +* x: A tensor of type float16, float32, or float64. 
+*
+*@par Outputs:
+* y: A tensor. Has the same type as "x".
+*
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator BesselI0e.
+*
+*/
+REG_OP(BesselI0e)
+    .INPUT(x, TensorType::FloatingDataType())
+    .OUTPUT(y, TensorType::FloatingDataType())
+    .OP_END_FACTORY_REG(BesselI0e)
+
+/**
+*@brief Computes the Bessel i1e function of "x" element-wise.
+* Exponentially scaled modified Bessel function of order 1
+* defined as: bessel_i1e(x) = exp(-abs(x)) bessel_i1(x).
+* This function is faster and numerically stabler than "bessel_i1(x)".
+*
+*@par Inputs:
+* x: A tensor of type float16, float32, or float64.
+*
+*@par Outputs:
+* y: A tensor. Has the same type as "x".
+*
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator BesselI1e.
+*
+*/
+REG_OP(BesselI1e)
+    .INPUT(x, TensorType::FloatingDataType())
+    .OUTPUT(y, TensorType::FloatingDataType())
+    .OP_END_FACTORY_REG(BesselI1e)
+
+/**
+* @brief Computes logarithm of x element-wise.
+* y = log_base(shift + scale * x), with "base" > 0. \n
+
+* @par Inputs:
+* @li x: A Tensor of type complex64, complex128, float16, float32 or double. \n
+
+* @par Attributes:
+* @li base: An optional float32, specifying the base. Defaults to "-1.0", which means base "e".
+* @li scale: An optional float32, specifying the scale of input "x". Defaults
+* to "1.0".
+* @li shift: An optional float32, specifying the shift. Defaults to "0.0". \n
+
+* @par Outputs:
+* y: A Tensor of the same type as "x". \n
+
+* @attention Constraints:
+* @li "base" is supposed to be greater than 0. Retaining the default
+* value "-1" sets "base" to "e".
+* @li If the input value of operator Log is within the range (0, 0.01] or
+* [0.95, 1.05], the output accuracy is subject to change. \n
+
+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator Log.
+* @li Compatible with the Caffe operator Log.
+*/
+REG_OP(Log)
+    .INPUT(x, TensorType::UnaryDataType())
+    .OUTPUT(y, TensorType::UnaryDataType())
+    .ATTR(base, Float, -1.0)
+    .ATTR(scale, Float, 1.0)
+    .ATTR(shift, Float, 0.0)
+    .OP_END_FACTORY_REG(Log)
+
+/**
+* @brief Returns x1 * x2 element-wise.
+* y = x1 * x2
+
+* @par Inputs:
+* @li x1: A Tensor. Must be one of the following types: float16, float32,
+* float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128.
+* @li x2: A Tensor. Must be one of the following types: float16, float32,
+* float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. \n
+
+* @par Outputs:
+* y: A Tensor. Must be one of the following types: float16, float32, float64,
+* uint8, int8, uint16, int16, int32, int64, complex64, complex128. \n
+
+* @attention Constraints:
+* @li "x1" and "x2" must have the same type and broadcast-compatible shapes. \n
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator Multiply.
+*/
+REG_OP(Mul)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_UINT8, DT_INT8,
+                           DT_UINT16, DT_INT16, DT_INT32, DT_INT64,
+                           DT_COMPLEX64, DT_COMPLEX128}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_UINT8, DT_INT8,
+                           DT_UINT16, DT_INT16, DT_INT32, DT_INT64,
+                           DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_UINT8, DT_INT8,
+                           DT_UINT16, DT_INT16, DT_INT32, DT_INT64,
+                           DT_COMPLEX64, DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(Mul)
+
+/**
+* @brief Computes the gradient of the square root of "x" with regard to its
+* input. grad = dy * 0.5/y, where y = sqrt(x), and "dy" is the corresponding
+* input gradient.
\n + +* @par Inputs: +* Two inputs, including: +* @li y: A Tensor of type float32 or float16. +* @li dy: A Tensor. Has the same type as "y". \n + +* @par Outputs: +* z: A Tensor. Has the same type as "y". \n + +* @attention Constraints: +* "dy" has the same shape and type as "y". +*/ +REG_OP(SqrtGrad) + .INPUT(y, TensorType(UnaryDataType)) + .INPUT(dy, TensorType(UnaryDataType)) + .OUTPUT(z, TensorType(UnaryDataType)) + .OP_END_FACTORY_REG(SqrtGrad) + +/** +*@brief Returns x + y element-wise. +*@par Inputs: +*Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, float64, +* float16, float32, complex128, complex64, string. +* @li x2: A Tensor of the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Add. +*/ +REG_OP(Add) + .INPUT(x1, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, + DT_INT8, DT_UINT8, DT_DOUBLE, DT_COMPLEX128, + DT_COMPLEX64, DT_STRING})) + .INPUT(x2, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, + DT_INT8, DT_UINT8, DT_DOUBLE, DT_COMPLEX128, + DT_COMPLEX64, DT_STRING})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, + DT_INT8, DT_UINT8, DT_DOUBLE, DT_COMPLEX128, + DT_COMPLEX64, DT_STRING})) + .OP_END_FACTORY_REG(Add) + +/** +*@brief Confuse broadcast, add and mul. \n + +*@par Inputs: +*Five inputs, including: +* @li x1: A Tensor. Must be one of the following types:int32 float16, float32. +* @li x2: A Tensor of the same type as "x1". +* @li x3: A Tensor of the same type as "x1". \n + +*@par Outputs: +*@li y: A Tensor. Has the same type as "x1". \n + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator LRN. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(FusedMulAdd) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x3, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OP_END_FACTORY_REG(FusedMulAdd) + +/** +*@brief Returns x1 + x2 element-wise. \n + +* +*@par Inputs: +*@li x1: A tensor. Must be one of the following types: float16, float32, float64, uint8, int8, int16, int32, int64, complex64, complex128. +*@li x2: A tensor of the same type as "x1". +* +*@attention Constraints: +* AddV2 supports broadcasting. +* +*@par Outputs: +* y: A tensor. Has the same type as "x1". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AddV2. +* +*/ +REG_OP(AddV2) + .INPUT(x1, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, + DT_INT8, DT_UINT8, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128})) + .INPUT(x2, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, + DT_INT8, DT_UINT8, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, + DT_INT8, DT_UINT8, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128})) + .OP_END_FACTORY_REG(AddV2) + +/** +*@brief Updates "ref" by adding "value" to it. \n + +*@par Inputs: +*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*@li value: A Tensor of the same type as "ref". \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". 
+ If "True", the addition will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +* This attribute is reserved. \n + +*@par Outputs: +*ref: A Tensor that holds the new value of ref after the value has been added. \n + +*@attention Constraints: +*An input tensor of type int64 must have a shape with size 1. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AssignAdd. +*/ +REG_OP(AssignAdd) + .INPUT(ref, TensorType::BasicType()) + .INPUT(value,TensorType::BasicType()) + .OUTPUT(ref, TensorType::BasicType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(AssignAdd) + +/** +*@brief Updates "ref" by assigning "value" to it. \n + +*@par Inputs: +*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*@li value: A Tensor of the same type as "ref". \n + +*@par Attributes: +*@li validate_shape: An optional bool. Defaults to "true". + If "true", the operation will validate that the shape of "value" matches the shape of the Tensor being assigned to. +* If "false", "ref" will take on the shape of "value". +* This attribute is reserved. +*@li use_locking: An optional bool. Defaults to True. + If True, the assignment will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. +* This attribute is reserved. \n + +*@par Outputs: +*ref: A Tensor that holds the new value of ref after the value has been assigned. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Assign. +*/ +REG_OP(Assign) + .INPUT(ref, TensorType::BasicType()) + .INPUT(value,TensorType::BasicType()) + .OUTPUT(ref, TensorType::BasicType()) + .ATTR(validate_shape, Bool, true) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(Assign) + +/** +*@brief Updates "var" by subtracting "value" from it.\n +* This operation outputs "var" after the update is done. \n +* This makes it easier to chain operations that need to use the reset value. \n + +* +*@par Inputs: +*@li var: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64 +*@li value: A tensor of the same type as "var". +* +*@par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", the subtraction will be protected \n +* by a lock; otherwise the behavior is undefined, but may exhibit less contention. +* +*@par Outputs: +* y: A tensor. Has the same type as "var". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AssignSub. +* +*/ +REG_OP(AssignSub) + .INPUT(var, TensorType::NumberType()) + .INPUT(value,TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(AssignSub) + +/** +*@brief: Computes the backpropagation of the square root operation. \n + +*@par Inputs: +* Two inputs, including: +*@li y: An NCHW, NC1HWC0, NHWC, ND Tensor. Must be one of the following types: \ + * float, int32, int8, double, complex64, complex128, half. +*@li dy: A Tensor of the same type and format as "y". \n + +*@par Outputs: +*z: A Tensor of the same type and format as "y". \n + +*@see Matmul() | Rsqrt () + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator RsqrtGrad. 
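+*
+*@par Example:
+* The formula is not stated above; a scalar sketch of the usual rsqrt backprop rule, as used by
+* the TensorFlow RsqrtGrad kernel (with y = 1/sqrt(x), dz = -0.5 * dy * y^3); illustrative only:
+*     float rsqrt_grad(float y, float dy) { return -0.5f * dy * y * y * y; }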
+*/ +REG_OP(RsqrtGrad) + .INPUT(y, TensorType({UnaryDataType,int32,int8})) + .INPUT(dy, TensorType({UnaryDataType,int32,int8})) + .OUTPUT(z, TensorType({UnaryDataType,int32,int8})) + .OP_END_FACTORY_REG(RsqrtGrad) + +/** +*@brief Computes hyperbolic sine of "x" element-wise. \n + +*@par Inputs: +*x: An NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64, + * complex128, half. \n + +*@par Outputs: +*y: A NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64, + * complex128, half. \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Sinh. \n + +*/ +REG_OP(Sinh) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Sinh) + +/** +*@brief: Clips tensor values to a specified min and max. \n + +*@par Inputs: +* Three inputs, including: +*@li x: A Tensor of type float32, float64, int32, uint8, int16, int8, complex64, int64, +*qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. +*@li clip_value_min: A Tensor of the same type as "x". +*@li clip_value_max: A Tensor of the same type as "x". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ClipByValue. +*/ +REG_OP(ClipByValue) + .INPUT(x, TensorType::NumberType()) + .INPUT(clip_value_min, TensorType::NumberType()) + .INPUT(clip_value_max, TensorType::NumberType()) + .OUTPUT(y, TensorType::NumberType()) + .OP_END_FACTORY_REG(ClipByValue) + +/** +*@brief Computes cosine of "x" element-wise. \n + +*@par Inputs: +*x: A Tensor of type float16, float32, double, complex64, complex128. +* the format can be [NCHW,NC1HWC0,NHWC,ND]. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cosh. \n + +*/ +REG_OP(Cosh) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Cosh) + +/** +*@brief: Returns 0 if the denominator is zero, else, like Div. \n + +*@par Inputs: +* Two inputs, including: +*@li x1: A Tensor. Must be one of the following types:float16, float32, int32, +* int8, uint8, double, the format can be [NCHW,NC1HWC0,NHWC,ND]. +*@li x2: A Tensor of the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator DivNoNan. +*/ +REG_OP(DivNoNan) + .INPUT(x1, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16, + DT_DOUBLE})) + .INPUT(x2, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16, + DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16, + DT_DOUBLE})) + .OP_END_FACTORY_REG(DivNoNan) + +/** +*@brief Reverses specific dimensions of a tensor. \n + +*@par Inputs: +* One input: \n +*x: A Tensor, Must be one of the following types: +* int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64, +* and format can be [NCHW,NC1HWC0,NHWC,ND] + +*@par Outputs: +*y: A Tensor. Has the same type and format as "x" + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Invert. +*/ +REG_OP(Invert) + .INPUT(x, TensorType::IntegerDataType()) + .OUTPUT(y, TensorType::IntegerDataType()) + .OP_END_FACTORY_REG(Invert) + +/** +*@brief Returns a tensor of the same shape and type with all elements set to one. +*@par Inputs: +*One input: \n +*x: A Tensor. 
Must be one of the following types: float16, float32, double, int8, uint8,
+ * int16, uint16, int32, int64, complex128, bool. \n
+
+*@par Outputs:
+*y: A Tensor of the same type as "x". \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow operator OnesLike.
+*/
+REG_OP(OnesLike)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8,
+                          DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
+                          DT_INT64, DT_COMPLEX128, DT_BOOL}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8,
+                           DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
+                           DT_INT64, DT_COMPLEX128, DT_BOOL}))
+    .OP_END_FACTORY_REG(OnesLike)
+
+/**
+*@brief Computes the gradient for the inverse of "x" with regard to its input. \n
+
+*@par Inputs:
+*@li y: A Tensor. Must be one of the following types: float, double,
+* complex64, complex128, half.
+*@li dy: A Tensor. Must be one of the following types: float, double,
+* complex64, complex128, half. \n
+
+*@par Outputs:
+*z: A Tensor. Must be one of the following types: float, double,
+* complex64, complex128, half. \n
+
+*@attention Constraints:
+* "dy" has the same shape and type as "y". \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator reciprocal_grad.
+*/
+REG_OP(ReciprocalGrad)
+    .INPUT(y, TensorType::UnaryDataType())
+    .INPUT(dy, TensorType::UnaryDataType())
+    .OUTPUT(z, TensorType::UnaryDataType())
+    .OP_END_FACTORY_REG(ReciprocalGrad)
+
+/**
+*@brief Returns the truth value of (x1 > x2) element-wise. \n
+
+*@par Inputs:
+*@li x1: A Tensor of type float16, float32, double, int64, int32, int16, int8,
+* uint8, uint16, uint32, uint64.
+*@li x2: A Tensor of the same data type as "x1". \n
+
+*@par Outputs:
+*y: A Tensor of type bool.
+
+*@attention Constraints:
+* Broadcasting is supported. \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Greater. \n
+
+*/
+REG_OP(Greater)
+    .INPUT(x1, TensorType::RealNumberType())
+    .INPUT(x2, TensorType::RealNumberType())
+    .OUTPUT(y, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(Greater)
+
+/**
+*@brief Returns a tensor of the same type and shape as the input tensor with all elements set to zero. \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types:
+* float32, float64, int32, uint8, int16, int8,
+* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
+* complex128, float16, uint32, uint64. \n
+
+*@par Outputs:
+*y: A Tensor of the same data type as "x". \n
+
+*@attention Constraints:
+* The output has the same shape and type as the input. \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator zeros_like.
+*/
+REG_OP(ZerosLike)
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(ZerosLike)
+
+/**
+*@brief Returns the truth value of NOT "x" element-wise. \n
+
+*@par Inputs:
+*x: A Tensor of type bool. \n
+
+*@par Outputs:
+*y: A Tensor of type bool. \n
+
+*@attention Constraints:
+* The input and output values are "1" or "0", corresponding to bool values "true" and "false". \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator logical_not.
+*/
+REG_OP(LogicalNot)
+    .INPUT(x, TensorType({DT_BOOL}))
+    .OUTPUT(y, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(LogicalNot)
+
+/**
+*@brief Computes inverse hyperbolic sine of x element-wise.
+* Given an input tensor, this function computes inverse hyperbolic sine for every element in the tensor.
\n + +* +*@par Inputs: +* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. +* +*@par Outputs: +* y: A tensor. Has the same type as "x". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Asinh. +* +*/ +REG_OP(Asinh) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Asinh) + +/** +*@brief Computes gradients for Asinh operation. \n + +* +*@par Inputs: +*@li y: A tensor. Must be one of the following types: float16, float32. +*@li dy: A tensor of the same type as "y" +* +*@par Outputs: +* z: A tensor. Has the same type as "y". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AsinhGrad. +* +*/ +REG_OP(AsinhGrad) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(AsinhGrad) + +/** +*@brief Computes inverse hyperbolic tangent of x element-wise.\n +* Given an input tensor, this function computes inverse hyperbolic tangent for every element in the tensor. \n Input range is [-1,1] and output range is [-inf, inf]. If input is -1, \n output will be -inf and if the input is 1, output will be inf.\n Values outside the range will have nan as output. \n + +* +*@par Inputs: +* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. +* +*@par Outputs: +* y: A tensor. Has the same type as "x". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Atanh. +* +*/ +REG_OP(Atanh) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Atanh) + +/** +*@brief Computes the trignometric inverse tangent of x element-wise. +* The atan operation returns the inverse of tan, such that if y = tan(x) then, x = atan(y). \n + +* +*@par Inputs: +* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. +* +*@par Outputs: +* y: A tensor. Has the same type as "x". The output of atan will lie within the invertible range of tan, i.e (-pi/2, pi/2). +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Atan. +* +*/ +REG_OP(Atan) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Atan) + +/** +*@brief Computes gradients for Atan operation. \n + +* +*@par Inputs: +*@li y: A tensor of type float16 or float32. +*@li dy: A tensor of the same type as "y" +* +*@par Outputs: +* z: A tensor. Has the same type as "y". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AtanGrad. +* +*/ +REG_OP(AtanGrad) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(AtanGrad) + +/** +*@brief Computes arctangent of x1/x2 element-wise, respecting signs of the arguments. \n + +* +*@par Inputs: +*@li x1: A tensor. Must be one of the following types: float16, float32, float64 +*@li x2: A tensor of the same type as "x1". +* +*@par Outputs: +* y: A tensor. Has the same type as "x1". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Atan2. 
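+*
+*@par Example:
+* The convention matches std::atan2, i.e. the quadrant is chosen from the signs of both
+* arguments (scalar sketch, illustrative only):
+*     // requires <cmath>
+*     double atan2_ref(double x1, double x2) { return std::atan2(x1, x2); }
+*     // atan2_ref(1.0, -1.0) == 3*pi/4, whereas atan(1.0 / -1.0) == -pi/4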
+* +*/ +REG_OP(Atan2) + .INPUT(x1, TensorType::FloatingDataType()) + .INPUT(x2, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(Atan2) + +/** +*@brief Returns the truth value of abs(x1-x2) < tolerance element-wise. \n + +* +*@par Inputs: +*@li x1: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 +*@li x2: A tensor of the same type as "x1". +* +*@par Attributes: +* tolerance: Defaults to "1e-05". +* +*@par Outputs: +* y: A tensor of type bool. +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApproximateEqual. +* +*/ +REG_OP(ApproximateEqual) + .INPUT(x1, TensorType::NumberType()) + .INPUT(x2, TensorType::NumberType()) + .OUTPUT(y, TensorType({DT_BOOL})) + .ATTR(tolerance, Float, 1e-5) + .OP_END_FACTORY_REG(ApproximateEqual) + +/** +*@brief Returns the element-wise sum of a list of tensors.\n +* AccumulateNV2 performs the same operation as AddN, but does not wait for all of its inputs +to be ready before beginning to sum.\n This can save memory if inputs are ready at different times, +since minimum temporary storage is proportional to the output size rather than the inputs size. + Returns a Tensor of same shape and type as the elements of inputs. \n + +* +*@par Inputs: +*Dynamic inputs, including: +* x: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, +qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. It's a dynamic input. \n +* +*@par Outputs: +* y: A tensor. Has the same type as "x". +* +*@par Attributes: +* N: the size of x. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AccumulateNV2. +* +*/ +REG_OP(AccumulateNV2) + .DYNAMIC_INPUT(x, TensorType::NumberType()) + .OUTPUT(y, TensorType::NumberType()) + .REQUIRED_ATTR(N, Int) + .OP_END_FACTORY_REG(AccumulateNV2) + +/** +*@brief Fake-quantizes the input Tensor, type float to output a Tensor of same type. +* [min, max] define the clamping range for the "inputs" data.\n +* the values of "x" are quantized into the quantization range ([0, 2^num_bits - 1] \n +* when "narrow_range" is "false" or [1, 2^num_bits - 1] when it is "true") and \n +* then de-quantized and output as float32 in [min; max] interval.\n +* num_bits is the bit width of the quantization, between 2 and 16, inclusive. \n +* Quantization is called fake since the output is still in floating point. \n + +*@par Inputs: +*One input: +*x: A Tensor of type float32. \n + +*@par Attributes: +*@li min: An optional attribute. Defaults to "-6.0". +*@li max: An optional attribute. Defaults to "6.0". +*@li num_bits: An optional attribute. Defaults to "8". +*@li narrow_range: An optional bool. Defaults to "false". \n + +*@par Outputs: +*y: A Tensor. Has the same shape and type of "x". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator FakeQuantWithMinMaxArgs. +*/ +REG_OP(FakeQuantWithMinMaxArgs) + .INPUT(x, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(min, Float, -6.0) + .ATTR(max, Float, 6.0) + .ATTR(num_bits, Int, 8) + .ATTR(narrow_range, Bool, false) + .OP_END_FACTORY_REG(FakeQuantWithMinMaxArgs) + +/** +*@brief Computes gradients for a FakeQuantWithMinMaxArgs operation. \n + +*@par Inputs: +*Two inputs, including: \n +*@li gradients: A Tensor of type float32. 
Backpropagated gradients above the FakeQuantWithMinMaxArgs operation. +*@li x: A Tensor of type float32. Has the same type and format as "gradients".\n +* This is the input Tensor of the FakeQuantWithMinMaxArgs operator.\n + +*@par Attributes: +*@li min: An optional attribute. Defaults to "-6.0". +*@li max: An optional attribute. Defaults to "6.0". +*@li num_bits: An optional attribute. Defaults to "8". +*@li narrow_range: An optional bool. Defaults to "False". \n + +*@par Outputs: +*y: A Tensor of type float32. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator FakeQuantWithMinMaxArgsGradient. +*/ +REG_OP(FakeQuantWithMinMaxArgsGradient) + .INPUT(gradients, TensorType({DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(min, Float, -6.0) + .ATTR(max, Float, 6.0) + .ATTR(num_bits, Int, 8) + .ATTR(narrow_range, Bool, false) + .OP_END_FACTORY_REG(FakeQuantWithMinMaxArgsGradient) + +/** +*@brief Fake-quantize the 'inputs' tensor of type float via global float scalars. \n + +*@par Inputs: +*Three inputs, including: +*@li x: A Tensor of type float32. +*@li min: A Tensor of type float32. Has the same type and format as "x". +*@li max: A Tensor of type float32. Has the same type and format as "x".\n +*[min; max] define the clamping range for the inputs data + +*@par Attributes: +*@li num_bits: An optional attribute. Defaults to "8". +*@li narrow_range: An optional bool. Defaults to "False". \n + +*@par Outputs: +*y: A Tensor of type float32. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator FakeQuantWithMinMaxVars. +*/ +REG_OP(FakeQuantWithMinMaxVars) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(min, TensorType({DT_FLOAT})) + .INPUT(max, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(num_bits, Int, 8) + .ATTR(narrow_range, Bool, false) + .OP_END_FACTORY_REG(FakeQuantWithMinMaxVars) + +/** +*@brief Computes gradients for a FakeQuantWithMinMaxVars operation. \n + +*@par Inputs: +*Four inputs, including: +*@li gradients: A Tensor of type float32. +*@li x: A Tensor of type float32. +*@li min: A Tensor of type float32. +*@li max: A Tensor of type float32. \n + +*@par Attributes: +*@li num_bits: An integer specifying the quantization bit width. Defaults to "8". +*@li narrow_range: A Boolean specifying whether to use a narrow range for quantization. Defaults to "False". \n + +*@par Outputs: +*@li backprops_wrt_x: A Tensor. Has the same type as input "x". +*@li backprops_wrt_min: A Tensor. Has the same type as input "min". +*@li backprops_wrt_max: A Tensor. Has the same type as input "max". \n + +*@attention Constraints: +*@li "gradients" has the same shape as "x". +*@li "min" and "max" are scalars. +*@li "num_bits" is between 2 and 16 + +*@see Region() + +*@par Third-party framework compatibility +* Compatible with the operator FakeQuantWithMinMaxVarsGradient. 
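+*
+*@par Example:
+* A simplified scalar sketch of the forward fake-quantization described for
+* FakeQuantWithMinMaxArgs above (illustrative only; it omits the nudging of "min"/"max" to a
+* representable zero point that the TensorFlow kernels perform):
+*     // requires <algorithm> and <cmath>
+*     float fake_quant(float x, float lo, float hi, int num_bits, bool narrow_range) {
+*       const float qmin  = narrow_range ? 1.0f : 0.0f;
+*       const float qmax  = static_cast<float>((1 << num_bits) - 1);
+*       const float scale = (hi - lo) / (qmax - qmin);
+*       const float q     = std::round((std::min(std::max(x, lo), hi) - lo) / scale) + qmin;
+*       return (q - qmin) * scale + lo;               // de-quantized result, still a float
+*     }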
+*/ +REG_OP(FakeQuantWithMinMaxVarsGradient) + .INPUT(gradients, TensorType({DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(min, TensorType({DT_FLOAT})) + .INPUT(max, TensorType({DT_FLOAT})) + .OUTPUT(backprops_wrt_x, TensorType({DT_FLOAT})) + .OUTPUT(backprops_wrt_min, TensorType({DT_FLOAT})) + .OUTPUT(backprops_wrt_max, TensorType({DT_FLOAT})) + .ATTR(num_bits, Int, 8) + .ATTR(narrow_range, Bool, false) + .OP_END_FACTORY_REG(FakeQuantWithMinMaxVarsGradient) + +/** +*@brief Fake-quantizes the "inputs" tensor of type float +via per-channel floats min and max of shape [d] to "outputs" \n +tensor of same shape as inputs + +*@par Inputs: +*Three inputs, including: +*@li x: A Tensor of type float32. +*@li min: A Tensor of type float32. +*@li max: A Tensor of type float32. \n + +*@par Attributes: +*@li num_bits: An integer specifying the quantization bit width. Defaults to "8". +*@li narrow_range: A Boolean specifying whether to use a narrow range for quantization. Defaults to "False". \n + +*@par Outputs: +*y: A Tensor. Has the same type as input "x". + + +*@attention Constraints: +*@li "min" and "max" have one-dimensional shapes. +*@li "min" has the same last dimension size as "x". "max" has the same last dimension size as "x". +*@li "num_bits" is between 2 and 16 + +*@see Region() + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator FakeQuantWithMinMaxVarsPerChannel. +*/ +REG_OP(FakeQuantWithMinMaxVarsPerChannel) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(min, TensorType({DT_FLOAT})) + .INPUT(max, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(num_bits, Int, 8) + .ATTR(narrow_range, Bool, false) + .OP_END_FACTORY_REG(FakeQuantWithMinMaxVarsPerChannel) + +/** +*@brief Computes gradients for a FakeQuantWithMinMaxVarsPerChannel operation. \n + +*@par Inputs: +*Four inputs, including: +*@li gradients: A Tensor of type float32. +*@li x: A Tensor of type float32. +*@li min: A Tensor of type float32. +*@li max: A Tensor of type float32. \n + +*@par Attributes: +*@li num_bits: An integer specifying the quantization bit width. Defaults to "8". +*@li narrow_range: A Boolean specifying whether to use a narrow range for quantization. Defaults to "False". \n + +*@par Outputs: +*@li backprops_wrt_x: A Tensor. Has the same type as input "x". +*@li backprops_wrt_min: A Tensor. Has the same type as input "min". +*@li backprops_wrt_max: A Tensor. Has the same type as input "max". \n + +*@attention Constraints: +*@li "gradients" has the same shape as "x". +*@li "min" and "max" have one-dimensional shapes. +*@li "min" has the same last dimension size as "x". "max" has the same last dimension size as "x". "gradients" has the same last dimension size as "x". +*@li "num_bits" is between 2 and 16 + +*@see Region() + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator FakeQuantWithMinMaxVarsPerChannelGradient. +*/ +REG_OP(FakeQuantWithMinMaxVarsPerChannelGradient) + .INPUT(gradients, TensorType({DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(min, TensorType({DT_FLOAT})) + .INPUT(max, TensorType({DT_FLOAT})) + .OUTPUT(backprops_wrt_x, TensorType({DT_FLOAT})) + .OUTPUT(backprops_wrt_min, TensorType({DT_FLOAT})) + .OUTPUT(backprops_wrt_max, TensorType({DT_FLOAT})) + .ATTR(num_bits, Int, 8) + .ATTR(narrow_range, Bool, false) + .OP_END_FACTORY_REG(FakeQuantWithMinMaxVarsPerChannelGradient) + +/** +*@brief Element-wise computes the bitwise AND of "x1" and "x2". 
\n + +*@par Inputs: +*Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: int8, int16, +* int32, int64, uint8, uint16, uint32, uint64. Broadcasting is supported. +* @li x2: A Tensor of the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BitwiseAnd. +*/ +REG_OP(BitwiseAnd) + .INPUT(x1, TensorType::IntegerDataType()) + .INPUT(x2, TensorType::IntegerDataType()) + .OUTPUT(y, TensorType::IntegerDataType()) + .OP_END_FACTORY_REG(BitwiseAnd) + +/** +*@brief Element-wise computes the bitwise OR of "x1" and "x2". \n + +*@par Inputs: +*Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: int8, int16, +* int32, int64, uint8, uint16, uint32, uint64. Broadcasting is supported. +* @li x2: A Tensor of the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BitwiseOr. +*/ +REG_OP(BitwiseOr) + .INPUT(x1, TensorType::IntegerDataType()) + .INPUT(x2, TensorType::IntegerDataType()) + .OUTPUT(y, TensorType::IntegerDataType()) + .OP_END_FACTORY_REG(BitwiseOr) + +/** +*@brief Elementwise computes the bitwise XOR of "x1" and "x2". \n + +*@par Inputs: +*Two inputs, including: +*@li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, uint32, uint64. +* The format is NC1HWC0 or ND. Broadcasting is supported. +*@li x2: A Tensor. Has the same type and format as "x1". \n + +*@par Outputs: +*y: Output result. Has the same type as "x1". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator BitwiseXor. +*/ +REG_OP(BitwiseXor) + .INPUT(x1, TensorType::IntegerDataType()) + .INPUT(x2, TensorType::IntegerDataType()) + .OUTPUT(y, TensorType::IntegerDataType()) + .OP_END_FACTORY_REG(BitwiseXor) + +/** +*@brief Returns element-wise smallest integer not less than "x". \n + +*@par Inputs: +* x: A Tensor of type float16 or float32 or float64. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Ceil. +*/ +REG_OP(Ceil) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(Ceil) + +/** +*@brief Returns element-wise largest integer not greater than "x". \n + +*@par Inputs: +*x: A Tensor of type float16, float32 or double. \n + +*@par Outputs: +*y: A Tensor of the same type as "x". \n + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator Floor. +*/ +REG_OP(Floor) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(Floor) + +/** +*@brief Divides "x1/x2" element-wise, rounding toward the +* most negative integer. \n + +*@par Inputs: +*Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: float16, float32, int32, int64, int8, +* uint8, int16, uint16, double, complex64, complex128. +* @li x2: A Tensor of the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator FloorDiv. 
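The "rounding toward the most negative integer" behavior documented for FloorDiv differs from native C++ integer division, which truncates toward zero. A tiny stand-alone check of that distinction (ordinary host C++, independent of the operator prototypes):

#include <cassert>
#include <cmath>

int main() {
  assert(-7 / 2 == -3);                                  // C++ '/' truncates toward zero
  assert(static_cast<int>(std::floor(-7 / 2.0)) == -4);  // FloorDiv-style flooring result
  return 0;
}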
+*/ +REG_OP(FloorDiv) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, + DT_INT64, DT_INT16, DT_UINT16, DT_DOUBLE, + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, + DT_INT64, DT_INT16,DT_UINT16, DT_DOUBLE, + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, + DT_INT64, DT_INT16,DT_UINT16, DT_DOUBLE, + DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(FloorDiv) + +/** +*@brief Returns element-wise remainder of division. Consistent with: floor(x1/x2) * x2 + mod(x1, x2) = x1. \n + +*@par Inputs: +* Two inputs, including: +*@li x1: A Tensor. Must be one of the following types: +* int32, int64, float, float16, double +*@li x2: A Tensor. Must have the same type as "x1". +* +*@par Outputs: +*y: Result remainder. + +*@attention Constraints: +*@li x2: The input data does not support 0 +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*requirement of double thousandths in the mini form +*@li Due to different architectures, the calculation results of this operator +*on NPU and CPU may be inconsistent +*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator FloorMod. +*/ +REG_OP(FloorMod) + .INPUT(x1, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, + DT_DOUBLE})) + .INPUT(x2, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, + DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, + DT_DOUBLE})) + .OP_END_FACTORY_REG(FloorMod) + +/** +*@brief Computes the power of "x1" to "x2". \n + +*@par Inputs: +*Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: +* float16, float32, int32, int64, int8, uint8, double, complex64, complex128. +* @li x2: A Tensor of the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Pow. +*/ +REG_OP(Pow) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_INT8, + DT_UINT8, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_INT8, + DT_UINT8, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_INT8, + DT_UINT8, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(Pow) + +/** +*@brief Return element-wise integer closest to x. \n + +*@par Inputs: +*One input, include: +*x: A mutable Tensor. Must be one of the following types: +* float16, float32, double. \n + +*@par Outputs: +*y: A mutable Tensor. Has the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Rint. +*/ +REG_OP(Rint) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(Rint) + +/** +*@brief Rounds the values of a tensor to the nearest integer, element-wise. + * Rounds half to even. \n + +*@par Inputs: +*Inputs including: +*x: A required ND Tensor of type float16, float, int64, double, complex64, + * complex128 or int32. +*@par Outputs: +*y: A required ND Tensor. Has the same data type and shape as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Round. 
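The identity floor(x1/x2) * x2 + mod(x1, x2) = x1 quoted for FloorMod above pins down the sign convention: the remainder takes the sign of "x2", whereas std::fmod keeps the sign of "x1". A scalar sketch of that difference (the helper name floor_mod is ours):

#include <cassert>
#include <cmath>

// Remainder with the sign of x2, i.e. x1 - floor(x1 / x2) * x2.
static float floor_mod(float x1, float x2) {
  return x1 - std::floor(x1 / x2) * x2;
}

int main() {
  assert(floor_mod(7.0f, -2.0f) == -1.0f);  // result follows the sign of x2
  assert(std::fmod(7.0f, -2.0f) == 1.0f);   // std::fmod follows the sign of x1
  return 0;
}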
+*/ +REG_OP(Round) + .INPUT(x, TensorType(DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, + DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128)) + .OUTPUT(y, TensorType(DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, + DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128)) + .OP_END_FACTORY_REG(Round) + +/** +*@brief: Computes sine of "x" element-wise. \n + +*@par Inputs: +*One input: +*x: An ND Tensor. Must be one of the following types: float16, float32, double, + * complex64, complex128, int32, int64 + +*@par Outputs: +*y: An ND Tensor. Has the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Sin. +*/ +REG_OP(Sin) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Sin) + +/** +*@brief: Computes tan of "x" element-wise. \n + +*@par Inputs: +*One input: +*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128, int32, int64 + +*@par Outputs: +*y: A Tensor. Has the same type as "x". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Tan. +*/ +REG_OP(Tan) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128, DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(Tan) + +/** +*@brief Returns element-wise remainder of division. \n + +*@par Inputs: +*Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: float16, float32, +* double, int32, int64. +* @li x2: A Tensor of the same type as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". \n + +*@attention Constraints: +*@li x2: The input data does not support 0 +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*requirement of double thousandths in the mini form +*@li Due to different architectures, the calculation results of this operator +*on NPU and CPU may be inconsistent +*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 + +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator TruncateMod. +*/ +REG_OP(TruncateMod) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, + DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, + DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, + DT_INT32})) + .OP_END_FACTORY_REG(TruncateMod) + +/** +*@brief Adds 'bias' to 'x'. \n + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor of type NumberType. Must be one of the following types: float32, float64, int32, uint8, int16, +*int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64. +* @li bias: A 1D Tensor with size the C dimension of value. \n + +*@par Attributes: +*data_format: An optional string. Defaults to "NHWC". \n + +*@par Outputs: +*y: A Tensor with same type as "x". \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BiasAdd. +*/ +REG_OP(BiasAdd) + .INPUT(x, TensorType::NumberType()) + .INPUT(bias, TensorType::NumberType()) + .OUTPUT(y, TensorType::NumberType()) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(BiasAdd) + +/** +*@brief Returns the index with the smallest value across dimensions of a tensor. \n + +*@par Inputs: +*Two inputs, including: +*@li x: A Tensor. 
Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64. +*format is ND. +*@li dimension: A Tensor. Must be one of the following types: int32, int64. Must be in the range [-rank(input x), rank(input x)]. Describes which dimension of the input Tensor to reduce across. +* The format is ND. +*@par Attributes: +*dtype: The output type, either "int32" or "int64". Defaults to "int64". \n + +*@par Outputs: +*y: A Tensor of type "dtype". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ArgMin. +*/ +REG_OP(ArgMin) + .INPUT(x, TensorType::NumberType()) + .INPUT(dimension, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .ATTR(dtype, Type, DT_INT64) + .OP_END_FACTORY_REG(ArgMin) + +/** +*@brief Returns the index with the smallest value across dimensions of a tensor. \n + +*@par Inputs: +*One input: + +*x: A Tensor of type float16 or float32 in ND format. \n + +*@par Attributes: +*@li dimension: The dimension of the input Tensor to reduce across. +*@li dtype: An optional attribute, specifying the output data type. Must be "int32". Defaults to "int64". \n + +*@par Outputs: +*y: A Tensor of type dtype. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ArgMin. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ArgMin instead. +*/ +REG_OP(ArgMinD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(dimension, Int) + .ATTR(dtype, Type, DT_INT64) + .OP_END_FACTORY_REG(ArgMinD) + +/** +*@brief Returns the index with the largest value across axes of a tensor. \n + +*@par Inputs: +* Two inputs, including: +*@li x: A multi-dimensional Tensor of type float16, float32, or int16. +*@li dimension: A Scalar of type int32, specifying the index with the largest value. \n + +*@par Attributes: +*dtype: The output type, either "int32" or "int64". Defaults to "int64". \n + +*@par Outputs: +*y: A multi-dimensional Tensor of type int32 or int64, specifying the index with the largest value. The dimension is one less than that of "x". \n + +*@attention Constraints: +*@li x: If there are multiple maximum values, the index of the first maximum value is used. +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ArgMax. +*/ +REG_OP(ArgMaxV2) + .INPUT(x, TensorType::NumberType()) + .INPUT(dimension, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .ATTR(dtype, Type, DT_INT64) + .OP_END_FACTORY_REG(ArgMaxV2) + +/** +*@brief Returns the index with the largest value across axes of a tensor. \n + +*@par Inputs: +* One input, including: +*x: A multi-dimensional Tensor of type float16, float32. \n + +*@par Attributes: +*@li dimension: An integer of type int32, specifying the axis information of the index with the maximum value. +*@li dtype: The output type, either "int32" or "int64". Defaults to "int64". \n + +*@par Outputs: +*y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x". \n + +*@attention Constraints: +*@li x: If there are multiple maximum values, the index of the first maximum value is used. +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". 
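For the Arg* operators above, the axis named by "dimension" disappears from the output shape, and ties resolve to the first maximum. A host-side sketch of a row-wise argmax over a 2x3 matrix, analogous in spirit to ArgMaxV2 with dimension = 1 (plain C++ helper code, not the operator itself):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const std::vector<std::vector<float>> x = {{1.0f, 5.0f, 3.0f},
                                             {4.0f, 2.0f, 6.0f}};
  std::vector<int64_t> y;  // shape {2}: the reduced axis is gone
  for (const auto &row : x) {
    // std::max_element returns the first maximum on ties, matching the
    // constraint documented above.
    y.push_back(std::distance(row.begin(),
                              std::max_element(row.begin(), row.end())));
  }
  assert(y[0] == 1 && y[1] == 2);
  return 0;
}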
\n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ArgMax. +* +* @par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ArgMaxD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(dimension, Int) + .ATTR(dtype, Type, DT_INT64) + .OP_END_FACTORY_REG(ArgMaxD) + +/** +*@brief Returns the maximum value of all elements in the input in the given +* dimension. \n + +*@par Inputs: +*One input: \n +*x: A multi-dimensional Tensor of type float16 or float32. \n + +*@par Attributes: +*@li dimension: An integer of type int32, specifying the axis along which to +* find the maximum value. +*@li keep_dims: A bool, specifying whether to keep dimensions for the output +* Tensor. Defaults to "false". \n + +*@par Outputs: +*@li indice: A multi-dimensional Tensor of type int32, specifying the index. +* (If "keep_dims" is set to "false", the "dimension" axis is removed and the +* output has one fewer dimension than "x". Otherwise, the output has the same +* number of dimensions as "x", with the "dimension" axis reduced to size 1.) +*@li values: A Tensor, specifying the maximum value. Has the same dimensions +* as "indice" and the same type as "x". \n + +*@attention Constraints: +*@li If there are multiple maximum values, the index of the first maximum +* value is used. +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the +* dimension length of "x". \n + +*@par Third-party framework compatibility +* Compatible with the two output scenarios of PyTorch operator Max (the output +* sequence is opposite to that of PyTorch). +*/ +REG_OP(ArgMaxWithValue) + .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) + .OUTPUT(indice,TensorType({DT_INT32})) + .OUTPUT(values, TensorType({DT_FLOAT,DT_FLOAT16})) + .REQUIRED_ATTR(dimension, Int) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ArgMaxWithValue) + +/** +*@brief Returns the minimum value of all elements in the input in the given +* dimension. \n + +*@par Inputs: +*One input: \n +*x: A multi-dimensional Tensor of type float16 or float32. \n + +*@par Attributes: +*@li dimension: An integer of type int32, specifying the axis along which to +* find the minimum value. +*@li keep_dims: A bool, specifying whether to keep dimensions for the output +* Tensor. Defaults to "false". \n + +*@par Outputs: +*@li indice: A multi-dimensional Tensor of type int32, specifying the index. +* (If "keep_dims" is set to "false", the "dimension" axis is removed and the +* output has one fewer dimension than "x". Otherwise, the output has the same +* number of dimensions as "x", with the "dimension" axis reduced to size 1.) +*@li values: A Tensor, specifying the minimum value. Has the same dimensions +* as "indice" and the same type as "x". \n + +*@attention Constraints: +*@li If there are multiple minimum values, the index of the first minimum +* value is used. +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the +* dimension length of "x". +*@li Performing the ArgMinWithValue operation on the last axis of float32 data +* is not supported on a mini platform. \n + +*@par Third-party framework compatibility +* Compatible with the two output scenarios of PyTorch operator Min (the output +* sequence is opposite to that of PyTorch).
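A shape-only sketch of the "keep_dims" behavior described for ArgMaxWithValue and ArgMinWithValue, assuming the usual reduction semantics: with an input of shape {2, 3} and dimension = 1, keep_dims = false yields outputs of shape {2}, while keep_dims = true yields {2, 1}. The helper below is illustrative only, not part of the operator API.

#include <cassert>
#include <cstdint>
#include <vector>

// Output shape of a reduction over axis `dim`, mirroring the keep_dims attribute.
static std::vector<int64_t> reduced_shape(std::vector<int64_t> shape, size_t dim,
                                          bool keep_dims) {
  if (keep_dims) {
    shape[dim] = 1;                    // axis kept, with size 1
  } else {
    shape.erase(shape.begin() + dim);  // axis removed entirely
  }
  return shape;
}

int main() {
  assert((reduced_shape({2, 3}, 1, false) == std::vector<int64_t>{2}));
  assert((reduced_shape({2, 3}, 1, true) == std::vector<int64_t>{2, 1}));
  return 0;
}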
+*/ +REG_OP(ArgMinWithValue) + .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) + .OUTPUT(indice,TensorType({DT_INT32})) + .OUTPUT(values, TensorType({DT_FLOAT,DT_FLOAT16})) + .REQUIRED_ATTR(dimension, Int) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ArgMinWithValue) + +/** +*@brief Computes an element-wise combination of the inputs, where the mode is one of 0: PRODUCT, 1: SUM, 2: MAX + +*@par Inputs: +*One input: \n +*x: A dynamic input, the list of input tensors. All tensors in the list must have the same type, +* which must be one of the following: float16, float32. +* The maximum number of inputs is 32. \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as "x". \n + +*@par Attributes: +*@li N: A required attribute of type int, the number of inputs "x". The maximum is 32. +*@li mode: An optional attribute of type int. Defaults to "1". +* "0": product, "1": sum, "2": max. +*@li coeff: An optional list of floats. Must meet all of the following rules: +* the size of "coeff" must be equal to the number of inputs "x" or be empty; +* the absolute value of each element of "coeff" must be less than or equal to 1. +*/ +REG_OP(Eltwise) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(N, Int) + .ATTR(mode, Int, 1) + .ATTR(coeff, ListFloat, {}) + .OP_END_FACTORY_REG(Eltwise) + +/** +*@brief Computes element-wise population count. \n + +*@par Inputs: +*x: A Tensor of type TensorType::IntegerDataType(). \n + +*@par Outputs: +*y: A Tensor of type uint8. \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator PopulationCount. +*/ +REG_OP(PopulationCount) + .INPUT(x, TensorType::IntegerDataType()) + .OUTPUT(y, TensorType({DT_UINT8})) + .OP_END_FACTORY_REG(PopulationCount) + +/** +*@brief A fusion operator for bert lamb. \n + +*@par Inputs: +*Thirteen inputs, including: +* @li input_mul3: A Tensor. Must be one of the following types: float16, float32. +* @li input_mul2: A Tensor. Must be one of the following types: float16, float32. +* @li input_realdiv1: A Tensor. Must be one of the following types: float16, float32. +* @li input_mul1: A Tensor. Must be one of the following types: float16, float32. +* @li input_mul0: A Tensor. Must be one of the following types: float16, float32. +* @li input_realdiv0: A Tensor. Must be one of the following types: float16, float32. +* @li input_mul4: A Tensor. Must be one of the following types: float16, float32. +* @li mul0_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul1_sub: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor.
+*/ +REG_OP(LambNextMVWithDecay) + .INPUT(input_mul3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_realdiv1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_realdiv0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul4, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul1_sub, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul2_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul3_sub1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul4_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(add2_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y1, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y2, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y3, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y4, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(LambNextMVWithDecay) + +/** +*@brief Confuse real_div, rsqrt, sqrt, maximum, minimum, sub and add. \n + +*@par Inputs: +*Thirteen inputs, including: +* @li input_mul3: A Tensor. Must be one of the following types: float16, float32. +* @li input_mul2: A Tensor of the same type as "input1". +* @li input_realdiv1: A Tensor of the same type as "input1". +* @li input_mul1: A Tensor of the same type as "input1". +* @li input_mul0: A Tensor of the same type as "input1". +* @li input_realdiv0: A Tensor. Must be one of the following types: float16, float32. +* @li input_mul4: A Tensor of the same type as "input1". +* @li mul0_x: A Tensor of the same type as "input1". +* @li mul1_sub: A Tensor of the same type as "input1". +* @li mul2_x: A Tensor of the same type as "input1". +* @li mul3_sub1: A Tensor. Must be one of the following types: float16, float32. +* @li mul4_x: A Tensor of the same type as "input1". +* @li add2_y: A Tensor of the same type as "input1". \n + +*@par Outputs: +*Four outputs, including: +*@li y1: A Tensor. Has the same type as "input_mul3". +*@li y2: A Tensor. Has the same type as "input_mul3". +*@li y3: A Tensor. Has the same type as "input_mul3". +*@li y4: A Tensor. Has the same type as "input_mul3". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LambNextMV) + .INPUT(input_mul3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_realdiv1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_realdiv0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul4, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul1_sub, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul2_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul3_sub1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul4_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(add2_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y1, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y2, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y3, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y4, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(LambNextMV) + +/** +*@brief A fusion operator for bert lamb. \n + +*@par Inputs: +*Six inputs, including: +* @li input_square: A Tensor. Must be one of the following types: float16, float32. +* @li input_mul2: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor. 
Must be one of the following types: float16, float32. +* @li mul3_x: A Tensor. Must be one of the following types: float16, float32. +* @li truediv1_recip: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Outputs: +*Two outputs, including: +* @li y1: A Tensor of the same type as "input_square". +* @li y2: A Tensor of the same type as "input_square". \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LambNextRight) + .INPUT(input_square, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul2_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul3_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(truediv1_recip, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(add2_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y1, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y2, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(LambNextRight) + +/** +*@brief A fusion operator for bert lamb. \n + +*@par Inputs: +*Nine inputs, including: +* @li input_greater1: A Tensor. Must be one of the following types: float16, float32. +* @li input_greater_realdiv: A Tensor. Must be one of the following types: float16, float32. +* @li input_realdiv: A Tensor. Must be one of the following types: float16, float32. +* @li input_mul0: A Tensor. Must be one of the following types: float16, float32. +* @li input_mul1: A Tensor. Must be one of the following types: float16, float32. +* @li input_sub: A Tensor. Must be one of the following types: float16, float32. +* @li greater_y: A Tensor. Must be one of the following types: float16, float32. +* @li select_e: A Tensor. Must be one of the following types: float16, float32. +* @li minimum_y: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Outputs: +*y: A Tensor of the same type as "input_greater1". \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LambUpdateWithLr) + .INPUT(input_greater1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_greater_realdiv, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_realdiv, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_mul1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_sub, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(greater_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(select_e, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(minimum_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(LambUpdateWithLr) + +/** +*@brief A fusion operator for bert lamb. \n + +*@par Inputs: +*Seven inputs, including: +* @li x1: A Tensor. Must be one of the following types: float16, float32. +* @li x2: A Tensor. Must be one of the following types: float16, float32. +* @li x3: A Tensor. Must be one of the following types: float16, float32. +* @li x4: A Tensor. Must be one of the following types: float16, float32. +* @li x5: A Tensor. Must be one of the following types: float16, float32. +* @li greater_y: A Tensor. Must be one of the following types: float16, float32. +* @li select_e: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Outputs: +*y: A Tensor of the same type as "x1". \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/ +REG_OP(LambUpdateWithLrV2) + .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x4, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x5, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(greater_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(select_e, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(LambUpdateWithLrV2) + +/** +*@brief A fusion operator for bert lamb. \n + +*@par Inputs: +*Eleven inputs, including: +* @li input0: A Tensor. Must be one of the following types: float16, float32. +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li mul0_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul1_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul3_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul4_x: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Outputs: +*Three outputs, including: +* @li output0: A Tensor. Must be one of the following types: float16, float32. +* @li output1: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AdamApplyOneWithDecay) + .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul2_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul3_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul4_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(add2_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output1, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(AdamApplyOneWithDecay) + +/** +*@brief A fusion operator for bert lamb. \n + +*@par Inputs: +*Ten inputs, including: +* @li input0: A Tensor. Must be one of the following types: float16, float32. +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li mul0_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul1_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul3_x: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. 
\n + +*@par Outputs: +*Three outputs, including: +* @li output0: A Tensor. Must be one of the following types: float16, float32. +* @li output1: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AdamApplyOne) + .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul2_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul3_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(add2_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output1, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(AdamApplyOne) + +/** +*@brief A fusion operator for bert lamb. \n + +*@par Inputs: +*Eleven inputs, including: +* @li input0: A Tensor. Must be one of the following types: float16, float32. +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li mul0_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul1_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul3_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul4_x: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Outputs: +*Three outputs, including: +* @li output0: A Tensor. Must be one of the following types: float16, float32. +* @li output1: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AdamApplyOneWithDecayAssign) + .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul2_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul3_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul4_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(add2_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output1, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(AdamApplyOneWithDecayAssign) + +/** +*@brief A fusion operator for bert lamb. \n + +*@par Inputs: +*Ten inputs, including: +* @li input0: A Tensor. Must be one of the following types: float16, float32. +* @li input1: A Tensor. Must be one of the following types: float16, float32. 
+* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li mul0_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul1_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul3_x: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Outputs: +*Three outputs, including: +* @li output0: A Tensor. Must be one of the following types: float16, float32. +* @li output1: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AdamApplyOneAssign) + .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul2_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul3_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(add2_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output1, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(AdamApplyOneAssign) + +/** +*@brief A fusion operator for bert lamb. \n + +*@par Inputs: +*Ten inputs, including: +* @li input0: A Tensor. Must be one of the following types: float16, float32. +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li mul0_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul1_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul3_x: A Tensor. Must be one of the following types: float16, float32. +* @li steps: A Tensor. Must be one of the following types: float16, float32. +* @li do_use_weight: A Tensor. Must be one of the following types: float16, float32. +* @li weight_decay_rate: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Outputs: +*Three outputs, including: +* @li output0: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(LambApplyOptimizerAssign) + .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul2_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mul3_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(add2_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(steps, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(LambApplyOptimizerAssign) + +/** +*@brief A fusion operator for bert lamb. \n + +*@par Inputs: +*Ten inputs, including: +* @li input0: A Tensor. Must be one of the following types: float16, float32. +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li mul0_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul1_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul3_x: A Tensor. Must be one of the following types: float16, float32. +* @li steps: A Tensor. Must be one of the following types: float16, float32. +* @li do_use_weight: A Tensor. Must be one of the following types: float16, float32. +* @li weight_decay_rate: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Outputs: +*No outputs +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(LambApplyWeightAssign) + .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(LambApplyWeightAssign) + +/** +*@brief Confuse select, maximum, greater and sqrt. \n + +*@par Inputs: +*Four inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li greater_zeros: A Tensor. Must be one of the following types: float16, float32. +* @li select_ones: A Tensor. Must be one of the following types: float16, float32. +* @li maximum_ones: A Tensor. Must be one of the following types: float16, float32. \n + +*@par Outputs: +*y: A Tensor of the same type as "x". \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ClipByNormNoDivSum) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(greater_zeros, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(select_ones, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(maximum_ones, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(ClipByNormNoDivSum) + +/** +*@brief Confuse reducesumd and square. 
\n + +*@par Inputs: +*x: A Tensor of type float16, float32. \n + +*@par Attributes: +* Two attributes, including: \n +*@li axis: A optional listint, specifies the dimensions to reduce. +*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". \n + +*@par Outputs: +*Two outputs, including: \n +*@li y1: A Tensor. Has the same type as "x". +*@li y2: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(SquareSumV2) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y1, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y2, TensorType({DT_FLOAT16,DT_FLOAT})) + .REQUIRED_ATTR(axis, ListInt) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(SquareSumV2) + +/** +*@brief Confuse reducesumd and square. \n + +*@par Inputs: +*x: A Tensor of type float16, float32. \n + +*@par Attributes: +* Two attributes, including: \n +*@li axis: A optional listint, specifies the dimensions to reduce. +*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". \n + +*@par Outputs: +y: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(SquareSumV1) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .REQUIRED_ATTR(axis, ListInt) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(SquareSumV1) + +/** +*@brief Calculate square of Tensor and then reducesum + +*@par Inputs: +*x1: A Tensor of type float32. +*x2: A Tensor of type float32. \n + +*@par Outputs: +y1: A Tensor. Has the same type as "x1".The result of "x1". +y2: A Tensor. Has the same type as "x2".The result of "x2". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(SquareSumAll) + .INPUT(x1, TensorType({DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT})) + .OUTPUT(y1, TensorType({DT_FLOAT})) + .OUTPUT(y2, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(SquareSumAll) + +/** +*@brief Confuse broadcast, addn and mul. \n + +*@par Inputs: +*Three inputs, including: +* @li x1: A Tensor. Must be one of the following types:int32, int16, float16, float32. +* @li x2: A Tensor of the same type as "x1". +* @li x3: A Tensor of the same type as "x1". \n + +*@par Outputs: +* y: A Tensor. Has the same type as "x1". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(FusedMulAddN) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .INPUT(x3, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .OP_END_FACTORY_REG(FusedMulAddN) + +/** +*@brief Add 'bias' to 'x'. \n + +*@par Inputs: +* Two inputs, including: +*@li x: An ND tensor of type float16 or float32. +*@li bias: An ND tensor of type float16 or float32. \n + +*@par Attributes: +*@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1". +*@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1". +*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". \n + +*@par Outputs: +*y: An ND tensor of type float16 or float32. 
\n + +*@attention Constraints:\n +* Assume that the shape length of "x" is "n" and that of "bias" is "m". +*@li "axis" is within the range [-n, n-1]. num_axes >= -1. +*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n +* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis). +*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1. +*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n +* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). +*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n +* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). +*@par Third-party framework compatibility +* Compatible with the Caffe operator Bias. +*/ + +REG_OP(Bias) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */ + .INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Second operand." */ + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as x" */ + .ATTR(axis, Int, 1) + .ATTR(num_axes, Int, 1) + .ATTR(bias_from_blob, Bool, true) + .OP_END_FACTORY_REG(Bias) + +/** +*@brief Function multiply gradients calculation. +output0 is the result of which input0 dot multily input1. +output1 is the result of which input0 dot multily input1, then reducesum it. \n + +*@par Inputs: +*@li input0: A Tensor of input of mul, and dtype supports float16, float32. +*@li input1: A Tensor of input of mul and mul_1, and dtype supports float16, float32. +*@li input2: A Tensor of input of mul_1, and dtype supports float16, float32. \n + +*@par Attributes: +*@li axes: The dimensions to reduce. Default:(), reduce all dimensions. \n +Only constant value is allowed. +*@li keep_dims: If true, keep these reduced dimensions and the length is 1. \n +If false, don’t keep these dimensions. Default:False. \n + +*@par Outputs: +*@li output0: A Tensor result of which input0 dot multily input1. +*@li output1: A Tensor result of which input0 dot multily input1, then reducesum it. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ConfusionMulGrad) + .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output1, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(axes, ListInt, {}) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ConfusionMulGrad) + +/** +*@brief Function fused multiply l2 loss calculation. \n + +*@par Inputs: +*@li x1: A Tensor of type float16, float32. +*@li x2: A Tensor of type float16, float32. +*@li x3: A Tensor of type float16, float32. \n + +*@par Outputs: +*@li y1: A Tensor of shape and dtype of first output, which should have \n +shape (1,) and dtype as input. 
+*@li y2: A Tensor of shape and dtype of second output, which should have the same shape and type as the input. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(FusedMulAddNL2loss) + .INPUT(x1, TensorType::NumberType()) + .INPUT(x2, TensorType::NumberType()) + .INPUT(x3, TensorType::NumberType()) + .OUTPUT(y1, TensorType::NumberType()) + .OUTPUT(y2, TensorType::NumberType()) + .OP_END_FACTORY_REG(FusedMulAddNL2loss) + +/** +*@brief Tests whether the input exceeds a threshold. \n + +*@par Inputs: +*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n + +*@par Attributes: +*@li threshold: An optional float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n + +*@par Outputs: +*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. +*@par Third-party framework compatibility +* Compatible with the Caffe operator Threshold. +*/ + + REG_OP(Threshold) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(threshold, Float, 0.0) + .OP_END_FACTORY_REG(Threshold); + +/** +*@brief Returns the index number corresponding to the maximum value entered. \n + +*@par Inputs: +*@li x: A tensor. Must be one of the following types: float16, float32. \n + +*@par Attributes: +*@li axis: An optional int. Specifies the axis of the input tensor along which to compute the top k. If this parameter is not provided, the top k is computed for each batch. Defaults to 10000. +*@li out_max_val: An optional bool. Whether to output the maximum value. If it is True, the maximum value and index are output, otherwise only the index is output. +* Defaults to False. +*@li topk: An optional int. The number of top elements to return along each axis (the value is greater than or equal to 1), and the value range must be in [1, x.shape(axis)]. +* Defaults to 1. + +*@par Outputs: +*@li indices: A tensor of type float16, float32, int32. The index of the maximum value. +*@li values: A tensor of type float16, float32. Output tensor, containing the maximum index or maximum value. +*@par Third-party framework compatibility +* Compatible with the Caffe operator ArgMax. +*/ +REG_OP(ArgMaxWithK) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(indices, TensorType({DT_INT32, DT_FLOAT, DT_FLOAT16})) + .OUTPUT(values, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(axis, Int, 10000) + .ATTR(out_max_val, Bool, false) + .ATTR(topk, Int, 1) + .OP_END_FACTORY_REG(ArgMaxWithK) + +/** +*@brief Multiplies a tensor by a float scalar. \n + +*@par Inputs: +*One input, including: +* @li x: A Tensor. Must be one of the following types: int32, int16, float16, float32. \n + +*@par Attributes: +*value: A required float, the scalar to multiply by. \n + +*@par Outputs: +*@li y: A Tensor. Has the same type and shape as "x". \n + +*@par Third-party framework compatibility: +* Compatible with the Pytorch operator muls. +*/ +REG_OP(Muls) + .INPUT(x, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) + .REQUIRED_ATTR(value, Float) + .OP_END_FACTORY_REG(Muls) + +/** +*@brief Fills a tensor with a float scalar value. \n + +*@par Inputs: +*One input, including: +* @li x: A Tensor. Must be one of the following types: int32, int16, float16, float32. \n + +*@par Attributes: +*value: A required float, the value to fill with. \n + +*@par Outputs: +*@li y: A Tensor. Has the same type and shape as "x". \n + +*@par Third-party framework compatibility: +* Compatible with the Pytorch operator fills.
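The scalar-attribute operators in this group (Muls and Fills above, Adds just below) each apply a single float attribute "value" element-wise. A minimal host-side sketch of that pattern, with illustrative names only:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  const std::vector<float> x = {1.0f, 2.0f, 3.0f};
  const float value = 0.5f;
  std::vector<float> muls(x.size()), adds(x.size()), fills(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    muls[i] = x[i] * value;   // Muls:  y = x * value
    adds[i] = x[i] + value;   // Adds:  y = x + value
    fills[i] = value;         // Fills: y = value, same shape as x
  }
  assert(muls[2] == 1.5f && adds[0] == 1.5f && fills[1] == 0.5f);
  return 0;
}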
+*/ +REG_OP(Fills) + .INPUT(x, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) + .REQUIRED_ATTR(value,Float) + .OP_END_FACTORY_REG(Fills) + +/** +*@brief Adds a float scalar to a tensor. \n + +*@par Inputs: +*One input, including: +* @li x: A Tensor. Must be one of the following types: int32, int16, float16, float32. \n + +*@par Attributes: +*value: A required float, the scalar to add. \n + +*@par Outputs: +*@li y: A Tensor. Has the same type and shape as "x". \n + +*@par Third-party framework compatibility: +* Compatible with the Pytorch operator adds. +*/ + REG_OP(Adds) + .INPUT(x, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) + .REQUIRED_ATTR(value,Float) + .OP_END_FACTORY_REG(Adds) + +/** +*@brief Computes the product of "x1" and "x2" and returns 0 if "x2" is zero, even if "x1" is NaN or infinite. \n + +*@par Inputs: +* @li x1: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. +* @li x2: A Tensor. Has the same type and shape as "x1". \n + +*@par Outputs: +*y: A Tensor. Has the same type and shape as "x1". \n + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator MulNoNan. +*/ + REG_OP(MulNoNan) + .INPUT(x1, TensorType::NumberType()) /* "First operand." */ + .INPUT(x2, TensorType::NumberType()) /* "Second operand." */ + .OUTPUT(y, TensorType::NumberType()) /* "Result, has same element type as two inputs" */ + .OP_END_FACTORY_REG(MulNoNan) + +/** +*@brief Computes "x1" + "alpha" * "x2". \n + +*@par Inputs: +* @li x1: A Tensor of type int32, float16, or float32. +* @li x2: A Tensor of type int32, float16, or float32. \n + +*@par Attributes: +*alpha: A required float scalar applied to "x2", i.e. the operator computes x1 + alpha * x2. + +*@par Outputs: +*y: A Tensor. Has the same shape and type as "x1". \n + +*@par Third-party framework compatibility: +* Compatible with the Pytorch operator Axpy. +*/ +REG_OP(Axpy) + .INPUT(x1, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) + .REQUIRED_ATTR(alpha, Float) + .OP_END_FACTORY_REG(Axpy) + +/** +*@brief Creates a criterion that measures the loss given input tensors "x1" and "x2" and a Tensor label "target" with values 1 or -1. \n + +*@par Inputs: +*@li x1: An ND Tensor with one of the following types: int8, uint8, int32, float16, float32. +*@li x2: An ND Tensor with one of the following types: int8, uint8, int32, float16, float32. +*@li target: An ND Tensor with one of the following types: int8, int32, float16, float32. \n + +*@par Attributes: +*@li margin: An optional float32. Defaults to "0.0". +*@li reduction: An optional string. Defaults to "mean". \n + +*@par Outputs: +*@li y: An ND Tensor of type float32. +*@par Third-party framework compatibility +* Compatible with the PyTorch operator CosineEmbeddingLoss. +*/ +REG_OP(CosineEmbeddingLoss) + .INPUT(x1, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(x2, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(target, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(margin, Float, 0) + .ATTR(reduction, String, "mean") + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(CosineEmbeddingLoss) + +/** +*@brief Kullback-Leibler divergence. \n + +*@par Inputs: +* Two inputs, including: +*@li x: Tensor of arbitrary shape.
+*@li target: Tensor of the same shape and dtype as x. \n + +*@par Attributes: +*reduction: A required string. Specifies the reduction to apply to the output; +* only the "sum" and "batchmean" modes are supported. \n + +*@par Outputs: +*y: An ND Tensor of the same dtype as x. +*@par Third-party framework compatibility +*Compatible with the PyTorch operator kl_div. +*/ +REG_OP(KLDiv) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(reduction, String) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(KLDiv) + +/** +*@brief Copies data from "x" to "y". \n + +*@par Inputs: +*One input, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32, bool. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". \n + +*@par Third-party framework compatibility + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(TensorMove) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL})) + .OP_END_FACTORY_REG(TensorMove) + +/** +*@brief Copies data from "x" to "output_x". \n + +*@par Inputs: +*One input, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n + +*@par Outputs: +*output_x: A Tensor. Has the same type as "x". \n + +*@par Third-party framework compatibility +*/ +REG_OP(TensorRedirect) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, + DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) + .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, + DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) + .OP_END_FACTORY_REG(TensorRedirect) + +/** +* @brief Performs the element-wise division of tensor "x1" by tensor "x2", +* multiplies the result by the scalar "value" and adds it to tensor "input_data" + +* @par Inputs: +* Four inputs, including: +* @li input_data: A mutable input Tensor. Must be one of the following types: +* float16, float32. +* @li x1: A mutable input Tensor of the same type as "input_data". +* @li x2: A mutable input Tensor of the same type as "input_data". +* @li value: A mutable input Tensor. Must be one of the following types: +* float16, float32, int32. \n + +* @par Outputs: +* @li y: A mutable Tensor. Has the same type as "x1". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Addcdiv. +*/ +REG_OP(Addcdiv) + .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32 })) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(Addcdiv) + +/** +* @brief Performs the element-wise multiplication of tensor "x1" by tensor "x2", +* multiplies the result by the scalar "value" and adds it to tensor "input_data" + + +* @par Inputs: +* Four inputs, including: +* @li input_data: A mutable input Tensor. Must be one of the following types: +* float16, float32, int8, int32, uint8. +* @li x1: A mutable input Tensor of the same type as "input_data". +* @li x2: A mutable input Tensor of the same type as "input_data". +* @li value: A tensor which includes only one element, of the same type as "input_data". \n + +* @par Outputs: +* @li y: A mutable output Tensor. Has the same type as "x1".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Addcmul.
+*/
+REG_OP(Addcmul)
+    .INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
+    .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
+    .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
+    .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
+    .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
+    .OP_END_FACTORY_REG(Addcmul)
+
+/**
+* @brief Computes the result of x2 * alpha + x1.
+
+* @par Inputs:
+* @li x1: An ND tensor of type float16, float32, int32.
+* @li x2: An ND tensor of type float16, float32, int32.
+* @li alpha: A scalar tensor of type float16, float32. \n
+
+* @par Outputs:
+* @li y: An ND tensor with the same shape and type as "x1". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Axpy.
+*/
+REG_OP(AxpyV2)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OP_END_FACTORY_REG(AxpyV2)
+
+/**
+* @brief Computes the result of x1 + x2.
+
+* @par Inputs:
+* @li x1: An ND tensor of type float16, float, int32.
+* @li x2: An ND tensor of type float16, float, int32. \n
+
+* @par Outputs:
+* @li y: An ND tensor with the same type as "x1". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Add.
+*/
+REG_OP(PtAdd)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OP_END_FACTORY_REG(PtAdd)
+
+/**
+* @brief Computes the result of x1 * x2.
+
+* @par Inputs:
+* @li x1: An ND tensor of type float16, float32, int32.
+* @li x2: An ND tensor of type float16, float32, int32. \n
+
+* @par Outputs:
+* @li y: An ND tensor with the same type as "x1" and the same shape as the larger of "x1" and "x2". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator muls.
+*/
+REG_OP(PtMuls)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OP_END_FACTORY_REG(PtMuls)
+
+/**
+* @brief Computes the result of x1 - x2.
+
+* @par Inputs:
+* @li x1: An ND tensor of type float16, float, int32.
+* @li x2: An ND tensor of type float16, float, int32. \n
+
+* @par Outputs:
+* @li y: An ND tensor with the same type as "x1". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Sub.
+*/
+REG_OP(PtSub)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OP_END_FACTORY_REG(PtSub)
+
+/**
+* @brief Adds the partial values of two tensors in format NC1HWC0.
+
+* @par Inputs:
+* @li x1: A Tensor in 5HD, and must be one of the following types: float16,
+* float32. \n
+* @li x2: A Tensor of the same type as "x1", and the same shape as "x1",
+* except for the C1 value. \n
+
+* @par Attributes:
+* @li x1_c1_offset: A required int. Offset value of C1 in "x1". \n
+* @li x2_c1_offset: A required int. Offset value of C1 in "x2". \n
+* @li c1_len: A required int. C1 length of "y". The value must be less than
+* the difference between C1 and offset in "x1" and "x2". \n
+
+* @par Outputs:
+* @li y: A Tensor of the same type as "x1", and the same shape as "x1",
+* except for the C1 value. Records the result of the addition. \n
+*/
+REG_OP(StrideAdd)
+    .INPUT(x1, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(x2, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .REQUIRED_ATTR(x1_c1_offset, Int)
+    .REQUIRED_ATTR(x2_c1_offset, Int)
+    .REQUIRED_ATTR(c1_len, Int)
+    .OP_END_FACTORY_REG(StrideAdd)
+
+/**
+* @brief Compares whether two tensors are completely equal, and outputs a single bool value.
+
+* @par Inputs:
+* Two inputs, including:
+* @li input_x: A Tensor. The first tensor. \n
+* @li input_y: A Tensor. The second tensor. \n
+
+* @par Outputs:
+* @li output_z: A Tensor of type bool. The comparison result of the two inputs. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch equal operator. \n
+*/
+REG_OP(TensorEqual)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .OUTPUT(output_z, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(TensorEqual)
+
+/**
+ * @brief Element-wise max of each of the input tensors (with Numpy-style broadcasting support).
+ * All inputs and outputs must have the same data type. This operator supports multidirectional
+ * (i.e., Numpy-style) broadcasting.
+ *
+ * @par Inputs:
+ * One input, including:
+ * @li x: A dynamic input Tensor. Must be one of the following types: float32, float16, double, int32, int64.
+ *
+ * @par Outputs:
+ * One output, including:
+ * @li y: A Tensor of the same type as x.
+ *
+ */
+REG_OP(MaxN)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
+    .OP_END_FACTORY_REG(MaxN)
+
+/**
+ * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support).
+ * All inputs and outputs must have the same data type. This operator supports multidirectional
+ * (i.e., Numpy-style) broadcasting.
+ *
+ * @par Inputs:
+ * One input, including:
+ * @li x: A dynamic input Tensor. Must be one of the following types: float32, float16, double, int32, int64.
+ *
+ * @par Outputs:
+ * One output, including:
+ * @li y: A Tensor of the same type as x.
+ *
+ */
+REG_OP(MinN)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64,
+                                  DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64,
+                           DT_INT32, DT_INT64}))
+    .OP_END_FACTORY_REG(MinN)
+
+/**
+ * @brief Calculates x * mask * value.
+ *
+ * @par Inputs:
+ * @li x: A tensor of type float16 or float32, specifying the input to the data layer.
+ * @li mask: A tensor of type int8, float16 or float32, with the same shape as x. \n
+ *
+ * @par Attributes:
+ * value: A required float. \n
+ *
+ * @par Outputs:
+ * y: The output tensor of type float16 or float32, with the same type and shape as x.
+ *
+ */
+REG_OP(MaskedScale)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32}))
+    .INPUT(mask, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32}))
+    .REQUIRED_ATTR(value, Float)
+    .OP_END_FACTORY_REG(MaskedScale)
+
+/**
+ * @brief Calculates the lerp (linear interpolation) function. \n
+
+ * @par Inputs:
+ * Three inputs, including:
+ * @li start: A tensor. Must be one of the following types:
+ * float16, float32. \n
+ * @li end: A tensor. Must be one of the following types:
+ * float16, float32. \n
+ * @li weight: A tensor. Must be one of the following types:
+ * float16, float32. \n
+
+ * @par Outputs:
+ * y: A Tensor with the same type and shape as "start". \n
+
+ * @par Third-party framework compatibility
+ * Compatible with the Pytorch operator Lerp. \n
+ */
+REG_OP(Lerp)
+    .INPUT(start, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(end, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(Lerp)
+
+/**
+*@brief Returns the number of elements for which abs(x1-x2) > atol+rtol*abs(x2), evaluated element-wise. \n
+
+*
+*@par Inputs:
+*@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16.
+*@li x2: A tensor of the same type as "x1".
+*
+*@par Attributes:
+* atol: An optional float. Defaults to 1e-05.
+* rtol: An optional float. Defaults to 1e-03.
+*
+*@par Outputs:
+* num: A tensor of type int32.
+* diff: A tensor of type float16.
+*
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*
+*/
+REG_OP(DataCompare)
+    .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 }))
+    .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 }))
+    .OUTPUT(num, TensorType({DT_FLOAT}))
+    .OUTPUT(diff, TensorType({DT_FLOAT16}))
+    .ATTR(atol, Float, 1e-5)
+    .ATTR(rtol, Float, 1e-3)
+    .OP_END_FACTORY_REG(DataCompare)
+
+/**
+*@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis,
+*0 otherwise. The input does not need to explicitly be a 2D vector. The "axis" attribute indicates the dimension
+*along which Hardmax will be performed. The output tensor has the same shape and contains the Hardmax values of
+*the corresponding input.
+*
+*@par Inputs:
+*One input, including:
+*@li x: A Tensor. Must be one of the following types: float32, float16.
+*
+*@par Attributes:
+*@li axis: An optional int that decides which dimension will be used to calculate the hardmax. Defaults to -1.
+*
+*@par Outputs:
+*One output, including:
+*@li y: A Tensor of the same type as x.
+*
+*/
+REG_OP(HardMax)
+    .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT }))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(axis, Int, -1)
+    .OP_END_FACTORY_REG(HardMax)
+
+/**
+* @brief Computes the dot product (inner product) of two tensors. This function does not broadcast.
+
+* @par Inputs:
+* Two inputs, including:
+* @li input_x: A Tensor. The first tensor, which must be 1D. \n
+* @li input_y: A Tensor. The second tensor, which must be 1D. \n
+
+* @par Outputs:
+* @li output: A Tensor. The result of the two inputs, which must be 1D. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch dot operator. \n
+*/
+REG_OP(Dot)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .OP_END_FACTORY_REG(Dot)
+
+/**
+*@brief Returns a new tensor with boolean elements representing \n
+*if each element of "x1" is "close" to the corresponding element of "x2". \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A tensor. Must be one of the following types:
+* float16, float32, int32. \n
+* @li x2: A tensor with the same type and shape as "x1". \n
+
+*@par Attributes:
+*@li rtol: An optional float. Defaults to 1e-05. \n
+*@li atol: An optional float. Defaults to 1e-08.
\n +*@li equal_nan: An optional bool.Defaults to false. \n + +*@par Outputs: +*y: A Tensor bool with the same shape of x1's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator isclose. \n +*/ +REG_OP(IsClose) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_BOOL})) + .ATTR(rtol, Float, 1e-05) + .ATTR(atol, Float, 1e-08) + .ATTR(equal_nan, Bool, false) + .OP_END_FACTORY_REG(IsClose) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/inc/ops/functional_ops.h new file mode 100644 index 00000000..b09ac058 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/functional_ops.h @@ -0,0 +1,333 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file functional_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { +/** + *@brief Select one of the subgraphs to pass the input tensors and return the output tensors. + * If "cond" means True, the selected subgraph is "then_branch". + * Otherwise, the selected subgraph is "else_branch" . \n + + *@par Inputs: + *@li cond: A Tensor. If "cond" is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if "cond" is a numerical scalar, non-zero means True and zero means False; + * if "cond" is a string scalar, non-empty means True and empty means False; + * if "cond" is not a scalar, non-empty means True and empty means False. + *@li input: The input tensors . It's a dynamic input. \n + + *@par Graphs: + *@li then_branch: A subgraph takes 'input' and returns a list of tensors, + * whose types are the same as what else_branch returns. + *@li else_branch: A subgraph takes 'input' and returns a list of tensors, + * whose types are the same as what then_branch returns . \n + + *@par Outputs: + *output: The output tensors returned by either then_branch(input) or else_branch(input) . \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator _If. + */ +REG_OP(_If) + .INPUT(cond, TensorType::ALL()) + .DYNAMIC_INPUT(input, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .GRAPH(then_branch) + .GRAPH(else_branch) + .OP_END_FACTORY_REG(_If) + +/** + *@brief Select one of the subgraphs to pass the input tensors and return the output tensors. + * If "cond" means True, the selected subgraph is "then_branch". + * Otherwise, the selected subgraph is "else_branch" . \n + + *@par Inputs: + *@li cond: A Tensor. 
If "cond" is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if "cond" is a numerical scalar, non-zero means True and zero means False; + * if "cond" is a string scalar, non-empty means True and empty means False; + * if "cond" is not a scalar, non-empty means True and empty means False. + *@li input: The input tensors . It's a dynamic input. \n + + *@par Graphs: + *@li then_branch: A subgraph takes 'input' and returns a list of tensors, + * whose types are the same as what else_branch returns. + *@li else_branch: A subgraph takes 'input' and returns a list of tensors, + * whose types are the same as what then_branch returns . \n + + *@par Outputs: + *output: The output tensors returned by either then_branch(input) or else_branch(input) . \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator StatelessIf. + */ +REG_OP(StatelessIf) + .INPUT(cond, TensorType::ALL()) + .DYNAMIC_INPUT(input, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .GRAPH(then_branch) + .GRAPH(else_branch) + .OP_END_FACTORY_REG(StatelessIf) + +/** + *@brief Select one of the subgraphs to pass the input tensors and return the output tensors. + * If "cond" means True, the selected subgraph is "then_branch". + * Otherwise, the selected subgraph is "else_branch" . \n + + *@par Inputs: + *@li cond: A Tensor. If "cond" is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if "cond" is a numerical scalar, non-zero means True and zero means False; + * if "cond" is a string scalar, non-empty means True and empty means False; + * if "cond" is not a scalar, non-empty means True and empty means False. + *@li input: The input tensors . It's a dynamic input. \n + + *@par Graphs: + *@li then_branch: A subgraph takes 'input' and returns a list of tensors, + * whose types are the same as what else_branch returns. + *@li else_branch: A subgraph takes 'input' and returns a list of tensors, + * whose types are the same as what then_branch returns . \n + + *@par Outputs: + *output: The output tensors returned by either then_branch(input) or else_branch(input) . \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator If. + */ +REG_OP(If) + .INPUT(cond, TensorType::ALL()) + .DYNAMIC_INPUT(input, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .GRAPH(then_branch) + .GRAPH(else_branch) + .OP_END_FACTORY_REG(If) + +/** + *@brief Select one of the subgraphs to pass the input tensors and return the output tensors . \n + + *@par Inputs: + *@li branch_index: A int32 scalar which determines the selected subgraph. + *@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n + + *@par Graphs: + *branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors, + * whose types are the same as what every other subgraph returns . \n + + *@par Outputs: + *output: The output tensors returned by one of branches . It's a dynamic output. \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator Case. + */ +REG_OP(Case) + .INPUT(branch_index, DT_INT32) + .DYNAMIC_INPUT(input, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .DYNAMIC_GRAPH(branches) + .OP_END_FACTORY_REG(Case) + +/** + *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n + + *@par Inputs: + *input: The input tensors . 
It's a dynamic input. \n + + *@par Graphs: + *@li cond: A subgraph takes 'input' and returns a tensor. + * If the tensor is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if it is a numerical scalar, non-zero means True and zero means False; + * if it is a string scalar, non-empty means True and empty means False; + * if it is not a scalar, non-empty means True and empty means False. + *@li body: A subgraph takes 'input' and returns a another list of tensors . \n + + *@par Attributes: + *parallel_iterations: An optional int, default as 10 . \n + + *@par Outputs: + *output: The output tensors returned by "body". Has the same type as "input" . \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator _While. + */ +REG_OP(_While) + .DYNAMIC_INPUT(input, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .GRAPH(cond) + .GRAPH(body) + .OP_END_FACTORY_REG(_While) + +/** + *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n + + *@par Inputs: + *input: The input tensors . It's a dynamic input. \n + + *@par Graphs: + *@li cond: A subgraph takes 'input' and returns a tensor. + * If the tensor is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if it is a numerical scalar, non-zero means True and zero means False; + * if it is a string scalar, non-empty means True and empty means False; + * if it is not a scalar, non-empty means True and empty means False. + *@li body: A subgraph takes 'input' and returns a another list of tensors . \n + + *@par Attributes: + *parallel_iterations: An optional int, default as 10 . \n + + *@par Outputs: + *output: The output tensors returned by "body". Has the same type as "input" . It's a dynamic output. \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator While. + */ +REG_OP(While) + .DYNAMIC_INPUT(input, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .GRAPH(cond) + .GRAPH(body) + .ATTR(parallel_iterations, Int, 10) + .OP_END_FACTORY_REG(While) + +/** + *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n + + *@par Inputs: + *input: The input tensors . It's a dynamic input. \n + + *@par Graphs: + *@li cond: A subgraph takes 'input' and returns a tensor. + * If the tensor is not a scalar of boolean type, + * it will be converted to a boolean according to the following rule: + * if it is a numerical scalar, non-zero means True and zero means False; + * if it is a string scalar, non-empty means True and empty means False; + * if it is not a scalar, non-empty means True and empty means False. + *@li body: A subgraph takes 'input' and returns a another list of tensors . \n + + *@par Attributes: + *parallel_iterations: An optional int, default as 10 . \n + + *@par Outputs: + *output: The output tensors returned by "body". Has the same type as "input" . It's a dynamic output. \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator StatelessWhile. + */ +REG_OP(StatelessWhile) + .DYNAMIC_INPUT(input, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .GRAPH(cond) + .GRAPH(body) + .ATTR(parallel_iterations, Int, 10) + .OP_END_FACTORY_REG(StatelessWhile) + +/** + *@brief Cyclic execute the "body" subgraph until the first input of For op exceed upper bound . \n + + *@par Inputs: + *@li start: A int32 scalar. 
The lower bound. + *@li limit: A int32 scalar. The upper bound. + *@li delta: A int32 scalar. The step size. + *@li input: The input tensors, which will be passed to "body" . It's a dynamic input. \n + + *@par Graphs: + *body: A subgraph takes 'input' and returns a another list of tensors . \n + + *@par Outputs: + *output: The output tensors returned by "body". Has the same type as "input" . It's a dynamic output. \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator For. + */ +REG_OP(For) + .INPUT(start, DT_INT32) + .INPUT(limit, DT_INT32) + .INPUT(delta, DT_INT32) + .DYNAMIC_INPUT(input, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .GRAPH(body) + .OP_END_FACTORY_REG(For) + +/** + *@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n + + *@par Inputs: + *args: The input tensors, which will be passed to "f" . It's a dynamic input. \n + + *@par Graphs: + *f: A subgraph takes 'args' and returns a another list of tensors . \n + + *@par Attributes: + *@li config: An optional string, default as "". + *@li config_proto: An optional int, default as "". + *@li executor_type: An optional int, default as "" . \n + + *@par Outputs: + *output: The output tensors returned by "f" . It's a dynamic output. \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator PartitionedCall. + */ +REG_OP(PartitionedCall) + .DYNAMIC_INPUT(args, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .GRAPH(f) + .ATTR(config, String, "") + .ATTR(config_proto, String, "") + .ATTR(executor_type, String, "") + .OP_END_FACTORY_REG(PartitionedCall) + +/** + *@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n + + *@par Inputs: + *args: The input tensors, which will be passed to "f" . It's a dynamic input. \n + + *@par Graphs: + *f: A subgraph takes 'args' and returns a another list of tensors . \n + + *@par Attributes: + *@li config: An optional string, default as "". + *@li config_proto: An optional int, default as "". + *@li executor_type: An optional int, default as "" . \n + + *@par Outputs: + *output: The output tensors returned by "f" . It's a dynamic output. \n + + *@par Third-party framework compatibility + *@Compatible with the TensorFlow operator StatefulPartitionedCall. + */ +REG_OP(StatefulPartitionedCall) + .DYNAMIC_INPUT(args, TensorType::ALL()) + .DYNAMIC_OUTPUT(output, TensorType::ALL()) + .GRAPH(f) + .ATTR(config, String, "") + .ATTR(config_proto, String, "") + .ATTR(executor_type, String, "") + .OP_END_FACTORY_REG(StatefulPartitionedCall) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/get_data_ops.h b/third_party/fwkacllib/inc/inc/ops/get_data_ops.h new file mode 100644 index 00000000..e5518ef8 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/get_data_ops.h @@ -0,0 +1,103 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file get_data_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Binding dataset and GetNext +*@par Attributes: None +*@par Inputs: Dataset and GetNext operator +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(MakeIterator) + .INPUT(x, TensorType::ALL()) + .INPUT(x1, TensorType::ALL()) + .ATTR(_kernel, String, "dp") + .OP_END_FACTORY_REG(MakeIterator) + +/** +*@brief Dataset iterator +*@par Attributes: +*output_types: Data type of output +*output_shapes: Shapes of output +*container: Iterator container name +*shared_name: Iterator id +*@par Inputs: None +*@par Outputs: Dataset +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(IteratorV2) + .OUTPUT(y, TensorType::ALL()) + .ATTR(output_types, ListInt, {}) + .ATTR(output_shapes,ListListInt, {{}, {}}) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(IteratorV2) + +/** +*@brief Dataset GetNext iterator +*@par Attributes: +*output_types: Data type of output +*output_shapes: Shapes of output +*output_num: Num of output +*@par Inputs: Queue data +*@par Outputs: Input of computer graph +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(IteratorGetNext) + .INPUT(x, TensorType::ALL()) + .DYNAMIC_OUTPUT(y, TensorType::ALL()) + .ATTR(output_types, ListInt, {}) + .ATTR(output_shapes, ListListInt, {{},{}}) + .ATTR(output_num, Int, 1) + .ATTR(_kernel, String, "dp") + .OP_END_FACTORY_REG(IteratorGetNext) + +/** +*@brief Device queue data area. +*@par Attributes: +*output_types: Data type of output +*output_shapes: Shapes of output +*channel_name: Channel ID corresponding to TDT +*@par Inputs: None +*@par Outputs: Dataset GetNext iterator +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DeviceQueueDataset) + .OUTPUT(y, TensorType::ALL()) + .ATTR(output_types, ListInt, {}) + .ATTR(output_shapes, ListListInt, {{},{}}) + .ATTR(channel_name, String, "") + .ATTR(_iterator_name, String, "IteratorV2") + .OP_END_FACTORY_REG(DeviceQueueDataset) + +} // namespace ge + + +#endif // OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/inc/ops/hcom_ops.h new file mode 100644 index 00000000..f4ded0cd --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/hcom_ops.h @@ -0,0 +1,284 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file hcom_ops.h + * \brief huawei collective communication library ops. 
+ */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** + * @brief Outputs a tensor gathering all input tensors. + * @par Inputs: + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. + * @par Attributes: + * @li rank_size: A required integer identifying the number of ranks + participating in the op. + * @li group: A required string identifying the group name of ranks + participating in the op. + * @par Outputs: + * y: A Tensor. Has the same type as "x". + * @attention Constraints: + "group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. + */ +REG_OP(HcomAllGather) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .REQUIRED_ATTR(rank_size, Int) + .REQUIRED_ATTR(group, String) + .OP_END_FACTORY_REG(HcomAllGather) + +/** + * @brief Outputs a tensor containing the reduction across all input tensors + passed to op. + * @par Inputs: + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. + * @par Attributes: + * @li reduction: A required string identifying the reduction operation to + perform.The supported operation are: "sum", "max", "min", "prod". + * @li group: A required string identifying the group name of ranks + participating in the op. + * @li fusion: An optional integer identifying the fusion flag of the op. + 0: no fusion; 1 (default): fusion; 2: fusion the ops by fusion id. + * @li fusion_id: An optional integer identifying the fusion id of the op. + * The HcomAllReduce ops with the same fusion id will be fused. + * @par Outputs: + * y: A Tensor. Has the same type as "x". + * @attention Constraints: + *"group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. + */ +REG_OP(HcomAllReduce) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .REQUIRED_ATTR(reduction, String) + .REQUIRED_ATTR(group, String) + .ATTR(fusion, Int, 1) + .ATTR(fusion_id, Int, -1) + .OP_END_FACTORY_REG(HcomAllReduce) + +/** + * @brief Broadcasts the input tensor in root rank to all ranks. + * @par Inputs: + * x: A list of dynamic input tensor. Must be one of the following types: + int8, int16, int32, float16, float32. It's a dynamic input. + * @par Attributes: + * @li root_rank: A required integer identifying the root rank in the op + input of this rank will be broadcast to other ranks. + * @li fusion: A required integer identifying if the op need to fusion,the + default value is none fusion + * @li fusion_id: A required integer identifying the fusion id if para fusion + is set. + * @li group: A required string identifying the group name of ranks + participating in the op. + * @par Outputs: + * y: A list of dynamic output tensor. Has the same type and length as "x". + * It's a dynamic output. + * @attention Constraints: + "group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. 
+ */ +REG_OP(HcomBroadcast) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .REQUIRED_ATTR(root_rank, Int) + .REQUIRED_ATTR(group, String) + .ATTR(fusion, Int, 0) + .ATTR(fusion_id, Int, -1) + .OP_END_FACTORY_REG(HcomBroadcast) + +/** + * @brief preforms reduction from others rank to rootrank + * @par Inputs: +* @li root_rank: A required integer identifying the root rank in the op + the reduction result will be on this root rank + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. + * @par Attributes: + * @li reduction: A required string identifying the reduction operation to + perform.The supported operation are: "sum", "max", "min", "prod". + * @li group: A required string identifying the group name of ranks + participating in the op. + * @li fusion: An optional integer identifying the fusion flag of the op. + 0: no fusion; 1 (default): fusion; 2: fusion the ops by fusion id. + * @li fusion_id: An optional integer identifying the fusion id of the op. + * The HcomReduce ops with the same fusion id will be fused. + * @par Outputs: + * y: A Tensor. Has the same type as "x". + * @attention Constraints: + *"group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. + */ +REG_OP(HcomReduce) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .REQUIRED_ATTR(root_rank, Int) + .REQUIRED_ATTR(reduction, String) + .REQUIRED_ATTR(group, String) + .ATTR(fusion, Int, 0) + .ATTR(fusion_id, Int, -1) + .OP_END_FACTORY_REG(HcomReduce) +/** + * @brief Performs reduction across all input tensors, scattering in equal + blocks among ranks, each rank getting a chunk of data based on its rank + index. + * @par Inputs: + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. + * @par Attributes: + * @li reduction: A required string identifying the reduction operation to + perform. The supported operation are: "sum", "max", "min", "prod". + * @li group: A required string identifying the group name of ranks + participating in the op. + * @li rank_size: A required integer identifying the number of ranks + participating in the op. + * @par Outputs: + * y: A Tensor. Has the same type as "x". + * @attention Constraints: + "group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. + */ +REG_OP(HcomReduceScatter) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .REQUIRED_ATTR(reduction, String) + .REQUIRED_ATTR(group, String) + .REQUIRED_ATTR(rank_size, Int) + .OP_END_FACTORY_REG(HcomReduceScatter) + +/** + * @brief Sends the input tensor to destination rank. + * @par Inputs: + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. + * @par Attributes: + * @li sr_tag: A required integer identifying the send/recv message tag. The + message will be received by the HcomReceive op with the same "sr_tag". + * @li dest_rank: A required integer identifying the destination rank. + * @li group: A string identifying the group name of ranks participating in + the op. + * @par Outputs: + * None. + * @attention Constraints: + @li "group" is limited to 128 characters. 
Use + "hccl_world_group" as the name of a world group. + * @li Operators HcomSend and HcomReceive have the same "sr_tag". + * @see HcomReceive +*/ +REG_OP(HcomSend) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .REQUIRED_ATTR(group, String) + .REQUIRED_ATTR(sr_tag, Int) + .REQUIRED_ATTR(dest_rank, Int) + .OP_END_FACTORY_REG(HcomSend) + +/** + * @brief Receives the tensor from source rank. + * @par Inputs: + * None. + * @par Attributes: + * @li sr_tag: A required integer identifying the send/recv message tag. The + message will be send by the HcomSend op with the same "sr_tag". + * @li src_rank: A required integer identifying the source rank. + * @li group: A required string identifying the group name of ranks + * participating in the op. + * @li shape: A required list identifying the shape of the tensor to be + received. + * @li dtype: A required integer identifying the type of the tensor to be + received. The supported types are: int8, int16, int32, float16, float32. + * @par Outputs: + * y: A tensor with type identified in "dtype". + * @attention Constraints: + @li "group" is limited to 128 characters. Use + "hccl_world_group" as the name of a world group. + * @li Operators HcomSend and HcomReceive have the same "sr_tag". + * @li "shape" should be same as the input tensor of HcomSend. + * @li "dtype" should be same as the input tensor of HcomSend. + * @see HcomSend +*/ +REG_OP(HcomReceive) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .REQUIRED_ATTR(group, String) + .REQUIRED_ATTR(sr_tag, Int) + .REQUIRED_ATTR(src_rank, Int) + .REQUIRED_ATTR(shape, ListInt) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(HcomReceive) + +/** + * @brief Performs Remote Read of input tensors + * @par Inputs: + * remote: A tensor. describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length + * @par Outputs: + * local: A Tensor. whose value is length / size_of(Type) + */ +REG_OP(HcomRemoteRead) + .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) + .OUTPUT(local, TensorType::ALL()) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(HcomRemoteRead) + +/** + * @brief Performs Remote Ref Read of input tensors + * @par Inputs: + * remote: A tensor. describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length + * cache_var: The local base address + * local_offset: Skip step length + * @par Outputs: + * cache_var: The local base address + */ +REG_OP(HcomRemoteRefRead) + .INPUT(remote, TensorType({DT_UINT64})) + .INPUT(cache_var, TensorType({DT_UINT64})) + .INPUT(local_offset, TensorType({DT_UINT64})) + .OUTPUT(cache_var, TensorType({DT_UINT64})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(HcomRemoteRefRead) + +/** + * @brief Performs Remote Write of input tensors + * @par Inputs: + * remote: A tensor. describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length + * @par Inputs: + * local: A Tensor. whose value is length / size_of(Type) + */ +REG_OP(HcomRemoteWrite) + .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) + .INPUT(local, TensorType::ALL()) + .OP_END_FACTORY_REG(HcomRemoteWrite) + +/** + * @brief Performs Remote Write of input tensors + * @par Inputs: + * remote: A tensor. describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length + * @par Inputs: + * local: A Tensor. 
whose value is length / size_of(Type) + */ +REG_OP(HcomRemoteScatterWrite) + .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) + .INPUT(local, TensorType::ALL()) + .OPTIONAL_INPUT(local_offset, TensorType({DT_UINT64})) + .OP_END_FACTORY_REG(HcomRemoteScatterWrite) + +} // namespace ge +#endif // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/hvd_ops.h b/third_party/fwkacllib/inc/inc/ops/hvd_ops.h new file mode 100644 index 00000000..00299ef7 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/hvd_ops.h @@ -0,0 +1,81 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file hvd_ops.h + * \brief Horovod collective communication library ops. + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** + * @brief Outputs a tensor gathering all input tensors. + * @par Inputs: + * x: A tensor. Must be one of the following types: uint8, int8, uint16, int16, int32, + int64, float16, bool. + * @par Attributes: + * @li rank_size: A required integer identifying the number of ranks + participating in the op. + * @par Outputs: + * y: A Tensor. Has the same type as "x". + */ +REG_OP(HorovodAllgather) + // GE not support float64 currently + .INPUT(x, TensorType({DT_UINT8, DT_INT8, DT_UINT16, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL})) + .OUTPUT(y, TensorType({DT_UINT8, DT_INT8, DT_UINT16, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL})) + // add rank_size attr + .REQUIRED_ATTR(rank_size, Int) + .OP_END_FACTORY_REG(HorovodAllgather) + +/** + * @brief Outputs a tensor containing the reduction across all input tensors + passed to op. + * @par Inputs: + * x: A tensor. Must be one of the following types: int32, int64, float16, float32 + @par Attributes: + * @li reduce_op: A required int identifying the reduction operation to + perform.The supported operation are: "sum", "max", "min", "prod". + * @par Outputs: + * y: A Tensor. Has the same type as "x". + */ +REG_OP(HorovodAllreduce) + .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(reduce_op, Int) + .OP_END_FACTORY_REG(HorovodAllreduce) + +/** + * @brief Broadcasts the input tensor in root rank to all ranks. + * @par Inputs: + * x: A list of dynamic input tensor. Must be one of the following types: + int8, int32, float16, float32. + * @par Attributes: + * @li root_rank: A required integer identifying the root rank in the op + input of this rank will be broadcast to other ranks. + * @par Outputs: + * y: A list of dynamic output tensor. Has the same type and length as "x". 
+ */ +REG_OP(HorovodBroadcast) + .INPUT(x, TensorType({DT_UINT8, DT_INT8, DT_UINT16, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL})) + .OUTPUT(y, TensorType({DT_UINT8, DT_INT8, DT_UINT16, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL})) + .REQUIRED_ATTR(root_rank, Int) + .OP_END_FACTORY_REG(HorovodBroadcast) + +} // namespace ge +#endif // OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/image_ops.h b/third_party/fwkacllib/inc/inc/ops/image_ops.h new file mode 100644 index 00000000..4703705b --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/image_ops.h @@ -0,0 +1,1539 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file image_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Adjust the hue of one or more images . \n + +*@par Inputs: +*Input images is a tensor of at least 3 dimensions. The last dimension is +interpretted as channels, and must be three. Inputs include: +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. +*@li delta:A Tensor of type float. A float delta to add to the hue . \n + +*@par Outputs: +*y:A Tensor of type float. The format must be NHWC. \n + +*@attention Constraints: +*Input images is a tensor of at least 3 dimensions. The last dimension is +interpretted as channels, and must be three . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow AdjustHue operator. +*/ + +REG_OP(AdjustHue) + .INPUT(images, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(delta, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(AdjustHue) + +/** +*@brief Adjust the saturation of one or more images . \n + +*@par Inputs: +*Input images is a tensor of at least 3 dimensions. The last dimension is +interpretted as channels, and must be three. Inputs include: +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. +*@li scale:A Tensor of type float. A float scale to add to the saturation . \n + +*@par Outputs: +*y:A Tensor of type float. The format must be NHWC. \n + +*@attention Constraints: +*Input images is a tensor of at least 3 dimensions. The last dimension is +interpretted as channels, and must be three . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow AdjustSaturation operator. +*/ + +REG_OP(AdjustSaturation) + .INPUT(images, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(AdjustSaturation) + +/** +*@brief Adjust the contrast of one or more images . \n + +*@par Inputs: +*Input images is a tensor of at least 3 dimensions. The last 3 dimensions are +interpreted as '[height, width, channels]'. Inputs include: +*@li images:A Tensor of type float. 
Images to adjust. At least 3-D. The format +must be NHWC. +*@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n + +*@par Outputs: +*y:A Tensor of type float. The format must be NHWC. \n + +*@attention Constraints: +*Input images is a tensor of at least 3 dimensions. The last dimension is +interpretted as channels, and must be three . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow AdjustContrast operator. +*/ + +REG_OP(AdjustContrast) + .INPUT(images, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(contrast_factor, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(AdjustContrast) + +/** +*@brief Extracts crops from the input image tensor and resizes them. Extracts +crops from the input image tensor and resizes them using bilinear sampling or +nearest neighbor sampling to a common output size specified by crop_size . \n + +*@par Inputs: +*Input images must be a 4-D tensor. Inputs include: +*@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, +int16, int32, int64, float16, float, double. A 4-D tensor of shape +[batch, image_height, image_width, depth]. The format must be NHWC. +*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. +*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with +int32 values in [0, batch). +*@li crop_size: A Tensor of type int32. A 1-D tensor of 2 elements, crop_size += [crop_height, crop_width]. All cropped image patches are resized to this size . \n + +*@par Attributes: +*@li extrapolation_value: An optional float. Defaults to 0. Value used for +extrapolation, when applicable. +*@li method: An optional string from: '"bilinear", "nearest"'. Defaults to +"bilinear". Currently two sampling methods are supported: Bilinear and +NearestNeighbor . \n + +*@par Outputs: +*y:A Tensor of type float. The format must be NHWC. \n + +*@attention Constraints: +*Input images must be a 4-D tensor . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow CropAndResize operator. +*/ + +REG_OP(CropAndResize) + .INPUT(x, TensorType({DT_UINT8, DT_UINT16, DT_INT8, \ + DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(box_index, TensorType({DT_INT32})) + .INPUT(crop_size, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(extrapolation_value, Float, 0) + .ATTR(method, String, "bilinear") + .OP_END_FACTORY_REG(CropAndResize) + +/** +*@brief Extracts crops from the input image tensor and resizes them. +* Extracts crops from the input image tensor and resizes them using bilinear sampling or +* nearest neighbor sampling to a common output size specified by crop_size . \n + +*@par Inputs: +*Input images must be a 5HD tensor. Inputs include: +*@li x:A Tensor. Must be one of the following types:float16, float. A 5HD tensor of shape +* [batch, C1, image_height, image_width, C0]. +*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. +*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n + +*@par Attributes: +*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches are resized to this size. +*@li extrapolation_value: An optional float. Defaults to 0. Value used for extrapolation, when applicable. +*@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n + +*@par Outputs: +*y:A Tensor of type float . 
\n + +*@attention Constraints: +*Input images must be a 5HD tensor . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow CropAndResize operator. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use CropAndResize instead. +*/ +REG_OP(CropAndResizeD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(box_index, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(crop_size, ListInt) + .ATTR(extrapolation_value, Float, 0) + .ATTR(method, String, "bilinear") + .OP_END_FACTORY_REG(CropAndResizeD) + +/** +*@brief Computes the gradient of the crop_and_resize op wrt the input +boxes tensor . \n + +*@par Inputs: +*Input images and grads must be a 4-D tensor. Inputs include: +*@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. +The format must be NHWC. +*@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. +The format must be NHWC. +Both image_height and image_width need to be positive. +*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor +specifies the coordinates of a box in the box_ind[i] image and is specified in +normalized coordinates [y1, x1, y2, x2]. +*@li box_index: A 1-D tensor of shape [num_boxes] with int32 values in +[0, batch). The value of box_ind[i] specifies the image that the i-th box +refers to . \n + +*@par Attributes: +method: A string specifying the interpolation method. Only 'bilinear' is +supported for now . \n + +*@par Outputs: +*y:A 2-D tensor of shape [num_boxes, 4] . \n + +*@attention Constraints: +*Input images and grads must be a 4-D tensor . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow CropAndResizeGradBoxes operator. +*/ + +REG_OP(CropAndResizeGradBoxes) + .INPUT(grads, TensorType({DT_FLOAT})) + .INPUT(images, TensorType({DT_UINT8, DT_UINT16, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(box_index, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(method, String, "bilinear") + .OP_END_FACTORY_REG(CropAndResizeGradBoxes) + +/** +*@brief Computes the gradient of the crop_and_resize op wrt the input +images tensor . \n + +*@par Inputs: +*Input grads must be a 4-D tensor. Inputs include: +*@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. +The format must be NHWC. +*@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor +specifies the coordinates of a box in the box_ind[i] image and is specified +in normalized coordinates [y1, x1, y2, x2]. +*@li box_index: A 1-D tensor of shape [num_boxes] with int32 values in +[0, batch). The value of box_ind[i] specifies the image that the i-th box +refers to. +*@li image_size: A 1-D tensor with value [batch, image_height, image_width, +depth] containing the original image size. Both image_height and image_width +need to be positive . \n + +*@par Attributes: +method: A string specifying the interpolation method. Only 'bilinear' is +supported for now . \n + +*@par Outputs: +*y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format +must be NHWC. \n + +*@attention Constraints: +*Input grads must be a 4-D tensor . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow CropAndResizeGradImage operator. 
+*/ + +REG_OP(CropAndResizeGradImage) + .INPUT(grads, TensorType({DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(box_index, TensorType({DT_INT32})) + .INPUT(image_size, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(method, String, "bilinear") + .REQUIRED_ATTR(T, Type) + .OP_END_FACTORY_REG(CropAndResizeGradImage) + +/** +*@brief Extracts a glimpse from the input tensor . \n + +*@par Inputs: +*Input x must be a 4-D tensor. Inputs include: +*@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. +The format must be NHWC. +*@li size: A 1-D tensor of 2 elements containing the size of the glimpses to +extract. The glimpse height must be specified first, following by the glimpse +width. +*@li offsets: A 2-D integer tensor of shape [batch_size, 2] containing the y, +x locations of the center of each window . \n + +*@par Attributes: +*@li centered: indicates if the offset coordinates are centered relative to +the image, in which case the (0, 0) offset is relative to the center of the +input images. If false, the (0,0) offset corresponds to the upper left corner +of the input images. +*@li normalized: indicates if the offset coordinates are normalized. +*@li uniform_noise: indicates if the noise should be generated using a +uniform distribution or a Gaussian distribution. +*@li noise: indicates if the noise should uniform, gaussian, or zero. +The default is uniform which means the the noise type will be decided by +uniform_noise . \n + +*@par Outputs: +*y:A tensor representing the glimpses [batch_size, glimpse_height, +glimpse_width, channels]. The format must be NHWC. \n + +*@attention Constraints: +*Input x must be a 4-D tensor . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow CropAndResizeGradImage operator. +*/ + +REG_OP(ExtractGlimpse) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(size, TensorType({DT_INT32})) + .INPUT(offsets, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(centered, Bool, true) + .ATTR(normalized, Bool, true) + .ATTR(uniform_noise, Bool, true) + .ATTR(noise, String, "uniform") + .OP_END_FACTORY_REG(ExtractGlimpse) + +/** +*@brief Convert one or more images from HSV to RGB . \n + +*@par Inputs: +*Last dimension of input x must be size 3. Inputs include: +*images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3 . \n + +*@par Outputs: +*y:images converted to RGB . \n + +*@attention Constraints: +*Last dimension of input x must be size 3 . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow HSVToRGB operator. +*/ + +REG_OP(HSVToRGB) + .INPUT(images, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE})) + .OP_END_FACTORY_REG(HSVToRGB) + +/** +*@brief Resize quantized images to size using quantized bilinear interpolation . \n + +*@par Inputs: +*Input images must be a 4-D tensor. Inputs include: +*@li images: 4-D with shape [batch, height, width, channels]. The format must +be NHWC. +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new +size for the images. +*@li min: A Tensor of type float. +*@li max: A Tensor of type float . \n + +*@par Attributes: +*@li align_corners: An optional bool. Defaults to False. If true, the centers +of the 4 corner pixels of the input and output tensors are aligned, preserving +the values at the corner pixels. Defaults to false. +*@li half_pixel_centers: indicates if the offset coordinates are normalized . 
\n + +*@par Outputs: +*@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. +The format must be NHWC. +*@li y_min: A Tensor of type float. +*@li y_max: A Tensor of type float . \n + +*@attention Constraints: +*Input images and output images must be quantized types . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow QuantizedResizeBilinear operator. +*/ + +REG_OP(QuantizedResizeBilinear) + .INPUT(images, TensorType({DT_QUINT8,DT_QINT32,DT_FLOAT})) + .INPUT(size, TensorType({ DT_INT32 })) + .INPUT(min, TensorType({ DT_FLOAT })) + .INPUT(max, TensorType({ DT_FLOAT })) + .OUTPUT(resized_images, TensorType({DT_QUINT8,DT_QINT32,DT_FLOAT })) + .OUTPUT(y_min, TensorType({ DT_FLOAT })) + .OUTPUT(y_max, TensorType({ DT_FLOAT })) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(QuantizedResizeBilinear) + +/** +*@brief Resize images to size using area interpolation . \n + +*@par Inputs: +*Input images must be a 4-D tensor. Inputs include: +*@li images: 4-D with shape [batch, height, width, channels]. The format must +be NHWC. +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. +The new size for the images . \n + +*@par Attributes: +*align_corners: If true, the centers of the 4 corner pixels of the input and +output tensors are aligned, preserving the values at the corner pixels. +Defaults to false . \n + +*@par Outputs: +*y: 4-D with shape [batch, new_height, new_width, channels]. The format must +be NHWC. \n + +*@attention Constraints: +*Input images can be of different types but output images are always float . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeArea operator. +*/ + +REG_OP(ResizeArea) + .INPUT(images, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(size, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(align_corners, Bool, false) + .OP_END_FACTORY_REG(ResizeArea) + +/** +*@brief Computes the gradient of bicubic interpolation . \n + +*@par Inputs: +*Input grads must be a 4-D tensor. Inputs include: +*@li grads: A Tensor of type float. 4-D with shape [batch, height, width, +channels]. The format must be NHWC. +*@li original_image: A Tensor. Must be one of the following types: float, +double. 4-D with shape [batch, orig_height, orig_width, channels], The image +tensor that was resized. The format must be NHWC. \n + +*@par Attributes: +*@li align_corners: An optional bool. Defaults to False. If true, the centers +of the 4 corner pixels of the input and grad tensors are aligned. Defaults to +false. +*@li half_pixel_centers: An optional bool. Defaults to False . \n + +*@par Outputs: +*y: A Tensor. Has the same type as original_image. The format must be NHWC. \n + +*@attention Constraints: +*Input images can be of different types but output images are always float . + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeBicubicGrad operator. +*/ + +REG_OP(ResizeBicubicGrad) + .INPUT(grads, TensorType({DT_FLOAT})) + .INPUT(original_image, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(ResizeBicubicGrad) + +/** +*@brief Resize images to size using bicubic interpolation . \n + +*@par Inputs: +*Input images must be a 4-D tensor. 
Inputs include: +*@li images: 4-D with shape [batch, height, width, channels]. The format +must be NHWC. +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new +size for the images . \n + +*@par Attributes: +*@li align_corners: If true, the centers of the 4 corner pixels of the input +and output tensors are aligned, preserving the values at the corner pixels. +Defaults to false. +*@li half_pixel_centers: An optional bool. Defaults to False . \n + +*@par Outputs: +*y: 4-D with shape [batch, new_height, new_width, channels]. The format +must be NHWC. \n + +*@attention Constraints: +*Input images can be of different types but output images are always float . + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeBicubic operator. +*/ + +REG_OP(ResizeBicubic) + .INPUT(images, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(size, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(ResizeBicubic) + +/** +*@brief Computes the gradient of nearest neighbor interpolation . \n + +*@par Inputs: +*Input grads must be a 4-D tensor. Inputs include: +*@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, +float16, float, double. Must set the format, supported format list ["NCHW, NHWC"] +*@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. +The original input size . \n + +*@par Attributes: +*@li align_corners: An optional bool. Defaults to False. If true, the centers +of the 4 corner pixels of the input and grad tensors are aligned. Defaults to +false. +*@li half_pixel_centers: An optional bool. Defaults to False . \n + +*@par Outputs: +*y: A Tensor. Has the same type as grads . \n + +*@attention Constraints: +*Input grads must be a 4-D tensor . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeNearestNeighborV2Grad operator. +*/ + +REG_OP(ResizeNearestNeighborV2Grad) + .INPUT(grads, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(size, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(ResizeNearestNeighborV2Grad) + +/** +*@brief Computes the gradient of nearest neighbor interpolation . \n + +*@par Inputs: +*Input grads must be a 4-D tensor. Inputs include: +*grads: A Tensor. 4-D with shape [batch, height, width, channels]. + + +*@par Attributes: +*@li align_corners: An optional bool. Defaults to False. If true, the centers +of the 4 corner pixels of the input and grad tensors are aligned. Defaults to +false. +*@li size: An list type. Specify the images size . \n + +*@par Outputs: +*y: A Tensor. Has the same type as grads . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeNearestNeighborV2GradD operator. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ResizeNearestNeighborV2Grad instead. 
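+*
+*@par Note
+* The sketch below is illustrative only and not part of this header; it shows one
+* common reading of how the shared "align_corners" / "half_pixel_centers" attributes
+* of the resize operators in this file map an output index to a source coordinate.
+* The helper name ScaleCoord is hypothetical.
+*@code
+* // Maps output index `dst` of a resize from length in_len to length out_len
+* // onto a (possibly fractional) source coordinate. Assumed semantics.
+* inline float ScaleCoord(int dst, int in_len, int out_len,
+*                         bool align_corners, bool half_pixel_centers) {
+*   if (align_corners && out_len > 1) {
+*     return dst * static_cast<float>(in_len - 1) / (out_len - 1);
+*   }
+*   const float scale = static_cast<float>(in_len) / out_len;
+*   return half_pixel_centers ? (dst + 0.5f) * scale - 0.5f : dst * scale;
+* }
+*@endcode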
+*/ + +REG_OP(ResizeNearestNeighborV2GradD) + .INPUT(grads, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(size, ListInt) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(ResizeNearestNeighborV2GradD) + +/** +*@brief Computes the gradient of bilinear interpolation . \n + +*@par Inputs: +*Input grads must be a 4-D tensor. Inputs include: +*@li grads: A Tensor of type float32. Must set the format, supported format list ["NCHW, NHWC"] +*@li original_image: A Tensor. 4-D shape. Must set the format, supported format list ["NCHW, NHWC"] +channels], The image tensor that was resized . \n + +*@par Attributes: +*align_corners: An optional bool. Defaults to False. If true, the centers of +the 4 corner pixels of the input and grad tensors are aligned. Defaults to +false . \n + +*@par Outputs: +*y: A Tensor. Has the same type as original_image . \n + +*@attention Constraints: +*Input grads must be a 4-D tensor . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeBilinearV2Grad operator. +*/ + +REG_OP(ResizeBilinearV2Grad) + .INPUT(grads, TensorType({DT_FLOAT})) + .INPUT(original_image, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(ResizeBilinearV2Grad) + +/** +*@brief Resize images to size using bilinear interpolation . \n + +*@par Inputs: +*Input images must be a 4-D tensor. Inputs include: +*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"] +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new +size for the images . \n + +*@par Attributes: +*align_corners: If true, the centers of the 4 corner pixels of the input and +output tensors are aligned, preserving the values at the corner pixels. +Defaults to false . \n + +*@par Outputs: +*y: 4-D with shape [batch, new_height, new_width, channels] . \n + +*@attention Constraints: +*Input images can be of different types but output images are always float . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeBilinearV2 operator. +*/ + +REG_OP(ResizeBilinearV2) + .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(size, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(ResizeBilinearV2) + +/** +*@brief Converts one or more images from RGB to HSV . \n + +*@par Inputs: +*Last dimension of input images must be size 3. Inputs include: +*images: A Tensor. Must be one of the following types: float, double. 1-D or +higher rank. RGB data to convert. Last dimension must be size 3 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as images . \n + +*@attention Constraints: +*Outputs a tensor of the same shape as the images tensor, containing the HSV +value of the pixels. The output is only well defined if the value in images +are in [0,1] . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow RGBToHSV operator. +*/ + +REG_OP(RGBToHSV) + .INPUT(images, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) + .OP_END_FACTORY_REG(RGBToHSV) + +/** +*@brief Generate a single randomly distorted bounding box for an image . \n + +*@par Inputs: +*Input images must be a 4-D tensor. 
Inputs include: +*@li image_size: 1-D, containing [height, width, channels]. +*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding +boxes associated with the image. \n + +*@par Attributes: +*@li seed: If either seed or seed2 are set to non-zero, the random number +generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: A second seed to avoid seed collision. +*@li min_object_covered: The cropped area of the image must contain at least +this fraction of any bounding box supplied. The value of this parameter should +be non-negative. In the case of 0, the cropped area does not need to overlap +any of the bounding boxes supplied . +*@li aspect_ratio_range: The cropped area of the image must have an aspect +ratio = width / height within this range. +*@li max_attempts: Number of attempts at generating a cropped region of the +image of the specified constraints. After max_attempts failures, return the +entire image. +*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes +supplied. If true, assume an implicit bounding box covering the whole input. +If false, raise an error . \n + +*@par Outputs: +*@li begin: 1-D, containing [offset_height, offset_width, 0]. +*@li size: 1-D, containing [target_height, target_width, -1]. +*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n + +*@attention Constraints: +*Input images can be of different types but output images are always float . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SampleDistortedBoundingBox operator. +*/ + +REG_OP(SampleDistortedBoundingBox) + .INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .INPUT(bounding_boxes, TensorType({ DT_FLOAT })) + .OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .OUTPUT(bboxes, TensorType({ DT_FLOAT })) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .ATTR(min_object_covered, Float, 0.1f) + .ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) + .ATTR(area_range, ListFloat, { 0.05f, 1.0f }) + .ATTR(max_attempts, Int, 100) + .ATTR(use_image_if_no_bounding_boxes, Bool, false) + .OP_END_FACTORY_REG(SampleDistortedBoundingBox) + +/** +*@brief Generate a single randomly distorted bounding box for an image . \n + +*@par Inputs: +*Input images must be a 4-D tensor. Inputs include: +*@li image_size: 1-D, containing [height, width, channels]. +*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding +boxes associated with the image. +*@li min_object_covered: The cropped area of the image must contain at least +this fraction of any bounding box supplied. The value of this parameter should +be non-negative. In the case of 0, the cropped area does not need to overlap +any of the bounding boxes supplied . \n + +*@par Attributes: +*@li seed: If either seed or seed2 are set to non-zero, the random number +generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: A second seed to avoid seed collision. +*@li aspect_ratio_range: The cropped area of the image must have an aspect +ratio = width / height within this range. +*@li max_attempts: Number of attempts at generating a cropped region of the +image of the specified constraints. After max_attempts failures, return the +entire image. +*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes +supplied. 
If true, assume an implicit bounding box covering the whole input. +If false, raise an error . \n + +*@par Outputs: +*@li begin: 1-D, containing [offset_height, offset_width, 0]. +*@li size: 1-D, containing [target_height, target_width, -1]. +*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n + +*@attention Constraints: +*Input images can be of different types but output images are always float . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SampleDistortedBoundingBoxExt2 operator. +*/ + +REG_OP(SampleDistortedBoundingBoxExt2) + .INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .INPUT(bounding_boxes, TensorType({ DT_FLOAT })) + .INPUT(min_object_covered, TensorType({ DT_FLOAT })) + .OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .OUTPUT(bboxes, TensorType({ DT_FLOAT })) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) + .ATTR(area_range, ListFloat, { 0.05f, 1.0f }) + .ATTR(max_attempts, Int, 100) + .ATTR(use_image_if_no_bounding_boxes, Bool, false) + .OP_END_FACTORY_REG(SampleDistortedBoundingBoxExt2) + +/** +*@brief Resize images to size using nearest neighbor interpolation . \n + +*@par Inputs: +*Input x must be a 4-D tensor. Inputs include: +*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"]. +*@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. +The new size for the images . \n + +*@par Attributes: +*align_corners: If true, the centers of the 4 corner pixels of the input and +output tensors are aligned, preserving the values at the corner pixels. +Defaults to false . \n + +*@par Outputs: +*y: 4-D with shape [batch, new_height, new_width, channels] . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeNearestNeighborV2 operator. +*/ + +REG_OP(ResizeNearestNeighborV2) + .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(size, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(ResizeNearestNeighborV2) + +/** +*@brief Draw bounding boxes on a batch of images . \n + +*@par Inputs: +*Input images must be a 4-D tensor. Inputs include: +*@li images: A Tensor. Must be one of the following types: float. 4-D with +shape [batch, height, width, depth]. A batch of images. The format must be NHWC. +*@li boxes: A Tensor of type float32. 3-D with shape [batch, +num_bounding_boxes, 4] containing bounding boxes . \n + +*@par Outputs: +*A Tensor. Has the same type as images. The format must be NHWC. \n + +*@attention Constraints: +*Input images must be a 4-D tensor . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow DrawBoundingBoxes operator. +*/ + +REG_OP(DrawBoundingBoxes) + .INPUT(images, TensorType({DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(DrawBoundingBoxes) + +/** +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n + +*@par Inputs: +*Input boxes and scores must be float type. Inputs include: +*@li boxes: A 2-D float tensor of shape [num_boxes, 4]. 
+*@li scores: A 1-D float tensor of shape [num_boxes] representing a single +score corresponding to each box (each row of boxes). +*@li max_output_size: A scalar integer tensor representing the maximum number +of boxes to be selected by non max suppression . \n + +*@par Attributes: +*iou_threshold: A float representing the threshold for deciding whether boxes +overlap too much with respect to IOU . \n + +*@par Outputs: +*selected_indices: A 1-D integer tensor of shape [M] representing the selected +indices from the boxes tensor, where M <= max_output_size . \n + +*@attention Constraints: +*Input boxes and scores must be float type . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow NonMaxSuppression operator. +*/ + +REG_OP(NonMaxSuppression) + .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT})) + .INPUT(max_output_size, TensorType({DT_INT32})) + .OUTPUT(selected_indices, TensorType({DT_INT32})) + .ATTR(iou_threshold, Float, 0.5f) + .OP_END_FACTORY_REG(NonMaxSuppression) + +/** +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n + +*@par Inputs: +*Input boxes and scores must be float type. Inputs include: +*@li boxes: A 2-D float tensor of shape [num_boxes, 4]. +*@li scores: A 1-D float tensor of shape [num_boxes] representing a single +score corresponding to each box (each row of boxes). +*@li max_output_size: A scalar integer tensor representing the maximum number +of boxes to be selected by non max suppression. +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding +whether boxes overlap too much with respect to IOU . \n + +*@par Outputs: +*selected_indices: A 1-D integer tensor of shape [M] representing the selected +indices from the boxes tensor, where M <= max_output_size . \n + +*@attention Constraints: +*Input boxes and scores must be float type . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow NonMaxSuppressionV2 operator. +*/ + +REG_OP(NonMaxSuppressionV2) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(max_output_size, TensorType({DT_INT32})) + .INPUT(iou_threshold, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(selected_indices, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(NonMaxSuppressionV2) + +/** +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n + +*@par Inputs: +*Input boxes and scores must be float type. Inputs include: +*@li boxes: A 2-D float tensor of shape [num_boxes, 4]. +*@li scores: A 1-D float tensor of shape [num_boxes] representing a single +score corresponding to each box (each row of boxes). +*@li max_output_size: A scalar integer tensor representing the maximum number +of boxes to be selected by non max suppression. +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding +whether boxes overlap too much with respect to IOU. +*@li score_threshold: A 0-D float tensor representing the threshold for +deciding when to remove boxes based on score . \n + +*@par Outputs: +*selected_indices: A 1-D integer tensor of shape [M] representing the selected +indices from the boxes tensor, where M <= max_output_size . \n + +*@attention Constraints: +*Input boxes and scores must be float type . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow NonMaxSuppressionV3 operator. 
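+*
+*@par Note
+* The snippet below is an illustrative sketch only (not part of this header) of the
+* greedy rule that iou_threshold and score_threshold control; the Box struct and the
+* IoU helper are hypothetical, with boxes taken as [y1, x1, y2, x2].
+*@code
+* #include <algorithm>
+* struct Box { float y1, x1, y2, x2; };
+* inline float IoU(const Box &a, const Box &b) {
+*   const float ih = std::max(0.0f, std::min(a.y2, b.y2) - std::max(a.y1, b.y1));
+*   const float iw = std::max(0.0f, std::min(a.x2, b.x2) - std::max(a.x1, b.x1));
+*   const float inter = ih * iw;
+*   const float area_a = (a.y2 - a.y1) * (a.x2 - a.x1);
+*   const float area_b = (b.y2 - b.y1) * (b.x2 - b.x1);
+*   return inter <= 0.0f ? 0.0f : inter / (area_a + area_b - inter);
+* }
+* // Boxes are visited in descending score order (after dropping scores below
+* // score_threshold); a box is kept only if its IoU with every previously kept
+* // box does not exceed iou_threshold, until max_output_size boxes are selected.
+*@endcode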
+*/ + +REG_OP(NonMaxSuppressionV3) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(max_output_size, TensorType({DT_INT32})) + .INPUT(iou_threshold, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(score_threshold, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(selected_indices, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(NonMaxSuppressionV3) + +/** +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n + +*@par Inputs: +*Input boxes and scores must be float type. Inputs include: +*@li boxes: A 2-D float tensor of shape [num_boxes, 4]. +*@li scores: A 1-D float tensor of shape [num_boxes] representing a single +score corresponding to each box (each row of boxes). +*@li max_output_size: A scalar integer tensor representing the maximum number +of boxes to be selected by non max suppression. +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding +whether boxes overlap too much with respect to IOU. +*@li score_threshold: A 0-D float tensor representing the threshold for +deciding when to remove boxes based on score . \n + +*@par Attributes: +*pad_to_max_output_size: If true, the output selected_indices is padded +to be of length max_output_size. Defaults to false . \n + +*@par Outputs: +*@li selected_indices: A 1-D integer tensor of shape [M] representing the +selected indices from the boxes tensor, where M <= max_output_size. +*@li valid_outputs: A 0-D integer tensor representing the number of valid +elements in selected_indices, with the valid elements appearing first . \n + +*@attention Constraints: +*Input boxes and scores must be float type . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow NonMaxSuppressionV4 operator. +*/ + +REG_OP(NonMaxSuppressionV4) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(max_output_size, TensorType({DT_INT32})) + .INPUT(iou_threshold, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(score_threshold, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(selected_indices, TensorType({DT_INT32})) + .OUTPUT(valid_outputs, TensorType({DT_INT32})) + .ATTR(pad_to_max_output_size, Bool, false) + .OP_END_FACTORY_REG(NonMaxSuppressionV4) + +/** +*@brief Greedily selects a subset of bounding boxes in descending order of +score . \n + +*@par Inputs: +*Input overlaps and scores must be float type. Inputs include: +*@li overlaps: A 2-D float tensor of shape [num_boxes, num_boxes] +representing the n-by-n box overlap values. +*@li scores: A 1-D float tensor of shape [num_boxes] representing a single +score corresponding to each box (each row of boxes). +*@li max_output_size: A scalar integer tensor representing the maximum number +of boxes to be selected by non max suppression. +*@li overlap_threshold: A 0-D float tensor representing the threshold for +deciding whether boxes overlap too. +*@li score_threshold: A 0-D float tensor representing the threshold for +deciding when to remove boxes based on score . \n + +*@par Attributes: +*pad_to_max_output_size: If true, the output selected_indices is padded +to be of length max_output_size. Defaults to false . \n + +*@par Outputs: +*selected_indices: A 1-D integer tensor of shape [M] representing the +selected indices from the boxes tensor, where M <= max_output_size . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow NonMaxSuppressionWithOverlaps operator. 
+*/ + +REG_OP(NonMaxSuppressionWithOverlaps) + .INPUT(overlaps, TensorType({DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT})) + .INPUT(max_output_size, TensorType({DT_INT32})) + .INPUT(overlap_threshold, TensorType({DT_FLOAT})) + .INPUT(score_threshold, TensorType({DT_FLOAT})) + .OUTPUT(selected_indices, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(NonMaxSuppressionWithOverlaps) + +/** +*@brief JPEG-encode an image . \n + +*@par Inputs: +*Input image must be unit8 type. Inputs include: +*image: A 3-D uint8 Tensor of shape [height, width, channels] . \n + +*@par Attributes: +*@li format: Per pixel image format. +*@li quality: Quality of the compression from 0 to 100 (higher is better +and slower). +*@li progressive: If True, create a JPEG that loads progressively (coarse +to fine). +*@li optimize_size: If True, spend CPU/RAM to reduce size with no quality +change. +*@li chroma_downsampling: A boolean, default is true. +*@li density_unit: Unit used to specify x_density and y_density: pixels per +inch ('in') or centimeter ('cm'). +*@li x_density: Horizontal pixels per density unit. +*@li y_density: Vertical pixels per density unit. +*@li xmp_metadata: If not empty, embed this XMP metadata in the image header . \n + +*@par Outputs: +*contents: 0-D. JPEG-encoded image . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow EncodeJpeg operator. +*/ + +REG_OP(EncodeJpeg) + .INPUT(image, TensorType({DT_UINT8})) + .OUTPUT(contents, TensorType({DT_STRING})) + .ATTR(format, String, "") + .ATTR(quality, Int, 95) + .ATTR(progressive, Bool, false) + .ATTR(optimize_size, Bool, false) + .ATTR(chroma_downsampling, Bool, true) + .ATTR(density_unit, String, "in") + .ATTR(x_density, Int, 300) + .ATTR(y_density, Int, 300) + .ATTR(xmp_metadata, String, "") + .OP_END_FACTORY_REG(EncodeJpeg) + +/** +*@brief PNG-encode an image. +*@par Inputs: +*Input image must be unit8 or uint16 type. Inputs include: +*image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] +where channels is: 1: for grayscale; 2: for grayscale + alpha; 3: for RGB; +4: for RGBA . \n + +*@par Attributes: +*compression: Compression level . \n + +*@par Outputs: +*contents: 0-D. PNG-encoded image . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow EncodePng operator. +*/ + +REG_OP(EncodePng) + .INPUT(image, TensorType({DT_UINT8, DT_UINT16})) + .OUTPUT(contents, TensorType({DT_STRING})) + .ATTR(compression, Int, -1) + .OP_END_FACTORY_REG(EncodePng) + +/** +*@brief Resizes "images" to "size" using bilinear interpolation . \n + +*@par Inputs: +* One input: +*x: An NC1HWC0 Tensor. +* Must be one of the following types: float16, float32 . \n + +*@par Attributes: +*@li size: A required int32 Tensor specifying the new size for the images. +No default value. +*@li align_corners: An optional bool. If "true", the centers of the corner +pixels of the input and output tensors are aligned. Defaults to "false" . \n + +*@par Outputs: +*y: A Tensor with type float32 and the same format as input "images" . \n + +*@attention Constraints: +*@li The input "size" must be a tensor of 2 elements: size[0] <= 2048, +size[1] <= 2048. +*@li The input "images" must be a tensor of 5 elements: images[2] <= 2048, +images[3] <= 2048 . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ResizeBilinearV2D. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ResizeBilinearV2 instead. 
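+*
+*@par Note
+* Purely as an illustration (not part of this header) of what bilinear interpolation
+* computes once a fractional source coordinate (sy, sx) has been derived from
+* "align_corners" / "half_pixel_centers"; the BilinearSample helper is hypothetical.
+*@code
+* #include <algorithm>
+* #include <cmath>
+* // Blends the four neighbouring pixels of a single-channel, row-major image of
+* // size in_h x in_w. Assumes 0 <= sy <= in_h - 1 and 0 <= sx <= in_w - 1.
+* inline float BilinearSample(const float *img, int in_h, int in_w,
+*                             float sy, float sx) {
+*   const int y0 = static_cast<int>(std::floor(sy));
+*   const int x0 = static_cast<int>(std::floor(sx));
+*   const int y1 = std::min(y0 + 1, in_h - 1);
+*   const int x1 = std::min(x0 + 1, in_w - 1);
+*   const float dy = sy - y0, dx = sx - x0;
+*   const float top = img[y0 * in_w + x0] * (1 - dx) + img[y0 * in_w + x1] * dx;
+*   const float bot = img[y1 * in_w + x0] * (1 - dx) + img[y1 * in_w + x1] * dx;
+*   return top * (1 - dy) + bot * dy;
+* }
+*@endcode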
+*/ +REG_OP(ResizeBilinearV2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .REQUIRED_ATTR(size, ListInt) + .OP_END_FACTORY_REG(ResizeBilinearV2D) + +/** +*@brief Resizes "images" to "size" using bilinear interpolation and keep ratio at the time. \n + +*@par Inputs: +* One input: +*images: An NC1HWC0 Tensor. +* Must be one of the following types: float16, float32 . \n + +*@par Attributes: +*@li min_dimension: A required int32 attribute for the min dimension for the images. +* No default value. +*@li max_dimension: A required int32 attribute for the max dimension for the images. +* No default value. +*@li align_corners: An optional bool. If "true", the centers of the corner +* pixels of the input and output tensors are aligned. Defaults to "false". +*@li half_pixel_centers: indicates if the offset coordinates are normalized +* Defaults to "false" . \n + +*@par Outputs: +*y: A Tensor with type float32 and the same format as input "images" . \n + +*@attention Constraints: +* The input "images" must be a tensor of 5 elements: images[2] <= 2048, +images[3] <= 2048. +*/ +REG_OP(KeepRatioResizeBilinear) + .INPUT(images, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(min_dimension, Int) + .REQUIRED_ATTR(max_dimension, Int) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(KeepRatioResizeBilinear) + +/** +*@brief Resizes "images" to "size" using nearest neighbor interpolation . \n + +*@par Inputs: +* One input: +*x: An NC1HWC0 Tensor. +* Must be one of the following types: float16, float32, int32, int8, uint8 + +*@par Attributes: +*@li size: A required int32 Tensor specifying the new size for the images. +No default value. +*@li align_corners: An optional bool. If "true", the centers of the corner +pixels of the input and output tensors are aligned. Defaults to "false" . \n + +*@par Outputs: +*y: A Tensor with the same type and format as input "images" . \n + +*@attention Constraints: +* The input "size" must be a tensor of 2 elements: size[0] <= 7680, +size[1] <= 4320 + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ResizeNearestNeighborV2. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ResizeNearestNeighborV2 instead. +*/ +REG_OP(ResizeNearestNeighborV2D) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .REQUIRED_ATTR(size, ListInt) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(ResizeNearestNeighborV2D) + +/** +*@brief Extract the shape information of a JPEG-encoded image . \n + +*@par Inputs: +*Input contents must be 0-D. Inputs include: +*contents: 0-D. The JPEG-encoded image . \n + +*@par Attributes: +*output_type: The output type of the operation (int32 or int64). Defaults +to int32 . \n + +*@par Outputs: +*image_shape: 1-D. The image shape with format [height, width, channels] . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow ExtractJpegShape operator. +*/ + +REG_OP(ExtractJpegShape) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(image_shape, TensorType({DT_INT32, DT_INT64})) + .REQUIRED_ATTR(output_type, Type) + .OP_END_FACTORY_REG(ExtractJpegShape) + +/** +*@brief Draw bounding boxes on a batch of images . 
\n + +*@par Inputs: +*@li images: 4-D with shape `[batch, height, width, depth]`. +A batch of images. +*@li boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` +containing bounding boxes. +*@li colors: 2-D. A list of RGBA colors to cycle through for the boxes . \n + +*@par Outputs: +*y: Returns 4-D with the same shape as `images`. +The batch of input images with bounding boxes drawn on the images . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow DrawBoundingBoxesV2 operator. +*/ + +REG_OP(DrawBoundingBoxesV2) + .INPUT(images, TensorType({DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(colors, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(DrawBoundingBoxesV2) + +/** +*@brief Greedily selects a subset of bounding boxes in descending order of score, +pruning away boxes that have high intersection-over-union (IOU) overlap +with previously selected boxes . \n + +*@par Inputs: +*@li boxes: A 2-D float tensor of shape `[num_boxes, 4]`. +*@li scores: A 1-D float tensor of shape `[num_boxes]` representing a single +score corresponding to each box (each row of boxes). +*@li max_output_size: A scalar integer tensor representing the maximum number of +boxes to be selected by non max suppression. +*@li iou_threshold: A 0-D float tensor representing the threshold for deciding whether +boxes overlap too much with respect to IOU. +*@li score_threshold: A 0-D float tensor representing the threshold for deciding when to +remove boxes based on score. +*@li soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft NMS . \n + +*@par Attributes: +pad_to_max_output_size: If true, the output `selected_indices` is padded to be of length +`max_output_size`. Defaults to false. If not specified, defaults to false . \n + +*@par Outputs: +*@li selected_indices: A 1-D integer tensor of shape [M] representing the +selected indices from the boxes tensor, where M <= max_output_size. +*@li selected_scores: A 1-D float tensor of shape `[M]` representing the corresponding +scores for each selected box, where `M <= max_output_size`. +*@li valid_outputs: A 0-D integer tensor representing the number of valid +elements in selected_indices, with the valid elements appearing first . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow NonMaxSuppressionV5 operator. +*/ + +REG_OP(NonMaxSuppressionV5) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(max_output_size, TensorType({DT_INT32})) + .INPUT(iou_threshold, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(score_threshold, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(soft_nms_sigma, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(selected_indices, TensorType({DT_INT32})) + .OUTPUT(selected_scores, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(valid_outputs, TensorType({DT_INT32})) + .ATTR(pad_to_max_output_size, Bool, false) + .REQUIRED_ATTR(T, Type) + .OP_END_FACTORY_REG(NonMaxSuppressionV5) + +/** +*@brief Resizes "images" to "size" by scale and translate . \n + +*@par Inputs: +*@li images: A `Tensor`. Must be one of the following types: `int8`, `uint8`, +`int16`, `uint16`, `int32`, `int64`, `bfloat16`, `float32`, `float64`. +*@li size: A `Tensor` of type `int32`. +*@li scale: A `Tensor` of type `float32`. +*@li translation: A `Tensor` of type `float32` . 
\n
+
+*@par Attributes:
+*@li kernel_type: An optional string. Defaults to "lanczos3".
+*@li antialias: An optional bool. Defaults to true . \n
+
+*@par Outputs:
+*y: A Tensor with type float32 . \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow ScaleAndTranslate operator.
+*/
+
+REG_OP(ScaleAndTranslate)
+    .INPUT(images, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16,
+        DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(size, TensorType({DT_INT32}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(translation, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .ATTR(kernel_type, String, "lanczos3")
+    .ATTR(antialias, Bool, true)
+    .OP_END_FACTORY_REG(ScaleAndTranslate)
+
+/**
+*@brief Computes the gradient by scale and translate . \n
+
+*@par Inputs:
+*@li grads: A `Tensor`. Must be one of the following types: `float32`.
+*@li original_image: A `Tensor`. Must have the same type as `grads`.
+*@li scale: A `Tensor` of type `float32`.
+*@li translation: A `Tensor` of type `float32` . \n
+
+*@par Attributes:
+*@li kernel_type: An optional string. Defaults to "lanczos3".
+*@li antialias: An optional bool. Defaults to true.
+
+*@par Outputs:
+*y: A `Tensor`. Has the same type as `grads` . \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow ScaleAndTranslateGrad operator.
+*/
+
+REG_OP(ScaleAndTranslateGrad)
+    .INPUT(grads, TensorType({DT_FLOAT}))
+    .INPUT(original_image, TensorType({DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(translation, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .ATTR(kernel_type, String, "lanczos3")
+    .ATTR(antialias, Bool, true)
+    .OP_END_FACTORY_REG(ScaleAndTranslateGrad)
+
+/**
+*@brief Greedily selects a subset of bounding boxes in descending order of score.
+This operation performs non max suppression on the inputs per batch, across all classes . \n
+
+*@par Inputs:
+*@li boxes: A 4-D float tensor of shape `[batch_size, num_boxes, q, 4]`. If `q` is 1 then
+the same boxes are used for all classes; otherwise, if `q` is equal to the number of
+classes, class-specific boxes are used.
+*@li scores: A 3-D float tensor of shape `[batch_size, num_boxes, num_classes]`
+representing a single score corresponding to each box (each row of boxes).
+*@li max_output_size_per_class: A scalar integer tensor representing the maximum number of
+boxes to be selected by non max suppression per class.
+*@li max_total_size: A scalar representing the maximum number of boxes retained over all classes.
+*@li iou_threshold: A 0-D float tensor representing the threshold for deciding whether
+boxes overlap too much with respect to IOU.
+*@li score_threshold: A 0-D float tensor representing the threshold for deciding when to remove
+boxes based on score . \n
+
+*@par Attributes:
+*@li pad_per_class: If false, the output nmsed boxes, scores and classes
+are padded/clipped to `max_total_size`. If true, the
+output nmsed boxes, scores and classes are padded to be of length
+`max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in
+which case it is clipped to `max_total_size`. Defaults to false.
+*@li clip_boxes: If true, assume the box coordinates are between [0, 1] and clip the output boxes
+if they fall beyond [0, 1]. If false, do not clip and output the box
+coordinates as they are. Defaults to true . \n
+
+*@par Outputs:
+*@li nmsed_boxes: A 3-D float tensor of shape [batch_size, max_detections, 4].
+*@li nmsed_scores: A 2-D float tensor of shape [batch_size, max_detections].
+*@li nmsed_classes: A 2-D float tensor of shape [batch_size, max_detections].
+*@li valid_detections: A 1-D int32 tensor of shape [batch_size] giving the number of
+valid detections per batch item . \n
+
+*@par Third-party framework compatibility
+* Compatible with tensorflow CombinedNonMaxSuppression operator.
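+*
+*@par Note
+* A small illustrative sketch (not part of this header) of how the number of returned
+* detections per image follows from "pad_per_class" as described above; the helper
+* name NumOutputDetections is hypothetical.
+*@code
+* #include <algorithm>
+* inline int NumOutputDetections(int max_size_per_class, int num_classes,
+*                                int max_total_size, bool pad_per_class) {
+*   // pad_per_class == false: outputs are padded/clipped to max_total_size;
+*   // pad_per_class == true:  padded to max_size_per_class * num_classes,
+*   //                         but never beyond max_total_size.
+*   return pad_per_class
+*              ? std::min(max_size_per_class * num_classes, max_total_size)
+*              : max_total_size;
+* }
+*@endcode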
+*/ + +REG_OP(CombinedNonMaxSuppression) + .INPUT(boxes, TensorType({DT_FLOAT})) + .INPUT(scores, TensorType({DT_FLOAT})) + .INPUT(max_output_size_per_class, TensorType({DT_INT32})) + .INPUT(max_total_size, TensorType({DT_INT32})) + .INPUT(iou_threshold, TensorType({DT_FLOAT})) + .INPUT(score_threshold, TensorType({DT_FLOAT})) + .OUTPUT(nmsed_boxes, TensorType({DT_FLOAT})) + .OUTPUT(nmsed_scores, TensorType({DT_FLOAT})) + .OUTPUT(nmsed_classes, TensorType({DT_FLOAT})) + .OUTPUT(valid_detections, TensorType({DT_INT32})) + .ATTR(pad_per_class, Bool, false) + .ATTR(clip_boxes, Bool, true) + .OP_END_FACTORY_REG(CombinedNonMaxSuppression) + +/** +*@brief Function spatial transformer . \n + +*@par Inputs: +*@li x: A Tensor dtype of float16, float32. +*@li theta: A Tensor dtype of float16, float32, auxiliary coefficients . \n + +*@par Attributes: +*@li output_size: A tuple output size. +*@li default_theta: A tuple default theta +*@li use_default_theta: List use default theta +*@li align_corners: Align corners + +*@par Outputs: +*y: A Tensor dtype of float16, float32, should be same shape and type as x. +*/ +REG_OP(SpatialTransformerD) + .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) + .OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) + .ATTR(output_size, ListInt, {-1, -1}) + .ATTR(default_theta, ListFloat, {}) + .ATTR(align_corners, Bool, false) + .ATTR(use_default_theta, ListBool, {}) + .OP_END_FACTORY_REG(SpatialTransformerD) + +/** +* @brief Resize the input tensor. \n +currently, only support resize image tensor using nearest neighbor and linear interpolation. + +* @par Inputs: +* Input x must be a 4-D tensor. Inputs include: \n +* @li x: A Tensor. Must be one of the following types: uint8, int8, int16, \n +int32, int64, float16, float, double. 4-D with shape [batch, height, width, channels] \n +or shape [batch, channels, height, width]. +* @li roi: A 1-D float Tensor. only takes effect when attr coordinate_transformation_mode \n +is "tf_crop_and_resize" +* @li scales: A 1-D float Tensor, the scale array along each dimension, Only one of \n +'scales' and 'sizes' can be specified. +* @li sizes: A 1-D int64 Tensor, The size of the output tensor. nly one of \n +'scales' and 'sizes' can be specified. If 'size' is specified, then set scales \n +to empty data (zero shape) in this operator's input list. + +* @par Attributes: +* @li coordinate_transformation_mode: String. Defaults to half_pixel. how to transform \n +the coordinate in the resized tensor to the coordinate in the original tensor. \n +other optional: pytorch_half_pixel, align_corners, asymmetric, tf_half_pixel_for_nn, \n +tf_crop_and_resize. +* @li cubic_coeff_a: Float. Defaults to -0.75, only used in cubic interpolation. \n +other optional: -0.5 +* @li exclude_outside: Int. Defaults to 0, If set to 1, the weight of sampling \n +locations outside the tensor will be set to 0 and the weight will be renormalized \n +so that their sum is 1.0. +* @li extrapolation_value: Float. Defaults to 0.0f. When coordinate_transformation_mode \n +is "tf_crop_and_resize" and x_original is outside the range [0, length_original - 1], \n +this value is used as the corresponding output value. +* @li mode: String. Defaults to nearest. Three interpolation modes: nearest (default), \n +linear and cubic. +* @li nearest_mode: String. Defaults to round_prefer_floor. Four modes: round_prefer_floor, \n +round_prefer_ceil, floor, ceil. Only used by nearest interpolation. + +* @par Outputs: +* y: A Tensor. 
Has the same type as x. + +* @attention Constraints: \n +* Input x must be a 4-D tensor. + +* @par Third-party framework compatibility +* Compatible with tensorflow ResizeNearestNeighborV2 operator. +*/ + +REG_OP(Resize) + .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(scales, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(sizes, TensorType({DT_INT64})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(coordinate_transformation_mode, String, "half_pixel") + .ATTR(cubic_coeff_a, Float, -0.75) + .ATTR(exclude_outside, Int, 0) + .ATTR(extrapolation_value, Float, 0) + .ATTR(mode, String, "nearest") + .ATTR(nearest_mode, String, "round_prefer_floor") + .OP_END_FACTORY_REG(Resize) + +/** +*@brief Function parse image from string to int. \n + +*@par Inputs: +*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n + +*@par Attributes: +*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. +*@li ratio: An optional int. Defaults to 1. Downscaling ratio. +*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower but nicer upscaling of the chroma planes +*@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input. +*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted. +*@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n + +*@par Outputs: +*image: A Tensor dtype of uint8. +*/ +REG_OP(DecodeJpeg) + .INPUT(contents, TensorType({DT_STRING})) + .OUTPUT(image, TensorType({DT_UINT8})) + .ATTR(channels, Int, 0) + .ATTR(ratio, Int, 1) + .ATTR(fancy_upscaling, Bool, true) + .ATTR(try_recover_truncated, Bool, false) + .ATTR(acceptable_fraction, Float, 1.0) + .ATTR(dct_method, String, "") + .OP_END_FACTORY_REG(DecodeJpeg) + +/** +*@brief Image warping using per-pixel flow vectors. \n + +*@par Inputs: +*@li images: 4-D Tensor with shape `[batch, height, width, channels]`. +*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n + +*@par Outputs: +*y: Returns 4-D with the same shape and dtype as `images`. \n +*/ +REG_OP(DenseImageWarp) + .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(DenseImageWarp) + +/** +*@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n + +*@par Inputs: +*@li grad: gradients with respect to DenseImageWarp output. +*@li images: 4-D Tensor with shape `[batch, height, width, channels]`. +*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n + +*@par Outputs: +*grad_image: Returns 4-D with the same shape and dtype as `images`. +*grad_flow: Returns 4-D with the same shape and dtype as `flow`. 
\n
+*/
+REG_OP(DenseImageWarpGrad)
+    .INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(DenseImageWarpGrad)
+} // namespace ge
+#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_
diff --git a/third_party/fwkacllib/inc/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/inc/ops/internal_ops.h
new file mode 100644
index 00000000..bcc3f1c3
--- /dev/null
+++ b/third_party/fwkacllib/inc/inc/ops/internal_ops.h
@@ -0,0 +1,84 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file internal_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_
+
+#include "graph/operator_reg.h"
+#include "graph/operator.h"
+
+namespace ge {
+
+/**
+*@brief aicpu assist help op for auxiliary matrix generation. \n
+
+*@par Inputs:
+*The input x is dynamic; its arity depends on the attribute func_name \n
+
+*@par Attributes:
+*@li func_name: A required string, for example "topkv2". \n
+
+*@par Outputs:
+*The output y is dynamic; its arity depends on the attribute func_name.
+*/
+REG_OP(AssistHelp)
+    .DYNAMIC_INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16,
+        DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE }))
+    .DYNAMIC_OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16,
+        DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
+    .REQUIRED_ATTR(func_name, String)
+    .OP_END_FACTORY_REG(AssistHelp)
+
+/**
+*@brief aicpu cache help for lhisi cache flush. \n
+
+*@par Inputs:
+*x: A tensor of basic type to be flushed \n
+
+*@par Outputs:
+*x: A tensor with the same type as the input "x".
+*/
+REG_OP(CacheUpdate)
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(x, TensorType::BasicType())
+    .OP_END_FACTORY_REG(CacheUpdate)
+
+/**
+*@brief transfer data from L1 buffer to DDR or DDR to L1. \n
+
+*@par Inputs:
+*x: A tensor to be moved; the required attributes "src_buf" and "dst_buf" name the source and destination buffers \n
+
+*@par Outputs:
+*y: A tensor with the same content as "x".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(InternalDataMove)
+    .INPUT(x, TensorType::ALL())
+    .OUTPUT(y, TensorType::ALL())
+    .REQUIRED_ATTR(src_buf, String)
+    .REQUIRED_ATTR(dst_buf, String)
+    .OP_END_FACTORY_REG(InternalDataMove)
+
+} // namespace ge
+
+#endif // OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_
diff --git a/third_party/fwkacllib/inc/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/inc/ops/linalg_ops.h
new file mode 100644
index 00000000..330fef2e
--- /dev/null
+++ b/third_party/fwkacllib/inc/inc/ops/linalg_ops.h
@@ -0,0 +1,443 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file linalg_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief Computes the reverse mode backpropagated gradient of the Cholesky +algorithm . \n + +*@par Inputs: +*The input x has to be symmetric and positive definite. Inputs include: +*@li x:A Tensor. Must be one of the following types: double, float32. Output +of batch Cholesky algorithm x = cholesky(A). Shape is [..., M, M]. Algorithm +depends only on lower triangular part of the innermost matrices of this tensor. +*@li grad:A Tensor. Must have the same type as l. df/dx where f is some +scalar function. Shape is [..., M, M]. Algorithm depends only on lower +triangular part of the innermost matrices of this tensor . \n + +*@par Outputs: +*y:A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions +form square matrices. + +*@par Third-party framework compatibility +*Compatible with tensorflow CholeskyGrad operator. +*/ + +REG_OP(CholeskyGrad) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(grad, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(CholeskyGrad) + +/** +*@brief Computes the Cholesky decomposition of one or more square matrices . \n + +*@par Inputs: +*The input x has to be symmetric and positive definite.Inputs include: +*x:A Tensor. Must be one of the following types: double, float32, float16, +complex64, complex128. Shape is [..., M, M] . \n + +*@par Outputs: +*y:A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions +form square matrices. + +*@par Third-party framework compatibility +*Compatible with tensorflow Cholesky operator. +*/ + +REG_OP(Cholesky) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \ + DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \ + DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(Cholesky) + +/** +*@brief Computes the sign and the log of the absolute value of the determinant +of one or more square matrices . \n + +*@par Inputs: +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. Inputs include: +*x:A Tensor. Must be one of the following types: double, float32, +complex64, complex128. Shape is [..., M, M] . \n + +*@par Outputs: +*@li y:A Tensor. Has the same type as x. +*@li sign:A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow LogMatrixDeterminant operator. 
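+*
+*@par Note
+* For illustration only (not part of this header): the determinant itself can be
+* recovered from the two outputs as det(x) = sign * exp(y), which is why the op works
+* in log space. For example, for the 2 x 2 diagonal matrix diag(2, -3), sign = -1 and
+* y = log(6), so sign * exp(y) = -6.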
+*/ + +REG_OP(LogMatrixDeterminant) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(LogMatrixDeterminant) + +/** +*@brief Computes the determinant of one or more square matrices . \n + +*@par Inputs: +*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions +form square matrices. Inputs include: +*x:A Tensor. Must be one of the following types: double, float32, complex64, +complex128. Shape is [..., M, M] . \n + +*@par Outputs: +*y:A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions +form square matrices. + +*@par Third-party framework compatibility +*Compatible with tensorflow MatrixDeterminant operator. +*/ + +REG_OP(MatrixDeterminant) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(MatrixDeterminant) + +/** +*@brief Computes the inverse of one or more square invertible matrices or +their adjoints (conjugate transposes) . \n + +*@par Inputs: +*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions +form square matrices. Inputs include: +*x:A Tensor of input. Shape is [..., M, M] . \n + +*@par Attributes: +*adjoint:An optional bool. Defaults to False.Boolean indicating whether to +deal with matrix or its (block-wise) adjoint . \n + +*@par Outputs: +*y:A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions +form square matrices. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow MatrixInverse operator. +*/ + +REG_OP(MatrixInverse) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(adjoint, Bool, false) + .OP_END_FACTORY_REG(MatrixInverse) + +/** +*@brief Solves systems of linear equations . \n + +*@par Inputs: +*The input rhs must have the same type as matrix. Inputs include: +*@li matrix:A Tensor of input. Shape is [..., M, M]. +*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n + +*@par Attributes: +*adjoint:An optional bool. Defaults to False.Boolean indicating whether to +solve with matrix or its (block-wise) adjoint . \n + +*@par Outputs: +*y:A Tensor. Has the same type as matrix . \n + +*@attention Constraints: +*The input matrix is a tensor of shape [..., M, M] whose inner-most 2 +dimensions form square matrices. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow MatrixSolve operator. +*/ + +REG_OP(MatrixSolve) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(adjoint, Bool, false) + .OP_END_FACTORY_REG(MatrixSolve) + +/** +*@brief Solves systems of linear equations . \n + +*@par Inputs: +*The input rhs must have the same type as matrix. Inputs include: +*@li matrix:A Tensor. Shape is [..., M, M]. +*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K]. +*@li l2:0-D double Tensor. Ignored if fast=False . 
\n + +*@par Attributes: +*fast:bool. Defaults to True . \n + +*@par Outputs: +*y:Tensor of shape [..., N, K] whose inner-most 2 dimensions form M-by-K +matrices that solve the equations matrix[..., :, :] * output[..., :, :] = +rhs[..., :, :] in the least squares sense . \n + +*@attention Constraints: +*The input matrix matrix is a tensor of shape [..., M, M] whose inner-most 2 +dimensions form square matrices. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow MatrixSolveLs operator. +*/ + +REG_OP(MatrixSolveLs) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(l2, TensorType({DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(fast, Bool, true) + .OP_END_FACTORY_REG(MatrixSolveLs) + +/** +*@brief Solves systems of linear equations with upper or lower triangular +matrices by backsubstitution . \n + +*@par Inputs: +*The input rhs must have the same type as matrix. Inputs include: +*@li matrix: A Tensor. Shape is [..., M, M]. +*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n + +*@par Attributes: +*@li lower: An optional bool. Defaults to True. Boolean indicating whether +the innermost matrices in matrix are lower or upper triangular. +*@li An optional bool. Defaults to False. Boolean indicating whether to solve +with matrix or its (block-wise) adjoint . \n + +*@par Outputs: +*y:A Tensor. Has the same type as matrix . \n + +*@attention Constraints: +*The input matrix is a tensor of shape [..., M, M] whose inner-most 2 +dimensions form square matrices. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow MatrixTriangularSolve operator. +*/ + +REG_OP(MatrixTriangularSolve) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(lower, Bool, true) + .ATTR(adjoint, Bool, false) + .OP_END_FACTORY_REG(MatrixTriangularSolve) + +/** +*@brief Computes the QR decompositions of one or more matrices . \n + +*@par Inputs: +*The input shape of x must be [..., M, N]. Inputs include: +*x:A Tensor whose shape is [..., M, N]. \n + +*@par Attributes: +*full_matrices: An optional bool. Defaults to False. If true, compute +full-sized q and r. If false (the default), compute only the leading P +columns of q . \n + +*@par Outputs: +*@li q: A Tensor. Has the same type as x. +*@li r: A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The input matrix x is a tensor of shape [..., M, N] whose inner-most 2 +dimensions form matrices of size [M, N]. \n + +*@par Third-party framework compatibility +*Compatible with tensorflow Qr operator. +*/ + +REG_OP(Qr) + .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .ATTR(full_matrices, Bool, false) + .OP_END_FACTORY_REG(Qr) + +/** +*@brief Computes the eigen decomposition of a batch of self-adjoint matrices . \n + +*@par Inputs: +*The input shape of x must be [..., N, N]. Inputs include: +*x:Tensor of shape [..., N, N]. 
Only the lower triangular part of each
+inner matrix is referenced . \n
+
+*@par Attributes:
+*compute_v: An optional bool. Defaults to True . \n
+
+*@par Outputs:
+*@li eigen_value: Eigenvalues. Shape is [..., N]. Sorted in non-decreasing order.
+*@li eigen_vector: Shape is [..., N, N]. The columns of the inner most matrices
+contain eigenvectors of the corresponding matrices in the input tensor.
+
+*@attention Constraints:
+*The input x is a tensor of shape [..., N, N] whose inner-most 2 dimensions
+form square matrices. \n
+
+*@par Third-party framework compatibility
+*Compatible with tensorflow SelfAdjointEig operator.
+*/
+
+REG_OP(SelfAdjointEig)
+    .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT }))
+    .OUTPUT(eigen_value, TensorType({ DT_DOUBLE, DT_FLOAT }))
+    .OUTPUT(eigen_vector, TensorType({ DT_DOUBLE, DT_FLOAT }))
+    .ATTR(compute_v, Bool, true)
+    .OP_END_FACTORY_REG(SelfAdjointEig)
+
+/**
+*@brief Computes the singular value decompositions of one or more matrices . \n
+
+*@par Inputs:
+*The input shape of x must be [..., M, N]. Inputs include:
+*x: Tensor of shape [..., M, N]. Let P be the minimum of M and N . \n
+
+*@par Attributes:
+*compute_uv: If True then left and right singular vectors will be computed and
+returned in u and v, respectively. Otherwise, only the singular values will
+be computed, which can be significantly faster . \n
+
+*@par Outputs:
+*@li sigma: Singular values. Shape is [..., P]. The values are sorted in
+reverse order of magnitude, so s[..., 0] is the largest value, s[..., 1]
+is the second largest, etc.
+*@li u: Left singular vectors. If full_matrices is False (default) then shape
+is [..., M, P]; if full_matrices is True then shape is [..., M, M]. Not
+returned if compute_uv is False.
+*@li v: Right singular vectors. If full_matrices is False (default) then shape
+is [..., N, P]. If full_matrices is True then shape is [..., N, N]. Not
+returned if compute_uv is False . \n
+
+*@attention Constraints:
+*The input x is a tensor of shape [..., M, N] whose inner-most 2 dimensions
+form matrices of size [M, N]. \n
+
+*@par Third-party framework compatibility
+*Compatible with tensorflow Svd operator.
+*/
+
+REG_OP(Svd)
+    .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT }))
+    .OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT }))
+    .OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT }))
+    .OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT }))
+    .ATTR(compute_uv, Bool, true)
+    .ATTR(full_matrices, Bool, false)
+    .OP_END_FACTORY_REG(Svd)
+
+/**
+*@brief Computes the LU decomposition of one or more square matrices . \n
+
+*@par Inputs:
+*input: A tensor of shape `[..., M, M]` whose inner-most 2 dimensions form
+matrices of size `[M, M]` . \n
+
+*@par Outputs:
+*@li lu: A tensor of shape `[..., M, M]` whose strictly lower triangular part
+denotes the lower triangular factor `L` with unit diagonal, and whose upper
+triangular part denotes the upper triangular factor `U`.
+*@li p: Permutation of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow Lu operator.
+*/
+
+REG_OP(Lu)
+    .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(p, TensorType({DT_INT32, DT_INT64}))
+    .REQUIRED_ATTR(output_idx_type, Type)
+    .OP_END_FACTORY_REG(Lu)
+
+/**
+*@brief Computes the matrix square root of one or more square matrices . \n
+
+*@par Inputs:
+*input: Shape is `[..., M, M]` . \n
+
+*@par Outputs:
+*y: Shape is `[..., M, M]` .
\n + +*@par Third-party framework compatibility +* Compatible with TensorFlow MatrixSquareRoot operator. +*/ + +REG_OP(MatrixSquareRoot) + .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(MatrixSquareRoot) + +/** +*@brief Solves tridiagonal systems of equations . \n + +*@par Inputs: +*@li diagonals: Tensor of shape `[..., 3, M]` whose innermost 2 dimensions represent the tridiagonal matrices with three rows being the superdiagonal, diagonals, and subdiagonals, in order. The last element of the superdiagonal and the first element of the subdiagonal is ignored. +*@li rhs: Tensor of shape `[..., M, K]`, representing K right-hand sides per each +left-hand side . \n + +*@par Outputs: +y: Tensor of shape `[..., M, K]` containing the solutions \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow TridiagonalSolve operator. +*/ + +REG_OP(TridiagonalSolve) + .INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(partial_pivoting, Bool, true) + .OP_END_FACTORY_REG(TridiagonalSolve) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/list_ops.h b/third_party/fwkacllib/inc/inc/ops/list_ops.h new file mode 100644 index 00000000..33270ea8 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/list_ops.h @@ -0,0 +1,230 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file list_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ + +#include +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief Creates and returns an empty tensor list. \n + +*@par Inputs: +*@li element_shape: A shape compatible with that of elements in the list. +*@li max_num_elements: The maximum number of elements. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li handle: An empty tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow EmptyTensorList operator. +*/ +REG_OP(EmptyTensorList) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(max_num_elements, TensorType({DT_INT32})) + .OUTPUT(handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(EmptyTensorList) + +/** +*@brief Returns a list which has the passed-in `Tensor` as last element +and the other elements of the given list in `input_handle`. \n + +*@par Inputs: +*@li input_handle: The old list. +*@li tensor: The tensor to put on the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle:A list with the elements of old list followed by tensor. 
\n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListPushBack operator. +*/ +REG_OP(TensorListPushBack) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListPushBack) + +/** +*@brief The last element of the input list as well as a +list with all but that element. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li element_shape: A shape compatible with that of elements in the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle:A list with the elements of the old list followed by tensor. +*@li tensor:The withdrawn last element of the list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListPopBack operator. +*/ +REG_OP(TensorListPopBack) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListPopBack) + +/** +*@brief The number of tensors in the input tensor list. \n + +*@par Inputs: +*@li input_handle: The input list. \n + +*@par Outputs: +*@li length:The number of tensors in the list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListLength operator. +*/ +REG_OP(TensorListLength) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .OUTPUT(length, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(TensorListLength) + +/** +*@brief The shape of elements in the input tensor list. \n + +*@par Inputs: +*@li input_handle: The input list. \n + +*@par Attributes: +*@li shape_type: The type of shape in the list. \n + +*@par Outputs: +*@li element_shape:A shape compatible with that of elements in the list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListElementShape operator. +*/ +REG_OP(TensorListElementShape) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .OUTPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .ATTR(shape_type, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListElementShape) + +/** +*@brief List of the given size with empty elements. \n + +*@par Inputs: +*@li element_shape: A shape compatible with that of elements in the list. +*@li num_elements: The number of elements to reserve. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. +*@li shape_type: The type of shape in the list. \n + +*@par Outputs: +*@li handle: An output tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListReserve operator. +*/ +REG_OP(TensorListReserve) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(num_elements, TensorType({DT_INT32})) + .OUTPUT(handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .ATTR(shape_type, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListReserve) + +/** +*@brief Get input tensor list elements of index position. 
\n + +*@par Inputs: +*@li input_handle: The input list. +*@li index: A tensor of position. +*@li element_shape: A shape compatible with that of elements in the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li item: An output tensor value of index position . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListGetItem operator. +*/ +REG_OP(TensorListGetItem) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(index, TensorType({DT_INT32})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListGetItem) + +/** +*@brief Sets the index-th position of the list to contain the given tensor. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li index: The position in the list to which the tensor will be assigned. +*@li item: The element to be assigned to that position. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle: An output tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListSetItem operator. +*/ +REG_OP(TensorListSetItem) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(index, TensorType({DT_INT32})) + .INPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListSetItem) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/inc/ops/logging_ops.h new file mode 100644 index 00000000..03be7757 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/logging_ops.h @@ -0,0 +1,116 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file logging_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_ + +#include "graph/operator.h" +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Provides the time since epoch in seconds . \n + +*@par Outputs: +*y: A Tensor of type float64. The timestamp as a double for seconds since +the Unix epoch . \n + +*@attention Constraints: +*The timestamp is computed when the op is executed, not when it is added to +the graph . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow Timestamp operator . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
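+
+*Illustrative note (not part of the original header): the value this op
+*produces roughly corresponds to the host-side computation
+*  double ts = std::chrono::duration<double>(
+*      std::chrono::system_clock::now().time_since_epoch()).count();
+*except that it is evaluated on the device at the moment the op executes.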
+*/ +REG_OP(Timestamp) + .OUTPUT(y, TensorType({DT_DOUBLE})) + .OP_END_FACTORY_REG(Timestamp) + +/** +*@brief Asserts that the given condition is true . \n + +*@par Inputs: +*If input_condition evaluates to false, print the list of tensors in data. +*Inputs include: +*@li input_condition: The condition to evaluate. +*@li input_data: The tensors to print out when condition is false . + It's a dynamic input. \n + +*@par Attributes: +*summarize: Print this many entries of each tensor . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow Assert operator . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(Assert) + .INPUT(input_condition, TensorType{DT_BOOL}) + .DYNAMIC_INPUT(input_data, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, + DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, + DT_UINT64, DT_BOOL, DT_DOUBLE, DT_STRING})) + .ATTR(summarize, Int, 3) + .OP_END_FACTORY_REG(Assert) + +/** +*@brief Prints a tensor . \n + +*@par Inputs: +*x: The tensor to print, it is a dynamic_input . \n + +*Compatible with aicpu Print operator . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(Print) +.DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, + DT_INT64, DT_UINT32, DT_UINT64, DT_DOUBLE, DT_STRING})) +.OP_END_FACTORY_REG(Print) + +/** +*@brief Prints a string scalar . \n + +*@par Inputs: +*The dtype of input x must be string. Inputs include: +*x: The string scalar to print . \n + +*@par Attributes: +*output_stream: A string specifying the output stream or logging level +to print to . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow PrintV2 operator . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(PrintV2) + .INPUT(x, TensorType({DT_STRING})) + .ATTR(output_stream, String, "stderr") + .OP_END_FACTORY_REG(PrintV2) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/inc/ops/lookup_ops.h new file mode 100644 index 00000000..5d928e5a --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/lookup_ops.h @@ -0,0 +1,308 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file lookup_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Replaces the contents of the table with the specified keys and values . \n + +*@par Inputs: +*The dtype of input handle must be resource. Inputs include: +*@li handle: A Tensor of type resource. Handle to the table. +*@li keys: A Tensor. Any shape. Keys to look up. +*@li values: A Tensor. Values to associate with keys . \n + +*@par Third-party framework compatibility. 
+*Compatible with tensorflow LookupTableImport operator. +*/ + +REG_OP(LookupTableImport) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(keys, TensorType({DT_STRING, DT_INT32, DT_INT64})) + .INPUT(values, TensorType({DT_BOOL, DT_DOUBLE, \ + DT_FLOAT, DT_INT32, DT_INT64, DT_STRING})) + .OP_END_FACTORY_REG(LookupTableImport) + +/** +*@brief Updates the table to associates keys with values . \n + +*@par Inputs: +*The dtype of input handle must be resource. Inputs include: +*@li handle: A Tensor of type resource. Handle to the table. +*@li keys: A Tensor. Any shape. Keys to look up. +*@li values: A Tensor. Values to associate with keys . \n + +*@attention Constraints: +*@li The tensor keys must be of the same type as the keys of the table. +*@li The tensor values must be of the type of the table values. + +*@par Third-party framework compatibility. +*Compatible with tensorflow LookupTableInsert operator. +*/ + +REG_OP(LookupTableInsert) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(keys, TensorType({DT_STRING, DT_INT32, DT_INT64})) + .INPUT(values, TensorType({DT_BOOL, DT_DOUBLE, DT_FLOAT, \ + DT_INT32, DT_INT64, DT_STRING})) + .OP_END_FACTORY_REG(LookupTableInsert) + +/** +*@brief Outputs all keys and values in the table . \n + +*@par Inputs: +*The dtype of input handle must be resource. Inputs include: +*handle: A Tensor of type resource. Handle to the table . \n + +*@par Attributes: +*@li Tkeys: A DType. +*@li Tvalues: A DType . \n + +*@par Outputs: +*@li keys: A Tensor of type Tkeys. +*@li values: A Tensor of type Tvalues . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow LookupTableExport operator. +*/ + +REG_OP(LookupTableExport) + .INPUT(handle, TensorType({DT_RESOURCE})) + .OUTPUT(keys, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(values, TensorType({DT_BOOL, DT_DOUBLE, DT_FLOAT, \ + DT_INT32, DT_INT64, DT_STRING})) + .REQUIRED_ATTR(Tkeys, Type) + .REQUIRED_ATTR(Tvalues, Type) + .OP_END_FACTORY_REG(LookupTableExport) + +/** +*@brief Computes the number of elements in the given table . \n + +*@par Inputs: +*The dtype of input handle must be resource. Inputs include: +*handle: A Tensor of type resource. Handle to the table . \n + +*@par Outputs: +*size: A Tensor of type int64 . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow LookupTableSize operator. +*/ + +REG_OP(LookupTableSize) + .INPUT(handle, TensorType({DT_RESOURCE})) + .OUTPUT(size, TensorType({DT_INT64})) + .OP_END_FACTORY_REG(LookupTableSize) + +/** +*@brief Looks up keys in a table, outputs the corresponding values . \n + +*@par Inputs: +*The dtype of input handle must be resource. Inputs include: +*@li handle: A Tensor of type resource. Handle to the table. +*@li keys: A Tensor. Any shape. Keys to look up. +*@li default_value: A Tensor . \n + +*@par Attributes: +*Tout: Specified type of ouput values . \n + +*@par Outputs: +*values: A Tensor. Has the same type as default_value . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow LookupTableFind operator. +*/ + +REG_OP(LookupTableFind) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(keys, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .INPUT(default_value, TensorType({DT_DOUBLE, DT_FLOAT, \ + DT_INT32, DT_INT64, DT_STRING, DT_BOOL})) + .OUTPUT(values, TensorType({DT_DOUBLE, DT_FLOAT, DT_INT32, \ + DT_INT64, DT_STRING, DT_BOOL})) + .REQUIRED_ATTR(Tout, Type) + .OP_END_FACTORY_REG(LookupTableFind) + +/** +*@brief Creates a non-initialized hash table . 
\n + +*@par Attributes: +*@li container: An optional string. Defaults to "". If non-empty, this table +is placed in the given container. Otherwise, a default container is used. +*@li shared_name: An optional string. Defaults to "". If non-empty, this +table is shared under the given name across multiple sessions. +*@li use_node_name_sharing: An optional bool. Defaults to False. If true and +shared_name is empty, the table is shared using the node name. +*@li key_dtype: A DType. Type of the table keys. +*@li value_dtype: A DType. Type of the table values . \n + +*@par Outputs: +*handle: A Tensor of type resource. Handle to the table . \n + +*@attention Constraints: +*The implementation for HashTable on Ascend uses ai cpu, with bad performance. + +*@par Third-party framework compatibility. +*Compatible with tensorflow HashTable operator. +*/ + +REG_OP(HashTable) + .OUTPUT(handle, TensorType({DT_RESOURCE})) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(use_node_name_sharing, Bool, false) + .REQUIRED_ATTR(key_dtype, Type) + .REQUIRED_ATTR(value_dtype, Type) + .OP_END_FACTORY_REG(HashTable) + +/** +*@brief Table initializer that takes two tensors for keys and values +respectively . \n + +*@par Inputs: +*The dtype of input handle must be resource. Inputs include: +*@li handle: A Tensor of type resource. Handle to a table which will be +initialized. +*@li keys: A Tensor. Keys of type Tkey. +*@li values: A Tensor. Values of type Tval . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow InitializeTable operator. +*/ + +REG_OP(InitializeTable) + .INPUT(handle, TensorType({DT_RESOURCE})) + .INPUT(keys, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .INPUT(values, TensorType({DT_INT32, DT_INT64, DT_FLOAT, \ + DT_DOUBLE, DT_BOOL, DT_STRING})) + .OP_END_FACTORY_REG(InitializeTable) + +/** +*@brief Creates an empty hash table that uses tensors as the backing store . \n + +*@par Inputs: +*The input deleted_key must have the same type as empty_key. Inputs include: +*@li empty_key: A Tensor. The key used to represent empty key buckets +internally. Must not be used in insert or lookup operations. +*@li deleted_key: A Tensor. Must have the same type as empty_key . \n + +*@par Attributes: +*@li container: An optional string. Defaults to "". If non-empty, this table +is placed in the given container. Otherwise, a default container is used. +*@li shared_name: An optional string. Defaults to "". If non-empty, this +table is shared under the given name across multiple sessions. +*@li use_node_name_sharing: An optional bool. Defaults to False. If true and +shared_name is empty, the table is shared using the node name. +*@li value_dtype: A DType. Type of the table values. +*@li value_shape: An optional TensorShape or list of ints. Defaults to []. +The shape of each value. +*@li initial_num_buckets: An optional int. Defaults to 131072. The initial +number of hash table buckets. Must be a power to 2. +*@li max_load_factor: An optional float. Defaults to 0.8. The maximum ratio +between number of entries and number of buckets before growing the table. +Must be between 0 and 1 . \n + +*@par Outputs: +*handle: A Tensor of type resource. Handle to the table . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow MutableDenseHashTable operator. 
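+
+*A hypothetical graph-construction sketch (illustrative only; it assumes the
+*set_input_xxx / set_attr_xxx accessors that REG_OP typically generates and is
+*not taken from this header):
+*  ge::op::MutableDenseHashTable table("table");
+*  table.set_input_empty_key(empty_key)
+*       .set_input_deleted_key(deleted_key)
+*       .set_attr_value_dtype(ge::DT_FLOAT);
+*Here empty_key and deleted_key stand for previously constructed operators.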
+*/ + +REG_OP(MutableDenseHashTable) + .INPUT(empty_key, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .INPUT(deleted_key, TensorType({DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(handle, TensorType({DT_RESOURCE})) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(use_node_name_sharing, Bool, false) + .REQUIRED_ATTR(value_dtype, Type) + .ATTR(value_shape, ListInt, {}) + .ATTR(initial_num_buckets, Int, 131072) + .ATTR(max_load_factor, Float, 0.8) + .OP_END_FACTORY_REG(MutableDenseHashTable) + +/** +*@brief Creates an empty hash table . \n + +*@par Attributes: +*@li container: An optional string. Defaults to "". If non-empty, this table +is placed in the given container. Otherwise, a default container is used. +*@li shared_name: An optional string. Defaults to "". If non-empty, this +table is shared under the given name across multiple sessions. +*@li use_node_name_sharing: An optional bool. Defaults to False. If true and +shared_name is empty, the table is shared using the node name. +*@li key_dtype: A DType. Type of the table keys. +*@li value_dtype: A DType. Type of the table values. +*@li value_shape: An optional TensorShape or list of ints. Defaults to [] . \n + +*@par Outputs: +*handle: A Tensor of type resource. Handle to the table . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow MutableHashTableOfTensors operator. +*/ + +REG_OP(MutableHashTableOfTensors) + .OUTPUT(handle, TensorType({DT_RESOURCE})) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(use_node_name_sharing, Bool, false) + .REQUIRED_ATTR(key_dtype, Type) + .REQUIRED_ATTR(value_dtype, Type) + .ATTR(value_shape, ListInt, {}) + .OP_END_FACTORY_REG(MutableHashTableOfTensors) + +/** +*@brief Creates an empty hash table . \n + +*@par Attributes: +*@li container: An optional string. Defaults to "". If non-empty, this table +is placed in the given container. Otherwise, a default container is used. +*@li shared_name: An optional string. Defaults to "". If non-empty, this +table is shared under the given name across multiple sessions. +*@li use_node_name_sharing: An optional bool. Defaults to False. If true and +shared_name is empty, the table is shared using the node name. +*@li key_dtype: A DType. Type of the table keys. +*@li value_dtype: A DType. Type of the table values . \n + +*@par Outputs: +*handle: A Tensor of type resource. Handle to the table . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow MutableHashTable operator. +*/ + +REG_OP(MutableHashTable) + .OUTPUT(handle, TensorType({DT_RESOURCE})) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .ATTR(use_node_name_sharing, Bool, false) + .REQUIRED_ATTR(key_dtype, Type) + .REQUIRED_ATTR(value_dtype, Type) + .OP_END_FACTORY_REG(MutableHashTable) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/math_ops.h b/third_party/fwkacllib/inc/inc/ops/math_ops.h new file mode 100644 index 00000000..50d058ba --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/math_ops.h @@ -0,0 +1,957 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file math_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief Computes the output as (shift + scale * x) ^ power . \n + +*@par Inputs: +* x: A Tensor of type float16 or float32 . \n + +*@par Attributes: +*@li power: Optional. Must be one of the following types: float32. Defaults to 1.0. +*@li scale: Optional. Must be one of the following types: float32. Defaults to 1.0. +*@li shift: Optional. Must be one of the following types: float32. Defaults to 0.0 . \n + +*@par Outputs: +* y: A Tensor. Has the same type and shape as "x". +*@par Third-party framework compatibility +* Compatible with the Caffe operator Power. +*/ + +REG_OP(Power) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(power, Float, 1.0) + .ATTR(scale, Float, 1.0) + .ATTR(shift, Float, 0.0) + .OP_END_FACTORY_REG(Power); + +/** +*@brief Compute the lower regularized incomplete Gamma function P(a, x) . \n + +*@par Inputs: +*The input a and x must have the same type. Inputs include: +*@li a:A Tensor. Must be one of the following types: float, double. +*@li x:A Tensor. Must have the same type as a . \n + +*@par Outputs: +*z:A Tensor. Has the same type as a . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow Igamma operator. +*/ + +REG_OP(Igamma) + .INPUT(a, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(z, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(Igamma) + +/** +*@brief Compute the upper regularized incomplete Gamma function Q(a, x) . \n + +*@par Inputs: +*The input a and x must have the same type. Inputs include: +*@li a:A Tensor. Must be one of the following types: float, float64. +*@li x:A Tensor. Must have the same type as a . \n + +*@par Outputs: +*z:A Tensor. Has the same type as a . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow Igammac operator. +*/ + +REG_OP(Igammac) + .INPUT(a, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(z, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(Igammac) + +/** +*@brief Compare values of input to threshold and pack resulting bits into +a uint8 . \n + +*@par Inputs: +*The input size must be a non-negative int32 scalar Tensor. Inputs include: +*@li input:Values to compare against threshold and bitpack. +*@li threshold:Threshold to compare against . \n + +*@par Outputs: +*y:The bitpacked comparisons . \n + +*@attention Constraints: +*Currently, the innermost dimension of the tensor must be divisible by 8. 
\n + +*@par Third-party framework compatibility +*Compatible with tensorflow CompareAndBitpack operator +*/ + +REG_OP(CompareAndBitpack) + .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, \ + DT_INT16, DT_INT32, DT_INT64, DT_BOOL })) + .INPUT(threshold, TensorType({ DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \ + DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL })) + .OUTPUT(y, TensorType(DT_UINT8)) + .OP_END_FACTORY_REG(CompareAndBitpack) + +/** +*@brief Counts the number of occurrences of each value in an integer array. +Outputs a vector with length size and the same dtype as weights. If weights +are empty, then index i stores the number of times the value i is counted in +arr. If weights are non-empty, then index i stores the sum of the value in +weights at each index . \n + +*@par Inputs: +*The input size must be a non-negative int32 scalar Tensor. Inputs include: +*@li array:int32 Tensor. +*@li size:non-negative int32 scalar Tensor. +*@li weights: is an int32, int64, float32, or double Tensor with the same +shape as arr, or a length-0 Tensor, in which case it acts as all weights +equal to 1 . \n + +*@par Outputs: +*bins:1D Tensor with length equal to size. The counts or summed weights for +each value in the range [0, size) . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow Bincount operator +*/ + +REG_OP(Bincount) + .INPUT(array, TensorType(DT_INT32)) + .INPUT(size, TensorType(DT_INT32)) + .INPUT(weights, TensorType({ DT_FLOAT, DT_INT32, DT_INT64, DT_DOUBLE })) + .OUTPUT(bins, TensorType({ DT_FLOAT, DT_INT32, DT_INT64, DT_DOUBLE })) + .OP_END_FACTORY_REG(Bincount) + +/** +*@brief Compute the regularized incomplete beta integral . \n + +*@par Inputs: +*The input b and x must have the same types as a. Inputs include: +*@li a:A Tensor. Must be one of the following types: float32, double. +*@li b:A Tensor. Must have the same type as a. +*@li x:A Tensor. Must have the same type as a . \n + +*@par Outputs: +*z:A Tensor. Has the same type as a . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow Betainc operator. +*/ + +REG_OP(Betainc) + .INPUT(a, TensorType({DT_DOUBLE, DT_FLOAT})) + .INPUT(b, TensorType({DT_DOUBLE, DT_FLOAT})) + .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT})) + .OUTPUT(z, TensorType({DT_DOUBLE, DT_FLOAT})) + .OP_END_FACTORY_REG(Betainc) + +/** +*@brief Compute the Hurwitz zeta function + +*@par Inputs: +*The input q must be the same type as x. Inputs include: +*@li x:A Tensor. Must be one of the following types: float32, double. +*@li q:A Tensor. Must have the same type as x . \n + +*@par Outputs: +*z:A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The implementation for Zeta on Ascend uses ai cpu, with bad performance. + +*@par Third-party framework compatibility. +*Compatible with tensorflow Zeta operator. +*/ + +REG_OP(Zeta) + .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT})) + .INPUT(q, TensorType({DT_DOUBLE, DT_FLOAT})) + .OUTPUT(z, TensorType({DT_DOUBLE, DT_FLOAT})) + .OP_END_FACTORY_REG(Zeta) + +/** +*@brief Bucketize 'input' based on 'boundaries'. For example, if the inputs +are boundaries = [0, 10, 100] input = [[-5, 10000] [150, 10] [5, 100]] then +the output will be output = [[0, 3] [3, 2] [1, 3]] + +*@par Inputs: +*The dtype of input x int float double. Inputs include: +*x:Any shape of Tensor contains with int or float type . \n + +*@par Attributes: +*boundaries:A sorted list of floats gives the boundary of the buckets . 
\n + +*@par Outputs: +*y:Same shape with 'input', each value of input replaced with bucket index . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow Bucketize operator. +*/ + +REG_OP(Bucketize) + .INPUT(x, TensorType({DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(boundaries, ListFloat) + .OP_END_FACTORY_REG(Bucketize) + +/** +*@brief Returns a new tensor with the truncated integer values of the elements of input. \n + +*@par Inputs: +*One inputs, including: +* @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n + +*@par Outputs: +*y: A tensor with the same type and shape of input_x \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Trunc. \n +*/ +REG_OP(Trunc) + .INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) + .OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) + .OP_END_FACTORY_REG(Trunc) + +/** +*@brief Computes the sum along sparse segments of a tensor . \n + +*@par Inputs: +*The input indices and segment_ids must have same rank. Inputs include: +*@li x:A Tensor. Must be one of the following types: float, double, int32, +uint8, int16, int8, int64, uint16, uint32, uint64. +*@li indices: A Tensor. Must be one of the following types: int32, int64. +A 1-D tensor. Has same rank as segment_ids. +*@li segment_ids: A Tensor of type int32. A 1-D tensor. Values should be +sorted and can be repeated . \n + +*@par Outputs: +*y:A Tensor. Has the same type as x . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SparseSegmentSum operator +*/ + +REG_OP(SparseSegmentSum) + .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, + DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(segment_ids, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, + DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(SparseSegmentSum) + +/** +*@brief Computes the mean along sparse segments of a tensor . \n + +*@par Inputs: +*The input indices and segment_ids must have same rank. Inputs include: +*@li x: A Tensor. Must be one of the following types: float, double. +*@li indices: A Tensor. Must be one of the following types: int32, int64. +A 1-D tensor. Has same rank as segment_ids. +*@li segment_ids: A Tensor of type int32. A 1-D tensor. Values should be +sorted and can be repeated . \n + +*@par Outputs: +*y:A Tensor. Has the same type as x . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SparseSegmentMean operator +*/ + +REG_OP(SparseSegmentMean) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(segment_ids, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(SparseSegmentMean) + +/** +*@brief Computes gradients for SparseSegmentMean . \n + +*@par Inputs: +*The input grad must have be type float or double. Inputs include: +*@li grad: A Tensor. Must be one of the following types: float, double. +gradient propagated to the SparseSegmentMean op. +*@li indices: A Tensor. Must be one of the following types: int32, int64. +indices passed to the corresponding SparseSegmentMean op. +*@li segment_ids: A Tensor of type int32. segment_ids passed to the +corresponding SparseSegmentMean op. +*@li output_dim0: A Tensor of type int32. 
dimension 0 of "x" passed to +SparseSegmentMean op . \n + +*@par Outputs: +*y:A Tensor. Has the same type as grad . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SparseSegmentMeanGrad operator +*/ + +REG_OP(SparseSegmentMeanGrad) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(segment_ids, TensorType({DT_INT32})) + .INPUT(output_dim0, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(SparseSegmentMeanGrad) + +/** +*@brief Computes the gradient of igamma(a, x) wrt a + +*@par Inputs: +*The input a and x must have the same type. Inputs include: +*@li a:A Tensor. Must be one of the following types: float32, double. +*@li x:A Tensor. Must have the same type as a . \n + +*@par Outputs: +*y:A Tensor. Has the same type as a . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow IgammaGradA operator +*/ + +REG_OP(IgammaGradA) + .INPUT(a, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(z, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(IgammaGradA) + +/** +*@brief Initialize data process channel . \n + +*@par Attributes: +*channel_name: A string. Default "" . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow InitData operator +*/ + +REG_OP(InitData) + .ATTR(channel_name, String, "") + .OP_END_FACTORY_REG(InitData) + +/** +*@brief Get the next batch of data in data processing . \n + +*@par Attributes: +*@li output_types: A nested structure of DType objects corresponding to each +component of an element of this dataset. +*@li output_shapes: A nested structure of TensorShape objects corresponding +to each component of an element of this dataset. +*@li channel_name: A string. Default "" . \n + +*@par Outputs: +*y:A nested structure of Tensor objects . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow GetNext operator +*/ + +REG_OP(GetNext) + .DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, + DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL})) + .ATTR(output_types, ListInt, {}) + .ATTR(output_shapes, ListListInt, {}) + .ATTR(output_num, Int, 1) + .ATTR(channel_name, String, "") + .OP_END_FACTORY_REG(GetNext) + +/** +*@brief Get dynamic dims after GetNext. \n + +*@par Inputs: +*input: A nested structure of Tensor objects, from GetNext's output. \n + +*@par Attributes: +*@li shape_info: GE shape_info for each inputs, -1 means unknow dim. +*@li N: Inputs number. \n + +*@par Outputs: +*dims: GE unknow dims, a vector of int64. \n +*/ + +REG_OP(GetDynamicDims) + .DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(dims, TensorType({DT_INT32, DT_INT64})) + .REQUIRED_ATTR(shape_info, ListInt) + .REQUIRED_ATTR(N, Int) + .OP_END_FACTORY_REG(GetDynamicDims) + +/** +*@brief End of sequence . \n + +*@par Inputs: +*x: A Tensor of type uint8 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*/ + +REG_OP(EndOfSequence) + .INPUT(x, TensorType({DT_UINT8})) + .OUTPUT(y, TensorType({DT_UINT8})) + .OP_END_FACTORY_REG(EndOfSequence) + +/** +*@brief: Computes the Gauss error function of `x` element-wise . \n + +*@par Inputs: +*x: A Tensor of type float16, float32 or double. the format can be +* [NCHW,NC1HWC0,NHWC,ND] + +*@par Outputs: +*y: A Tensor. Has the same type and format as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Erf. 
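+
+* For reference (illustrative note): the element-wise function is the standard
+* Gauss error function erf(x) = (2 / sqrt(pi)) * integral_0^x exp(-t^2) dt,
+* i.e. the same value std::erf(x) from <cmath> returns on the host.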
+*/ +REG_OP(Erf) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(Erf) + +/** +*@brief: Computes the Gauss complementary error function of "x" element-wise . \n + +*@par Inputs: +*x: A Tensor of type float16 ,float32, double . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Erfc. +*/ +REG_OP(Erfc) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(Erfc) + +/** +*@brief This operation returns a rank 1 histogram counting the number of entries in `values` +* that fell into every bin.The bins are equal width and determined by the arguments +* 'value_range' and 'nbins' . \n + +*@par Inputs: +*Three inputs, including: +*@li x: A Tensor of type float32, float16, int32, int64. +*@li range: A Tensor of type float32,float16,int32, int64. +*@li nbins: A Tensor of type int32 . \n + +*@par Attributes: +* dtype: An optional attribute. Defaults to "int32" . \n + +*@par Outputs: +*y: A Tensor. A Tensor of type int32 or int64 . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator HistogramFixedWidth. +*/ +REG_OP(HistogramFixedWidth) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) + .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) + .INPUT(nbins, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT32})) + .ATTR(dtype, String, "int32") + .OP_END_FACTORY_REG(HistogramFixedWidth) + +/** +*@brief This operation returns a rank 1 histogram counting the number of entries in `values` +* that fell into every bin.The bins are equal width and determined by the arguments +* 'value_range' and 'nbins' . \n + +*@par Inputs: +*Two inputs, including: +*@li x: A Tensor of type float32,float16,int32, int64. +*@li range: A Tensor of type float32,float16,int32, int64 . \n + +*@par Attributes: +*@li dtype: An optional attribute. Defaults to "int32". +*@li nbins: A required attribute,the type is int32 . \n + +*@par Outputs: +*y: A Tensor. A Tensor of type int32 . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator HistogramFixedWidth. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use HistogramFixedWidth instead. +*/ +REG_OP(HistogramFixedWidthD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) + .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(nbins, Int) + .ATTR(dtype, String, "int32") + .OP_END_FACTORY_REG(HistogramFixedWidthD) + +/** +*@brief Returns the next representable value of x1 in the direction of x2, element-wise . \n + +*@par Inputs: +*The input X1 and x2 must have the same type. Inputs include: +*@li x1:A Tensor. Must be one of the following types: float32, double. +*@li x2:A Tensor. Must have the same type as x1 . \n + +*@par Outputs: +*output:A Tensor. Has the same type as x1 . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow NextAfter operator +*/ +REG_OP(NextAfter) + .INPUT(x1, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(NextAfter) + +/** + *@brief Compute element-wise finiteness, return a boolean tensor. + + *@par Inputs: + *x:A Tensor. + + *@par Outputs: + *y:A Tensor. Has the same shape as x. 
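+
+ *For example (illustrative): x = [1.0, Inf, -Inf, NaN] yields
+ *y = [true, false, false, false], matching std::isfinite applied element-wise.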
+ + *@par Third-party framework compatibility. + *Compatible with tensorflow IsFinite operator. + */ +REG_OP(IsFinite) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(IsFinite) + +/** + *@brief Compute element-wise infiniteness, return a boolean tensor. + + *@par Inputs: + *x:A Tensor. + + *@par Outputs: + *y:A Tensor. Has the same shape as x. + + *@par Third-party framework compatibility. + *Compatible with tensorflow IsInf operator. + */ +REG_OP(IsInf) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(IsInf) + +/** + *@brief Computes the complex absolute value of a tensor. + + *@par Inputs: + *x:A Tensor. + + *@par Outputs: + *y:A tensor of type `float` or `double` that is the absolute value of each element in `x`. + + *@par Third-party framework compatibility. + *Compatible with tensorflow ComplexAbs operator. + */ +REG_OP(ComplexAbs) + .INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(Tout, Type, DT_FLOAT) + .OP_END_FACTORY_REG(ComplexAbs) + +/** + *@brief Returns which elements of x are NaN. + + *@par Inputs: + *x:A Tensor. + + *@par Outputs: + *y:A Tensor. Has the same shape as x. + + *@par Third-party framework compatibility. + *Compatible with tensorflow IsNan operator. + */ +REG_OP(IsNan) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(IsNan) + +/** + *@brief Returns the real part of a complex number. + + *@par Inputs: + *input:A Tensor. + + *@par Outputs: + *output:A Tensor. Has the same shape as input. + + *@par Third-party framework compatibility. + *Compatible with tensorflow Real operator. + */ +REG_OP(Real) + .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(Tout, Type, DT_FLOAT) + .OP_END_FACTORY_REG(Real) + +/** + *@brief Returns the complex conjugate of a complex number. + + *@par Inputs: + *input:A Tensor. + + *@par Outputs: + *output:A Tensor. Has the same shape as input. + + *@par Third-party framework compatibility. + *Compatible with tensorflow output operator. + */ +REG_OP(Conj) + .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(output, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(Conj) + +/** +*@brief The negative log likelihood loss . \n + +*@par Inputs: +*The input x and weight must have the same type. Inputs include: +*@li x: A Tensor dtype of float32. +*@li target: A Tensor dtype of int32. +*@li weight: A Tensor dtype of float32 . \n + +*@par Attributes: +*reduction: An optional attribute. Defaults to "mean" . \n + +*@par Outputs: +*@li y: A Tensor dtype of float32. +*@li total_weight: A Tensor dtype of float32 . \n + +*@par Third-party framework compatibility +*Compatible with pytorch NLLLoss operator +*/ +REG_OP(NLLLoss) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(target, TensorType({DT_INT32})) + .INPUT(weight, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OUTPUT(total_weight, TensorType({DT_FLOAT})) + .ATTR(reduction, String, "mean") + .ATTR(ignore_index, Int, -100) + .OP_END_FACTORY_REG(NLLLoss) + +/** +*@brief The negative log likelihood loss grad . \n + +*@par Inputs: +*@li x:A Tensor dtype of float32. +*@li y_grad:A Tensor dtype of float32. +*@li target:A Tensor dtype of int32. +*@li weight:A Tensor dtype of float32. +*@li total_weight:A Tensor dtype of float32 . 
\n + +*@par Attributes: +*reduction: An optional attribute. Defaults to "mean" . \n + +*@par Outputs: +*x_grad: A Tensor. Must be the following type: float32 . \n + +*@par Third-party framework compatibility +*Compatible with pytorch NLLLossGrad operator +*/ +REG_OP(NLLLossGrad) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(y_grad, TensorType({DT_FLOAT})) + .INPUT(target, TensorType({DT_INT32})) + .INPUT(weight, TensorType({DT_FLOAT})) + .INPUT(total_weight, TensorType({DT_FLOAT})) + .OUTPUT(x_grad, TensorType({DT_FLOAT})) + .ATTR(reduction, String, "mean") + .ATTR(ignore_index, Int, -100) + .OP_END_FACTORY_REG(NLLLossGrad) + +/** +*@brief The ifmr . \n + +*@par Inputs: +*@li data:A Tensor of feature map +*@li data_min:A Tensor of min value of feature map. +*@li data_max:A Tensor of max value of feature map. +*@li cumsum:A Tensor of cumsum bin of data . \n + +*@par Attributes: +*min_percentile: min init percentile. +*max_percentile: max init percentile. +*search_range: search range. +*search_step: step size of searching. +*with_offset: whether using offset . \n + +*@par Outputs: +*scale: optimal scale. +*offset: optimal offset . \n + +*@par Third-party framework compatibility +*Compatible with mindspore +*/ + +REG_OP(IFMR) + .INPUT(data, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(data_min, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(data_max, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(cumsum, TensorType({DT_INT32})) + .OUTPUT(scale, TensorType({DT_FLOAT})) + .OUTPUT(offset, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(min_percentile, Float) + .REQUIRED_ATTR(max_percentile, Float) + .REQUIRED_ATTR(search_range, ListFloat) + .REQUIRED_ATTR(search_step, Float) + .REQUIRED_ATTR(with_offset, Bool) + .OP_END_FACTORY_REG(IFMR) + +/** +*@brief weights adaptive range quantization. \n + +*@par Inputs: +*@li w:A Tensor of weights. \n +*@li w_min:A Tensor of weights reduce_min. \n +*@li w_max:A Tensor of weights reduce_max. \n + +*@par Attributes: +*num_bits: the bits num used for quantize. +*offset_flag: whether using offset. \n + +*@par Outputs: +*y: fake quantized weights. \n + +*@par Third-party framework compatibility +*Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(WtsARQ) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w_min, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w_max, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(num_bits, Int, 8) + .ATTR(offset_flag, Bool, false) + .OP_END_FACTORY_REG(WtsARQ) + +/** +*@brief The acts_ulq. \n + +*@par Inputs: +*@li x:A Tensor of feature map +*@li clamp _min:A Tensor of min clamp value of feature map. +*@li clamp _max:A Tensor of max clamp value of feature map. + +*@par Attributes: +*fixed_min: fix min to zero. +*num_bits: quant bits. \n + +*@par Outputs: +*y: output fake quant feature map. +*clamp_min_mask: where x > clamp_min +*clamp_min_mask: where x < clamp_max +*x_clamped_loss: clamp loss. \n + +*@par Third-party framework compatibility +*Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
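+
+*For orientation only (a generic fake-quantization scheme, not taken from this
+*header; the exact ULQ formulation and the mask/loss outputs may differ):
+*  scale = (clamp_max - clamp_min) / (2^num_bits - 1)
+*  y     = round((clamp(x, clamp_min, clamp_max) - clamp_min) / scale) * scale + clamp_min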
+*/ + +REG_OP(ActsULQ) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(clamp_min, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(clamp_max, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(clamp_min_mask, TensorType({DT_BOOL})) + .OUTPUT(clamp_max_mask, TensorType({DT_BOOL})) + .OUTPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(fixed_min, Bool, false) + .ATTR(num_bits, Int, 8) + .OP_END_FACTORY_REG(ActsULQ) + +/** +*@brief The acts_ulq_input_grad. \n + +*@par Inputs: +*@li y_grad: A Tensor of gradient +*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed' +*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed' + +*@par Outputs: +*x_grapd: The gradient of inpust. \n + +*@par Third-party framework compatibility +*Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(ActsULQInputGrad) + .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(clamp_min_mask, TensorType({DT_BOOL})) + .INPUT(clamp_max_mask, TensorType({DT_BOOL})) + .OUTPUT(x_grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(ActsULQInputGrad) + +/** +*@brief The act_ulq_clamp_max_grad. \n + +*@par Inputs: +*@li y_grad: A Tensor of gradient +*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed. +*@li x_clamped_loss: A Tensor of gradient. \n + +*@par Outputs: +*clamp_max_grad: The gradient of clamp max. \n + +*@par Third-party framework compatibility +*Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(ActULQClampMaxGrad) + .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(clamp_max_mask, TensorType({DT_BOOL})) + .INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(clamp_max_grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(ActULQClampMaxGrad) + +/** +*@brief The act_ulq_clamp_min_grad. \n + +*@par Inputs: +*@li y_grad: A Tensor of gradient +*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed. +*@li x_clamped_loss: A Tensor of gradient. \n + +*@par Outputs: +*clamp_min_grad: The gradient of clamp min. \n + +*@par Third-party framework compatibility +*Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(ActULQClampMinGrad) + .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(clamp_min_mask, TensorType({DT_BOOL})) + .INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(ActULQClampMinGrad) + +/** +* @brief Computes Lp norm. + +* @par Inputs: +* @li x: An ND tensor of type float16, float32. \n +* +* @par Attributes: +* @li p: Int, "inf" or "-inf", default value is 2. +* @li axes: ListInt, {} means all axes will be computed. +* @li keepdim: Bool, default is false. +* @li epsilon: Float, default is 1e-12. \n + +* @par Outputs: +* @li y: An ND tensor of type float16, float32. The shape of y is depending +* on axes and keepdim. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpNorm. 
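+*
+* For reference (illustrative note): with the defaults p = 2 and axes = {},
+* y = sqrt(sum_i x_i^2) over all elements; in general y = (sum_i |x_i|^p)^(1/p)
+* over the given axes, while p = "inf" / "-inf" select max|x_i| / min|x_i|.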
+*/ +REG_OP(LpNorm) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Int, 2) + .ATTR(axes, ListInt, {}) + .ATTR(keepdim, Bool, false) + .ATTR(epsilon, Float, 1e-12) + .OP_END_FACTORY_REG(LpNorm) + +/** +* @brief get complex. + +* @par Inputs: +* @li real: An ND tensor of type float32. double +* @li imag: An ND tensor of type float32. double \n +* +* @par Outputs: +* @li out: An ND tensor of type complex64, complex128 \n +*/ +REG_OP(Complex) + .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(Tout, Type, DT_COMPLEX64) + .OP_END_FACTORY_REG(Complex) + +/** +* @brief deal complex. + +* @par Inputs: +* @li input: An ND tensor of type complex64, complex128 \n +* +* @par Outputs: +* @li output: An ND tensor of type float32. double \n +*/ +REG_OP(Imag) + .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(Tout, Type, DT_FLOAT) + .OP_END_FACTORY_REG(Imag) + +/** +* @brief deal complex. + +* @par Inputs: +* @li input: An ND tensor of type complex64, complex128 \n +* +* @par Outputs: +* @li output: An ND tensor of type float32. double \n +*/ +REG_OP(Angle) + .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(Tout, Type, DT_FLOAT) + .OP_END_FACTORY_REG(Angle) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/inc/ops/matrix_calculation_ops.h new file mode 100644 index 00000000..6bff7f82 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/matrix_calculation_ops.h @@ -0,0 +1,1048 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file matrix_calculation_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n + +*@par Inputs: +*Three inputs, including: +* @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, +* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. +* @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, +* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. +* @li bias: A optional 1D Tensor. Must be one of the following types: float16, +* float32, int32. Has format [ND, NHWC] . \n + +*@par Attributes: +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n + +*@par Outputs: +*y: The result matrix Tensor. 2D. Must be one of the following types: float16, +* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . 
\n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchMatmul. +*/ +REG_OP(MatMul) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .ATTR(transpose_x1, Bool, false) + .ATTR(transpose_x2, Bool, false) + .OP_END_FACTORY_REG(MatMul) + +/** +*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n + +*@par Inputs: +*Two inputs, including: +* @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, +* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. +* @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, +* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. +* @li bias: A 1D Tensor. Must be one of the following types: float16, +* float32, int32. Has format [ND, NHWC] . \n + +*@par Attributes: +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n + +*@par Outputs: +*y: The result matrix Tensor. 2D. Must be one of the following types: float16, +* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchMatmul. +*/ +REG_OP(MatMulV2) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .ATTR(transpose_x1, Bool, false) + .ATTR(transpose_x2, Bool, false) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(MatMulV2) + + +/** +*@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . \n + +*@attention Constraints: +* For better performance, The k-axis must be aligned to 16 (input type +* is float16) or 32 (input type is int8). \n + +*@par Inputs: +*Five inputs, including: +*@li a: A matrix Tensor. Must be one of the following types: float16, int8. +* Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). +*@li b: A matrix Tensor. Must be one of the following types: float16, int8. +* Has format [ND, FRACTAL_NZ, FRACTAL_Z]. 2D(ND) or 4D(FRACTAL_NZ, FRACTAL_Z). +*@li c: A matrix Tensor. Must be one of the following types: float16, int32, +* float32. has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). +*@li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the following +* types: float16, int32, float32. Has format [ND]. +*@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following +* types: float16, int32, float32. Has format [ND]. +* The format of a, b, c has restriction:\n +* When type of a is int8 and type of c is int32, the format of a, b, c should +* all be ND, or a is FRACTAL_NZ and b is FRACTAL_Z and c is ND.\n +* When type of a is int8 and type of c is float32, the format of a, b, c should +* all be ND or a is FRACTAL_NZ and b is FRACTAL_Z and c is FRACTAL_NZ.\n +* When type of a is float16 and type of c is float16, the format of a, b, c +* should all be ND or FRACTAL_NZ.\n +* When type of a is float16 and type of c is float32, the format of a, b, c +* should all be ND or FRACTAL_NZ . \n + +*@par Attributes: +*Two attributes, including: +*@li transpose_a: Optional. A bool. 
If True, changes the shape of "a" from +* [M, K] to [K, M]. +*@li transpose_b: Optional. A bool. If True, changes the shape of "b" from +* [K, N] to [N, K] . \n + +*@par Outputs: +*y: The result matrix Tensor. Must be one of the following types: float16, +* float32, int32. Has format [ND, FRACTAL_NZ], the format should be equal to a. +* 2D(ND) or 4D(FRACTAL_NZ). +*/ + +REG_OP(GEMM) + .INPUT(a, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(b, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .ATTR(transpose_a, Bool, false) + .ATTR(transpose_b, Bool, false) + .OP_END_FACTORY_REG(GEMM) + +/** +*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n + +*@par Inputs: +*Three inputs, including: +* @li x1: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. +* @li x2: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n + +*@par Attributes: +*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. +*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n + +*@par Outputs: +*y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchMatmul. +*/ + +REG_OP(BatchMatMul) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .ATTR(adj_x1, Bool, false) + .ATTR(adj_x2, Bool, false) + .OP_END_FACTORY_REG(BatchMatMul) + + +/** +* @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n + +* @par Inputs: +* Three inputs, including: +* @li x1: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. +* @li x2: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n +* @li bias: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n + +* @par Attributes: +* @li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. +* @li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n + +* @par Outputs: +* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchMatmul. 
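+*
+* @par Example
+* A minimal host-side sketch of the documented batched behaviour, assuming
+* row-major [batch, m, k] x [batch, k, n] inputs: each batch slice is
+* multiplied independently. The adj_x1/adj_x2 attributes would additionally
+* transpose each 2-D slice before the multiply. This is an illustration of
+* the semantics only, not the device kernel; "naive_batch_matmul" is an
+* assumed helper name.
+* @code
+* #include <vector>
+* #include <cstddef>
+*
+* std::vector<float> naive_batch_matmul(const std::vector<float> &x1,
+*                                       const std::vector<float> &x2,
+*                                       std::size_t batch, std::size_t m,
+*                                       std::size_t k, std::size_t n) {
+*   std::vector<float> y(batch * m * n, 0.0f);
+*   for (std::size_t b = 0; b < batch; ++b) {
+*     const float *a = &x1[b * m * k];
+*     const float *c = &x2[b * k * n];
+*     float *out = &y[b * m * n];
+*     for (std::size_t i = 0; i < m; ++i)
+*       for (std::size_t j = 0; j < n; ++j)
+*         for (std::size_t p = 0; p < k; ++p)
+*           out[i * n + j] += a[i * k + p] * c[p * n + j];
+*   }
+*   return y;
+* }
+* @endcode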
+*/ + +REG_OP(BatchMatMulV2) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .ATTR(adj_x1, Bool, false) + .ATTR(adj_x2, Bool, false) + .OP_END_FACTORY_REG(BatchMatMulV2) + + +/** +*@brief Computes half the L2 norm of a tensor without the sqrt . \n + +*@par Inputs: + +* x: A Tensor. +* TensorType::FloatingDataType() . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator L2Loss. +*/ +REG_OP(L2Loss) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(L2Loss) + +/** +*@brief: Returns a batched diagonal tensor with a given batched diagonal values . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiag. +*/ +REG_OP(MatrixDiag) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(MatrixDiag) + +/** +*@brief: Returns a batched diagonal tensor with a given batched diagonal values . \n + +*@par Inputs: +* Two inputs, including: +*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +*@li assist: A Tensor of the same type as "x" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiag. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use MatrixDiag instead. +*/ +REG_OP(MatrixDiagD) + .INPUT(x, TensorType::BasicType()) + .INPUT(assist, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(MatrixDiagD) + +/** +*@brief: Returns the batched diagonal part of a batched tensor . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiagPart. +*/ +REG_OP(MatrixDiagPart) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(MatrixDiagPart) + +/** +*@brief: Returns the batched diagonal part of a batched tensor . \n + +*@par Inputs: +* Two inputs, including: +*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +*@li assist: A Tensor of the same type as "x" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiagPart. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use MatrixDiagPart instead. +*/ +REG_OP(MatrixDiagPartD) + .INPUT(x, TensorType::BasicType()) + .INPUT(assist, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(MatrixDiagPartD) + +/** +*@brief: Returns a batched matrix tensor with new batched diagonal values . 
\n + +*@par Inputs: +* Two inputs, including: +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +*@li diagonal: A Tensor of the same type as "x" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixSetDiag. +*/ +REG_OP(MatrixSetDiag) + .INPUT(x, TensorType::BasicType()) + .INPUT(diagonal, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(MatrixSetDiag) + +/** +*@brief: Returns a batched matrix tensor with new batched diagonal values . \n + +*@par Inputs: +* Three inputs, including: +*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +*@li diagonal: A Tensor of the same type as "x". +*@li assist: A Tensor of the same type as "x" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixSetDiag. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use MatrixSetDiag instead. +*/ +REG_OP(MatrixSetDiagD) + .INPUT(x, TensorType::BasicType()) + .INPUT(diagonal, TensorType::BasicType()) + .INPUT(assist, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(MatrixSetDiagD) + +/** +*@brief Applies sparse "updates" to individual values or slices in a Variable . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor. +*Must be one of the following types: float16, float32, int8, uint8, double, + * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, + * uint64 +*@li indices: An ND Tensor. +*Must be one of the following types: int32, int64 +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float32, int8, uint8, double, + * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, + * uint64 + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". If "True", + * the operation will be protected by a lock . \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNdUpdate. +*/ +REG_OP(ScatterNdUpdate) + .INPUT(var, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType::BasicType()) + .OUTPUT(var, TensorType::BasicType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterNdUpdate) + +/** +*@brief Applies sparse addition to individual values or slices in a Variable . \n + +*@par Inputs: +* Three inputs, including: +*@li x: An ND Tensor. \n + +*Must be one of the following types: float16, float32, bool, int8, uint8 +*@li indices: An ND Tensor. \n + +*Must be one of the following types: int32 +*@li updates: An ND Tensor. \n + +*Must be one of the following types: float16, float32, bool, int8, uint8 + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator TensorScatterUpdate. 
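+*
+* @par Example
+* A minimal host-side sketch of the documented semantics for the common case
+* of rank-1 index vectors selecting rows of a 2-D "x". Unlike ScatterNdUpdate,
+* a new tensor is returned instead of a variable being updated in place. The
+* helper name is illustrative only.
+* @code
+* #include <vector>
+* #include <cstdint>
+* #include <cstddef>
+*
+* // x: [rows, cols] row-major; indices: one row id per update row;
+* // updates: [indices.size(), cols].
+* std::vector<float> tensor_scatter_update_rows(std::vector<float> x,
+*                                               const std::vector<int32_t> &indices,
+*                                               const std::vector<float> &updates,
+*                                               std::size_t cols) {
+*   for (std::size_t i = 0; i < indices.size(); ++i) {
+*     for (std::size_t c = 0; c < cols; ++c) {
+*       x[static_cast<std::size_t>(indices[i]) * cols + c] = updates[i * cols + c];
+*     }
+*   }
+*   return x;  // "x" was taken by value, so the caller's tensor is untouched.
+* }
+* @endcode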
+*/ +REG_OP(TensorScatterUpdate) + .INPUT(x, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(TensorScatterUpdate) + +/** +*@brief Adds sparse "updates" to a variable reference . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor . \n + +*Must be one of the following types: float16, float32, int32, int8, uint8 +*@li indices: An ND Tensor of type int32 or int64. + + +*@li updates: An Tensor. format:NCHW, NHWC . \n + +*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@par Attributes: +* use_locking: An optional bool. Defaults to "False". If "True", the operation +* will be protected by a lock . \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterAdd. +*/ +REG_OP(ScatterAdd) + .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterAdd) + +/** +*@brief Divides a variable reference by sparse updates . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 + +*@li indices: An ND Tensor. +*Must be one of the following types: int32 +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 + +*@par Attributes: +*@li use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterDiv. +*/ +REG_OP(ScatterDiv) + .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterDiv) + +/** +*@brief Applies sparse addition to individual values or slices in a Variable . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor. +*Must be one of the following types: int32 +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNdAdd. 
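+*
+* @par Example
+* A minimal host-side sketch of the documented accumulation for the simple
+* case of rank-1 index vectors addressing rows of a 2-D "var": the matching
+* "updates" row is added to each selected row, and repeated indices
+* accumulate. Illustrative only; the use_locking attribute is ignored here.
+* @code
+* #include <vector>
+* #include <cstdint>
+* #include <cstddef>
+*
+* void scatter_nd_add_rows(std::vector<float> &var,             // [rows, cols]
+*                          const std::vector<int32_t> &indices,  // one row id per update
+*                          const std::vector<float> &updates,    // [indices.size(), cols]
+*                          std::size_t cols) {
+*   for (std::size_t i = 0; i < indices.size(); ++i)
+*     for (std::size_t c = 0; c < cols; ++c)
+*       var[static_cast<std::size_t>(indices[i]) * cols + c] += updates[i * cols + c];
+* }
+* @endcode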
+*/ +REG_OP(ScatterNdAdd) + .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterNdAdd) + +/** +*@brief Applies sparse addition to individual values or slices in a Variable . \n + +*@par Inputs: +* Three inputs, including: +*@li x: An ND Tensor. \n + +*Must be one of the following types: float16, float32, int32, int8, uint8 +*@li indices: An ND Tensor. \n + +*Must be one of the following types: int32 +*@li updates: An ND Tensor. \n + +* Must be one of the following types: float16, float32, int32, int8, uint8 + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator TensorScatterAdd. +*/ +REG_OP(TensorScatterAdd) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OP_END_FACTORY_REG(TensorScatterAdd) + +/** +*@brief Applies sparse subtraction to individual values or slices in a Variable . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor. +*Must be one of the following types: int32, int64 +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n + +*@par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNdSub. +*/ +REG_OP(ScatterNdSub) + .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterNdSub) + +/** +*@brief Applies sparse addition to individual values or slices in a Variable . \n + +*@par Inputs: +* Three inputs, including: +*@li x: An ND Tensor. \n + +*Must be one of the following types: float16, float32, int32, int8, uint8 +*@li indices: An ND Tensor. \n + +*Must be one of the following types: int32 +*@li updates: An ND Tensor. \n + +*Must be one of the following types: float16, float32, int32, int8, uint8 + +*@par Outputs: +* y: A Tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator TensorScatterSub. +*/ +REG_OP(TensorScatterSub) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OP_END_FACTORY_REG(TensorScatterSub) + +/** +*@brief Subtracts sparse updates to a variable reference . 
\n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor. +*Must be one of the following types: int32, int64 +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n + +*@par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterSub. +*/ +REG_OP(ScatterSub) + .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterSub) + +/** +*@brief: Returns the batched diagonal part of a batched tensor with "assist" . \n + +*@par Inputs: +* Two inputs, including: +* @li x: A Tensor of type float16, float32, or int32. +* @li assist: A Tensor of the same type as "x" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator DiagPart. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use DiagPart instead. +*/ +REG_OP(DiagPartD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(assist, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OP_END_FACTORY_REG(DiagPartD) + +/** +*@brief: Returns the batched diagonal part of a batched tensor . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: +* float16, float32, int32, int64, double, complex64, complex128 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator DiagPart. +*/ +REG_OP(DiagPart) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_DOUBLE, + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_DOUBLE, + DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(DiagPart) + +/** +*@brief Also known as a "fully-connected" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n + +*@par Inputs: +* Four inputs, including: +*@li x: A Tensor of type float16, int8. +*@li w: A weight matrix of type float16, int8. +*@li b: A Tensor of type float16, int32, float32. +*@li offset_w: A Tensor of type int8 . \n + +*@par Attributes: +*@li num_output: Reserved. +*@li transpose: A bool, specifying weight whether to transpose, either "true" or "false". Defaults to "false". +*@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. +* The product of the subsequent dimensions starting form first dimension or the second dimension is "K". +*@li offset_x: Reserved . \n + +*@par Outputs: +*y: The result tensor of type float16, int32, float32 . \n + +*@par Third-party framework compatibility +* Compatible with the Caffe operator InnerProduct . 
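+*
+* @par Example
+* A minimal host-side sketch of the inner-product math: the input is
+* flattened to [batch, K] starting at "axis", then y = x * w^T + b. The
+* [num_output, K] weight layout assumed here follows the usual Caffe
+* InnerProduct convention for transpose=false; it is an assumption for
+* illustration, not a statement about the device kernel.
+* @code
+* #include <vector>
+* #include <cstddef>
+*
+* std::vector<float> fully_connection_ref(const std::vector<float> &x,  // [batch, K]
+*                                         const std::vector<float> &w,  // [num_output, K]
+*                                         const std::vector<float> &b,  // [num_output] or empty
+*                                         std::size_t batch, std::size_t k,
+*                                         std::size_t num_output) {
+*   std::vector<float> y(batch * num_output, 0.0f);
+*   for (std::size_t n = 0; n < batch; ++n) {
+*     for (std::size_t o = 0; o < num_output; ++o) {
+*       float acc = b.empty() ? 0.0f : b[o];
+*       for (std::size_t i = 0; i < k; ++i)
+*         acc += x[n * k + i] * w[o * k + i];
+*       y[n * num_output + o] = acc;
+*     }
+*   }
+*   return y;
+* }
+* @endcode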
\n + +*@par Quantization supported or not +* Yes +*/ +REG_OP(FullyConnection) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(w, TensorType({DT_FLOAT16, DT_INT8})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32})) + .REQUIRED_ATTR(num_output, Int) + .ATTR(transpose, Bool, false) + .ATTR(axis, Int, 1) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(FullyConnection) + +/** +*@brief Also known as a "fully-connected-compress" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n + +*@par Inputs: +* Four inputs, including: +*@li x: A Tensor of type uint8, int8. +*@li w: A weight matrix of type int8, int8. +*@li w: A compress index matrix of type int8, int8. +*@li b: A Tensor of type float16, int32, int32. +*@li offset_w: A Tensor of type int8.i + +*@par Attributes: +*@li num_output: Reserved. +*@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false". +*@li axis: Reserved. +*@li offset_x: Reserved . \n + +*@par Outputs: +*y: The result tensor of type int32 . \n + +*@par Third-party framework compatibility +* Compatible with the Caffe operator InnerProduct . \n + +*@par Quantization supported or not +* Yes +*/ +REG_OP(FullyConnectionCompress) + .INPUT(x, TensorType({DT_UINT8, DT_INT8})) + .INPUT(w, TensorType({DT_INT8})) + .INPUT(comress_index, TensorType({DT_INT8})) + .OPTIONAL_INPUT(b, TensorType({DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(num_output, Int) + .ATTR(transpose, Bool, false) + .ATTR(axis, Int, 1) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(FullyConnectionCompress) + +/** +*@brief Computes the confusion matrix from predictions and labels . \n + +*@par Inputs: +* Three inputs, including: +*@li labels: A Tensor. Must be one of the following types: float16, float32, +* int32, int8, uint8. +*@li predictions: A Tensor. Must be one of the following types: float16, +* float32, int32, int8, uint8. +*@li weights: A Tensor. Must be one of the following types: float16, float32, +* int32, int8, uint8 . \n + +*@par Attributes: +*@li num_classes: An integer for the shape of the output matrix. +* No default value. +*@li dtype: Data type of the confusion matrix. No default value . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "labels" + +*@attention Constraints: +*@li "weights", "labels", and "predictions" are 1D tensors. +*@li The output is with shape (num_classes, num_classes), +* where, 1 <= num_classes <= 4096 . \n + +*@see Region() + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ConfusionMatrix. +*/ +REG_OP(ConfusionMatrix) + .INPUT(labels, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16, DT_INT8, DT_UINT8})) + .INPUT(predictions, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16, DT_INT8, DT_UINT8})) + .OPTIONAL_INPUT(weights, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16, DT_INT8, DT_UINT8})) + .REQUIRED_ATTR(num_classes, Int) + .REQUIRED_ATTR(dtype, String) + .OP_END_FACTORY_REG(ConfusionMatrix) + +/** +*@brief Multiplies sparse updates into a variable reference . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor. 
+*Must be one of the following types: int32 +*@li updates: An ND Tensor . \n + +*Must be one of the following types: float16, float, int32, int8, uint8 + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". If "True", the operation +* will be protected by a lock . \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterMul. +*/ +REG_OP(ScatterMul) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterMul) + +/** +*@brief Reduces sparse updates into a variable reference using + * the "min" operation . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor. +*Must be one of the following types: float16, float, int32 + +*@li indices: An ND Tensor. +*Must be one of the following types: int32 + +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float, int32 + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". If "True", the operation +* will be protected by a lock . \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterMin. +*/ +REG_OP(ScatterMin) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterMin) + +/** +*@brief Reduces sparse updates into a variable reference using the "max" operation . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor . \n + +*Must be one of the following types: float16, float, int32 +*@li indices: An NCHW, NHWC, or ND Tensor . \n + +*Must be one of the following types: int32 +*@li updates: An NCHW, NHWC, or ND Tensor . \n + +*Must be one of the following types: float16, float, int32 + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", the operation will be protected by a lock . \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterMax. +*/ +REG_OP(ScatterMax) + .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ScatterMax) + +/** +*@brief Applies sparse updates to a variable reference . \n + +*@par Inputs: +* Three inputs, including: +*@li var: An ND Tensor . \n + +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor . \n + +*Must be one of the following types: int32 +*@li updates: An ND Tensor . \n + +*Must be one of the following types: float16, float, int32, int8, uint8 + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". If "True", +* the operation will be protected by a lock . \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . 
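+*
+* @par Example
+* A minimal host-side sketch of the assignment semantics on a 2-D "var" with
+* rank-1 row indices. In contrast to ScatterAdd, the selected rows are
+* overwritten, so with duplicate indices the last update wins. Illustrative
+* only; the use_locking attribute is ignored here.
+* @code
+* #include <vector>
+* #include <cstdint>
+* #include <cstddef>
+*
+* void scatter_update_rows(std::vector<float> &var,             // [rows, cols]
+*                          const std::vector<int32_t> &indices,  // one row id per update
+*                          const std::vector<float> &updates,    // [indices.size(), cols]
+*                          std::size_t cols) {
+*   for (std::size_t i = 0; i < indices.size(); ++i)
+*     for (std::size_t c = 0; c < cols; ++c)
+*       var[static_cast<std::size_t>(indices[i]) * cols + c] = updates[i * cols + c];
+* }
+* @endcode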
\n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator ScatterUpdate.
+*/
+REG_OP(ScatterUpdate)
+    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
+    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
+    .ATTR(use_locking, Bool, false)
+    .OP_END_FACTORY_REG(ScatterUpdate)
+
+/**
+*@brief Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched `input` . \n
+
+*@par Inputs:
+* Three inputs, including:
+*@li input: Rank `r` tensor where `r >= 2`. \n
+
+*@li k: \n
+*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n
+*diagonal, and negative value means subdiagonals. `k` can be a single integer \n
+*(for a single diagonal) or a pair of integers specifying the low and high ends \n
+*of a matrix band. `k[0]` must not be larger than `k[1]`. \n
+
+*@li padding_value: The value to fill the area outside the specified diagonal band with. \n
+
+*@par Outputs:
+*diagonal: The extracted diagonal(s) . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator MatrixDiagPartV2.
+*/
+REG_OP(MatrixDiagPartV2)
+    .INPUT(input, TensorType::BasicType())
+    .INPUT(k, TensorType({DT_INT32}))
+    .INPUT(padding_value, TensorType::BasicType())
+    .OUTPUT(diagonal, TensorType::BasicType())
+    .OP_END_FACTORY_REG(MatrixDiagPartV2)
+
+/**
+*@brief Returns a batched matrix tensor with new batched diagonal values . \n
+
+*@par Inputs:
+* Three inputs, including:
+*@li input: Rank `r+1`, where `r >= 1`. \n
+
+*@li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n
+
+*@li k:
+*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n
+*diagonal, and negative value means subdiagonals. `k` can be a single integer \n
+*(for a single diagonal) or a pair of integers specifying the low and high ends \n
+*of a matrix band. `k[0]` must not be larger than `k[1]`. \n
+
+*@par Outputs:
+*output: Rank `r+1`, with `output.shape = input.shape` . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator MatrixSetDiagV2.
+*/
+REG_OP(MatrixSetDiagV2)
+    .INPUT(input, TensorType::BasicType())
+    .INPUT(diagonal, TensorType::BasicType())
+    .INPUT(k, TensorType({DT_INT32}))
+    .OUTPUT(output, TensorType::BasicType())
+    .OP_END_FACTORY_REG(MatrixSetDiagV2)
+
+/**
+*@brief Returns a batched diagonal tensor with given batched diagonal values . \n
+
+*@par Inputs:
+* Five inputs, including:
+*@li diagonal: Rank `r`, where `r >= 1` \n
+
+*@li k:
+*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n
+*diagonal, and negative value means subdiagonals. `k` can be a single integer \n
+*(for a single diagonal) or a pair of integers specifying the low and high ends \n
+*of a matrix band. `k[0]` must not be larger than `k[1]`. \n
+
+*@li num_rows:
+*The number of rows of the output matrix. If it is not provided, the op assumes \n
+*the output matrix is a square matrix and infers the matrix size from k and the \n
+*innermost dimension of `diagonal`. \n
+
+*@li num_cols: An NCHW, NHWC, or ND Tensor.
+*The number of columns of the output matrix. If it is not provided, the op \n
+*assumes the output matrix is a square matrix and infers the matrix size from \n
+*k and the innermost dimension of `diagonal`. 
\n + +*@li padding_value: The number to fill the area outside the specified diagonal band with. \n + +*@par Outputs: +*output: Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterUpdate. +*/ +REG_OP(MatrixDiagV2) + .INPUT(diagonal, TensorType::BasicType()) + .INPUT(k, TensorType({DT_INT32})) + .INPUT(num_rows, TensorType({DT_INT32})) + .INPUT(num_cols, TensorType({DT_INT32})) + .INPUT(padding_value, TensorType::BasicType()) + .OUTPUT(output, TensorType::BasicType()) + .OP_END_FACTORY_REG(MatrixDiagV2) + +REG_OP(IndexAdd) + .INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .OUTPUT(var_out, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .ATTR(axis, Int, 0) + .OP_END_FACTORY_REG(IndexAdd) + +/** +*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n + +*@par Inputs: +* Two inputs, including: +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +*@li diagonal:(int, optional) – the diagonal to consider。\n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the Pytorch operator Triu. +*/ +REG_OP(Triu) + .INPUT(x, TensorType::BasicType()) + .ATTR(diagonal, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Triu) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/inc/ops/nn_batch_norm_ops.h new file mode 100644 index 00000000..ddd70bc8 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/nn_batch_norm_ops.h @@ -0,0 +1,485 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file nn_batch_norm_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Normalizes elements of a specific dimension of eigenvalues (L2) . \n + +*@par Inputs: +*One input: +*x: A multi-dimensional Tensor of type float16 or float32, specifying the eigenvalue . \n + +*@par Attributes: +*@li axis: A required attribute of type list, specifying the axis for normalization. +*@li eps: An optional attribute of type float, specifying the lower limit of normalization. Defaults to "1e-4" . \n + +*@par Outputs: +*y: A multi-dimensional Tensor of type float16 or float32, specifying the eigenvalue for normalization . 
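+*
+* @par Example
+* A minimal host-side sketch of one common formulation of the documented
+* math, normalizing along the innermost axis: y = x / max(||x||_2, eps).
+* Whether "eps" clamps the norm or the squared sum is an implementation
+* detail; this is an illustration only, and the helper name is assumed.
+* @code
+* #include <vector>
+* #include <cmath>
+* #include <algorithm>
+* #include <cstddef>
+*
+* // x: [rows, cols] row-major; each row is scaled to (at most) unit L2 norm.
+* std::vector<float> l2_normalize_rows(const std::vector<float> &x,
+*                                      std::size_t rows, std::size_t cols,
+*                                      float eps = 1e-4f) {
+*   std::vector<float> y(x.size());
+*   for (std::size_t r = 0; r < rows; ++r) {
+*     float sq = 0.0f;
+*     for (std::size_t c = 0; c < cols; ++c) sq += x[r * cols + c] * x[r * cols + c];
+*     const float denom = std::max(std::sqrt(sq), eps);
+*     for (std::size_t c = 0; c < cols; ++c) y[r * cols + c] = x[r * cols + c] / denom;
+*   }
+*   return y;
+* }
+* @endcode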
\n + +*@par Third-party framework compatibility +* Compatible with the L2 scenario of PyTorch operator Normalize. +*/ +REG_OP(L2Normalize) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(axis, ListInt, {}) + .ATTR(eps, Float, 1e-4) + .OP_END_FACTORY_REG(L2Normalize) + +/** +*@brief Performs the backpropagation of L2Normalize for training scenarios . \n + +*@par Inputs: +* Three inputs, including: +*@li x: A multi-dimensional Tensor of type float16 or float32, specifying +* the eigenvalue of forward inputs. +*@li y: A multi-dimensional Tensor of type float16 or float32, specifying +* the normalization result of the forward output. +*@li dy: A multi-dimensional Tensor of type float16 or float32, specifying +* the reverse input gradient . \n + +*@par Attributes: +*@li axis: A required attribute of type int, specifying the axis to be +* normalized. +*@li eps: An optional attribute of type float, specifying the lower limit of +* normalization. Defaults to "1e-4" . \n + +*@par Outputs: +*dx: Reverse gradient of eigenvalue "x". Has the same dimensions as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the L2 scenario of PyTorch operator NormalizeGrad. +*/ +REG_OP(L2NormalizeGrad) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(dx, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(dim, ListInt, {}) + .ATTR(eps, Float, 0.0001) + .OP_END_FACTORY_REG(L2NormalizeGrad) + +/** +*@brief Performs batch normalization . \n + +*@par Inputs: +* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the scaling factor. +*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the offset. +*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the +operation is used for training. +*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be +5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" +if the operation is used for training . \n + +*@par Attributes: +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". +*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n + +*@par Outputs: +* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) +*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. +*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the mean of "x". +*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". 
+*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n + +*@attention Constraints: +*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, +then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n + +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator fused_batch_norm. +*@li Compatible with the TensorFlow operator fused_batch_norm_v2. +*/ +REG_OP(BatchNorm) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NHWC") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(BatchNorm) + +/** +*@brief Performs batch normalization . \n + +*@par Inputs: +* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) +*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 4D or NDC1HWC0 for 6D. +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NDC1HWC0. Specifies the scaling factor. +*@li offset: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NC1HWC0. Specifies the offset. +*@li mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the +operation is used for training. +*@li variance: A Tensor of type float32. Must be 3D if input "x" is with format NHWC or NCHW. Must be +5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" +if the operation is used for training . \n + +*@par Attributes: +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". +*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n + +*@par Outputs: +* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) +*@li y: A 3D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 4D or NDC1HWC0 for 6D. +*@li batch_mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NDC1HWC0. Specifies the mean of "x". +*@li batch_variance: A Tensor of type float32. 
Must be 1D if input "x" is with format NDHWC or NCDHW. +Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x". +*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. +Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n + +*@attention Constraints: +*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, +then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n + +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator fused_batch_norm. +*@li Compatible with the TensorFlow operator fused_batch_norm_v2. +*/ +REG_OP(BatchNorm3D) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NCDHW") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(BatchNorm3D) +/** +*@brief Performs batch normalization . \n + +*@par Inputs: +* Five inputs, including: (NHWC or NCHW supported) +*@li x: A 4D Tensor of type float16 or float32. +*@li scale: A 1D Tensor of type float32, for the scaling factor. +*@li offset: A 1D Tensor of type float32, for the scaling offset. +*@li mean: A 1D Tensor of type float32, for the mean used for inference. +Must be "None" if the operation is used for training. +*@li variance: A 1D Tensor of type float32, for the variance used for inference. +Must be "None" if the operation is used for training . \n + +*@par Attributes: +*@li epsilon: An optional float32, specifying the small value +added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". +*@li is_training: An optional bool, specifying if the operation +is used for training or inference. Defaults to "True" . \n + +*@par Outputs: +* Five outputs, including: (NHWC or NCHW supported) +*@li y: A 4D Tensor of type float16 or float32, for the normalized "x". +*@li batch_mean: A 1D Tensor of type float32, for the mean of "x". +*@li batch_variance: A 1D Tensor of type float32, for the variance of "x". +*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for gradient computation. +*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" for gradient computation . \n + +*@attention Constraints: +*@li If the operation is used for inference, then output "reserve_space_1" +has the same value as "mean" and output "reserve_space_2" has the same value as "variance". +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . 
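+*
+* @par Example
+* A minimal host-side sketch of the per-channel inference formula the
+* batch-norm family of operators is documented to compute:
+* y = scale * (x - mean) / sqrt(variance + epsilon) + offset. Training-mode
+* statistics updates are omitted; this is an illustration only, not the
+* device implementation.
+* @code
+* #include <vector>
+* #include <cmath>
+* #include <cstddef>
+*
+* // x: [n, c] with one value per (sample, channel); scale/offset/mean/variance: [c].
+* std::vector<float> batch_norm_infer(const std::vector<float> &x,
+*                                     const std::vector<float> &scale,
+*                                     const std::vector<float> &offset,
+*                                     const std::vector<float> &mean,
+*                                     const std::vector<float> &variance,
+*                                     std::size_t n, std::size_t c,
+*                                     float epsilon = 1e-4f) {
+*   std::vector<float> y(x.size());
+*   for (std::size_t i = 0; i < n; ++i)
+*     for (std::size_t j = 0; j < c; ++j)
+*       y[i * c + j] = scale[j] * (x[i * c + j] - mean[j]) /
+*                      std::sqrt(variance[j] + epsilon) + offset[j];
+*   return y;
+* }
+* @endcode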
\n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator fused_batch_norm_v2. +*/ +REG_OP(BatchNormExt2) + .INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_scale, TensorType({DT_FLOAT})) + .INPUT(input_offset, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(input_mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(input_variance, TensorType({DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output_mean, TensorType({DT_FLOAT})) + .OUTPUT(output_variance, TensorType({DT_FLOAT})) + .OUTPUT(output_reserve_space_1, TensorType({DT_FLOAT})) + .OUTPUT(output_reserve_space_2, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NHWC") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(BatchNormExt2) + +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Five inputs, including: +*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient. +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. +*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. +*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. +*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n + +*@par Attributes: +*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". +*@li data_format: An optional string. Defaults to "NHWC". +*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n + +*@par Outputs: +*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x". +*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "scale". +*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "offset". +*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output. +*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output . \n + +*@attention Constraints: +* The preceding layer of this operator must be operator BatchNorm . \n + +*@see BatchNorm +*@par Third-party framework compatibility +* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNormGrad. +*/ +REG_OP(BatchNormGrad) + .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(reserve_space_1, TensorType({DT_FLOAT})) + .INPUT(reserve_space_2, TensorType({DT_FLOAT})) + .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) + .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_4, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_5, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NHWC") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(BatchNormGrad) + +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Five inputs, including: +*@li y_backprop: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient. 
+*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0. +*@li scale: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. +*@li reserve_space_1: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NC1HWC0. It is an output of BatchNorm. +*@li reserve_space_2: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NC1HWC0. It is an output of BatchNorm . \n + +*@par Attributes: +*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". +*@li data_format: An optional string. Defaults to "NCDHW". +*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n + +*@par Outputs: +*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x". +*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale". +*@li *offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset". +*@li *reserve_space_4: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output. +*@li *reserve_space_5: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n + +*@attention Constraints: +* The preceding layer of this operator must be operator BatchNorm . \n + +*@see BatchNorm +*@par Third-party framework compatibility +* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad. +*/ +REG_OP(BatchNorm3DGrad) + .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(reserve_space_1, TensorType({DT_FLOAT})) + .INPUT(reserve_space_2, TensorType({DT_FLOAT})) + .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) + .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_4, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_5, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NCDHW") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(BatchNorm3DGrad) + +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Five inputs, including: +*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. +*@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW. +*@li scale: A 4D Tensor of type float32, with format NHWC or NCHW. +*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2. +*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2 . \n + +*@par Attributes: +*@li epsilon: A required float32. A small float number added to the variance of "x". +*@li data_format: A required string for the format. +*@li is_training: A required bool for specifying the operation is for training (true) or inference (false) . \n + +*@par Outputs: +*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, for the offset of "x". +*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "scale". +*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "offset". +*@li reserve_space_3: A Tensor of type float32, with format NHWC or NCHW. 
+*@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW . \n + +*@attention Constraints: +* The preceding layer of this operator must be BatchNormExt2 . \n + +*@see BatchNormExt2 +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator FusedBatchNormGradV2. +*/ +REG_OP(BatchNormGradExt2) + .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(reserve_space_1, TensorType({DT_FLOAT})) + .INPUT(reserve_space_2, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NHWC") + .ATTR(is_training, Bool, true) + .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) + .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_3, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_4, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BatchNormGradExt2) + + +/** +*@brief Performs batch normalization . \n + +*@par Inputs: +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. +*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. +*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. +*@li momentum: A Tensor,represents the mean and the variance's scale factor +*@li scale: An optional tensor of type float16 or float32, no use +*@li offset: An optional tensor of type float16 or float32, no use +*@par Attributes: +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". +*@li use_global_stats: mean inference mode , only can be "True". +*@li mode: An optional input, not use +*@par Outputs: +*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" +*/ +REG_OP(BNInference) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(variance, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT})) + .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT})) + .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(epsilon, Float,1e-5f) + .ATTR(use_global_stats, Bool,true) + .ATTR(mode, Int,1) + .OP_END_FACTORY_REG(BNInference) +/** +*@brief aicpu batch normalization host . \n + +*@par Inputs: + +*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. +*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. +*@li momentum: An optional float, mean and variance's Scale factor +*@par Attributes: +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". +*@li use_global_stats: mean inference mode , only can be "True". 
+*@li mode: An optional attr, not use +*@par Outputs: +*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean +*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance +*/ +REG_OP(BnHost) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT})) + .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT})) + .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(epsilon, Float, 0.00001) + .ATTR(mode, Int, 1) + .ATTR(use_global_stats, Bool, true) + .OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(BnHost) +/** +*@brief Performs batch normalization . \n + +*@par Inputs: +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. +*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. +*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. +*@li scale: An optional tensor of type float16 or float32, no use +*@li offset: An optional tensor of type float16 or float32, no use +*@par Attributes: +*@li momentum: An optional float32 num, represents the mean and the variance's scale factor +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". +*@li use_global_stats: mean inference mode , only can be "True". +*@li mode: An optional attr, not use +*@par Outputs: +*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use BNInference instead. +*/ +REG_OP(BNInferenceD) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(variance, TensorType({DT_FLOAT16,DT_FLOAT})) + .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(momentum, Float,0.9) + .ATTR(epsilon, Float,1e-5f) + .ATTR(use_global_stats, Bool,true) + .ATTR(mode, Int,1) + .OP_END_FACTORY_REG(BNInferenceD) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/inc/ops/nn_calculation_ops.h new file mode 100644 index 00000000..53922ee6 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/nn_calculation_ops.h @@ -0,0 +1,1711 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
+ * \file nn_calculation_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** +* @brief Computes the gradients of depthwise convolution with respect to +* the filter . \n + +* @par Inputs: +* Three inputs include: \n +* @li input: 4D origin shape of input tensor [N, C, H, W] or [N, H, W, C], +* support float16, float32, double +* @li filter_size: A 4D tensor of type int32, with shape [H, W, C, K] +* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. +* Must be one of the following types: float16, float32, double . \n + +* @par Attributes: +* @li strides: A required list or tuple. The stride of the sliding window +* for height and width of input "x" of the convolution. +* Must be with shape [1, 1, stride_height, stride_width] or +* [1, stride_height, stride_width, 1]. +* @li dilations: An optional list or tuple. The dilation factor for each +* dimension of input "x". +* If set to k > 1, there will be k-1 skipped cells between each filter element +* on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] +* or [1, dilation_height, dilation_width, 1]. +* @li pads: A required list or tuple. Padding added to each dimension of the +* input. +* @li data_format: An optional string. Input data format, either "NHWC" or +* "NCHW" . \n + +* @par Outputs: +* filter_grad: Gradient of the deep convolution relative to the filter with +* shape [H, W, C, K]. Must be one of the following types: float16, float32, +* double . \n + +* @attention Constraints:\n +* The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but +* the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n +* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape +* [C1, Hf, Wf, K, Co, C0], +* where K is fixed at 1, and Co and C0 are 16.\n +* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the +* data is 5D with shape [N, C1, Ho, Wo, C0], +* where C is the same as that of the feature map and C0 is 16.\n +* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * +* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512 . \n + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. +* @li Compatible with the Caffe operator DepthwiseConv2DBackpropFilter. +*/ +REG_OP(DepthwiseConv2DBackpropFilter) + .INPUT(input, TensorType({float16})) + .INPUT(filter_size, TensorType({DT_INT32, DT_INT64})) + .INPUT(out_backprop, TensorType({float16})) + .OUTPUT(filter_grad, TensorType({float32})) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(DepthwiseConv2DBackpropFilter) + +/** +* @brief Computes the gradients of depthwise convolution with respect to +* the filter . \n + +* @par Inputs: +* Two inputs include: \n +* @li input: 4D tensor with shape [N, C, H, W] or [N, H, W, C], of type float16 +* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C], +* of type float16 + +* @par Attributes: +* @li filter_size: A required list or tuple. Shape of filter. +* @li strides: A required list or tuple. The stride of the sliding window for +* height and width of input "x" of the convolution. +* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, +* stride_width, 1]. 
+* @li dilations: An optional list or tuple. The dilation factor for each +* dimension of input "x". +* If set to k > 1, there will be k-1 skipped cells between each filter element +* on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] +* or [1, dilation_height, dilation_width, 1]. +* @li pads: A required list or tuple. Padding added to each dimension of the +* input. +* @li data_format: An optional string. Input data format, either "NHWC" or +* "NCHW" . \n + +* @par Outputs: +* filter_grad: Gradient of the deep convolution relative to the filter with +* shape [H, W, C, K]. Must be of type float32 . \n + +* @attention Constraints:\n +* The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but +* the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n +* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape +* [C1, Hf, Wf, K, Co, C0], +* where K is fixed at 1, and Co and C0 are 16.\n +* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the +* data is 5D with shape [N, C1, Ho, Wo, C0], +* where C is the same as that of the feature map and C0 is 16.\n +* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * +* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512 . \n + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. +* @li Compatible with the Caffe operator DepthwiseConv2DBackpropFilter. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use DepthwiseConv2DBackpropFilter +* instead. +*/ +REG_OP(DepthwiseConv2DBackpropFilterD) + .INPUT(input, TensorType({float16})) + .INPUT(out_backprop, TensorType({float16})) + .OUTPUT(filter_grad, TensorType({float32})) + .REQUIRED_ATTR(filter_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(DepthwiseConv2DBackpropFilterD) + +/** +* @brief Computes the gradients of depthwise convolution with respect to the +* input . \n + +* @par Inputs: +* Three inputs include: \n +* @li input_size: 4D shape of input tensor [N, C, H, W] or [N, H, W, C], +* support int32, int64 +* @li filter: 4D filter tensor with shape of [H, W, C, K], support float16. +* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. +* Must be one of the following types: float16 . \n + +* @par Attributes: +* @li strides: A required list or tuple of int32. The stride of the sliding window for +* height and width of input "x" of the convolution. +* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, +* stride_width, 1]. +* @li dilations: An optional list or tuple of int32. The dilation factor for each +* dimension of input "x". Defaults to "[1, 1, 1, 1]". +* If set to k > 1, there will be k-1 skipped cells between each filter element +* on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] +* or [1, dilation_height, dilation_width, 1]. +* @li pads: A required list or tuple of int32. Padding added to each dimension of the +* input. +* @li data_format: An optional string. Input data format, either "NHWC" or +* "NCHW". Defaults to "NHWC" . \n + +* @par Outputs: +* input_grad: Gradient of the deep convolution relative to the input with shape +* [N, C, H, W] or [N, H, W, C] Must be one of the following types: float16 . 
\n + +* @attention Constraints:\n +* The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but +* the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n +* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape +* [C1, Hf, Wf, K, Co, C0], +* where K is fixed at 1, and Co and C0 are 16.\n +* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the +* data is 5D with shape [N, C1, Ho, Wo, C0], +* where C is the same as that of the feature map and C0 is 16.\n +* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * +* Wf * C0 * C0 * 2) / (2 * Wo *C0).\n + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropInput. +* @li Compatible with the Caffe operator DepthwiseConv2DBackpropInput. +*/ +REG_OP(DepthwiseConv2DBackpropInput) + .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) + .INPUT(filter, TensorType({DT_FLOAT16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16})) + .OUTPUT(input_grad, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(DepthwiseConv2DBackpropInput) + +/** +* @brief Computes the gradients of depthwise convolution with respect to the +* input . \n + +* @par Inputs: +* Two inputs include: \n +* @li filter: A 4D tensor of type float16, with shape [H, W, C, K] +* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C], of +* type float16 + +* @par Attributes: +* @li input_size: A required list or tuple. The origin shape of input. +* @li strides: A required list or tuple. The stride of the sliding window for +* height and width of input "x" of the convolution. +* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, +* stride_width, 1]. +* @li dilations: An optional list or tuple. The dilation factor for each +* dimension of input "x". +* If set to k > 1, there will be k-1 skipped cells between each filter element +* on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] +* or [1, dilation_height, dilation_width, 1]. +* @li pads: A required list or tuple. Padding added to each dimension of the +* input. +* @li data_format: An optional string. Input data format, either "NHWC" or +* "NCHW" . \n + +* @par Outputs: +* input_grad: Gradient of the deep convolution relative to the input with +* shape [N, C, H, W] or [N, H, W, C]. Must be of type float16 . \n + +* @attention Constraints:\n +* The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but +* the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n +* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape +* [C1, Hf, Wf, K, Co, C0], +* where K is fixed at 1, and Co and C0 are 16.\n +* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the +* data is 5D with shape [N, C1, Ho, Wo, C0], +* where C is the same as that of the feature map and C0 is 16.\n +* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * +* Wf * C0 * C0 * 2) / (2 * Wo *C0).\n + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropInput. +* @li Compatible with the Caffe operator DepthwiseConv2DBackpropInput. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use DepthwiseConv2DBackpropInput +* instead. 
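+*
+*@par Example:
+* Illustrative only: a minimal sketch of the recommended replacement, assuming
+* the ge::op::DepthwiseConv2DBackpropInput class and the set_input_xxx and
+* set_attr_xxx accessors generated from its REG_OP registration above. The node
+* name and the upstream operators (input_size_const, filter_op, grad_op) are
+* hypothetical.
+*@verbatim
+    ge::op::DepthwiseConv2DBackpropInput op("dw_bp_input");
+    op.set_input_input_size(input_size_const)   // int32 const, e.g. {N, H, W, C}
+      .set_input_filter(filter_op)              // float16 filter, [H, W, C, K]
+      .set_input_out_backprop(grad_op);         // float16 gradients, NHWC in this sketch
+    op.set_attr_strides({1, 2, 2, 1});          // [1, stride_h, stride_w, 1] for NHWC
+    op.set_attr_pads({0, 0, 0, 0});
+    op.set_attr_data_format("NHWC");
+@endverbatim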
+*/ +REG_OP(DepthwiseConv2DBackpropInputD) + .INPUT(filter, TensorType({DT_FLOAT16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16})) + .OUTPUT(input_grad, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(input_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(DepthwiseConv2DBackpropInputD) + +/** +*@brief Computes a 2D deep convolution given a 4D input tensor and a filter +* tensor . \n + +*@par Inputs: +*Two required inputs and two optional inputs, including: \n +* @li x: A 4D tensor of type float16 or int8, with shape [N, C, H, W] or [N, H, W, C] +* @li filter: A 4D tensor of type float16 or int8, with shape [H, W, C, K] +* @li bias: An optional tensor of type float16 or int32 +* @li offset_w: An optional float16 or int8, used for quantized inference + +* @par Attributes: +* @li strides: A required list or tuple. The stride of the sliding window for +* height and width of input "x" of the convolution. +* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, +* stride_width, 1]. +* @li dilations: An optional list or tuple. The dilation factor for each +* dimension of input "x". +* If set to k > 1, there will be k-1 skipped cells between each filter element +* on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] +* or [1, dilation_height, dilation_width, 1]. Defaults to "[1, 1, 1, 1]". +* @li pads: A required list or tuple of int32. Padding added to each dimension of the +* input. +* @li data_format: An optional string. Input data format, either "NHWC" or +* "NCHW". Defaults to "NHWC". +* @li offset_x: An optional int. Input offset, used for quantized inference. +* Defaults to 0 . \n + +* @par Outputs: +* y: 4D tensor of type float16 or int32, with shape [N, C, H, W] or [N, H, W, C] + +* @attention Constraints:\n +* The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but +* the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n +* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape +* [C1, Hf, Wf, K, Co, C0], +* where K is fixed at 1, and Co and C0 are 16.\n +* Limited by the size of L1 buffer memory: \n +* (l1_size - filter_h*filter_w*BLOCK_SIZE*BLOCK_SIZE*data_size) // (Wi * +* BLOCK_SIZE * data_size) >= (BLOCK_SIZE * strides_h + filter_h - strides_h).\n + +* @par Quantization supported or not +* Yes + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator DepthwiseConv2D. +* @li Compatible with the Caffe operator DepthwiseConv2D. +*/ +REG_OP(DepthwiseConv2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_FLOAT16, DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(data_format, String, "NHWC") + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(DepthwiseConv2D) + +/** +*@brief Performs the the backward operation for "BiasAdd" on the "bias" tensor. +* It accumulates all the values from out_backprop into the feature +* dimension. For NHWC data format, the feature dimension is the last. +* For NCHW data format, the feature dimension is the third-to-last . \n + +*@par Inputs: +*x: A Tensor of type NumberType . \n + +*@par Attributes: +*data_format: Data format. 
Defaults to "NHWC" . \n + +*@par Outputs: +*y: A Tensor.Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BiasAddGrad. +*/ +REG_OP(BiasAddGrad) + .INPUT(x, TensorType::NumberType()) + .OUTPUT(y, TensorType::NumberType()) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(BiasAddGrad) + +/** +*@brief Computes the gradients of convolution with respect to the input. +*@par Inputs: + * Three inputs: + * @li input_size: A const Tensor of type int32. Currently does not support + * data tensor. An integer vector representing the shape of input, where + * input is a 4-D tensor [batch, height, width, channels] + * or [batch, channels, height, width]. + * @li filter: A Tensor. Must be one of the following types: float16, float32, + * float64. 4-D with shape + * [filter_height, filter_width, in_channels, out_channels] + * or [out_channels, filter_height, filter_width, in_channels] + * or [out_channels, in_channel, filter_height, filter_width]. + * @li out_backprop: A Tensor. Must have the same type as filter. + * 4-D with shape [batch, out_height, out_width, out_channels] + * or [batch, out_channels, out_height, out_width]. + * Gradients with respect to the output of the convolution. + *\n + *\n + * The following are the supported data types and data formats: +*@verbatim + | Tensor | out_bckprop | filter | y + ------------|-------------|---------|-------- + | Data Type | float16 | float16 | float16 + | |-------------|---------|-------- + | | float32 | float32 | float32 + | |-------------|---------|-------- + | | float64 | float64 | float64 + ------------|-------------|---------|-------- + | Format | NCHW | NCHW | NCHW + | | NHWC | HWCN | NHWC +@endverbatim + * For float32 and float64 type, the actual calculation on the chip is based on + * float16. + *\n + * +*@par Attributes: + * Five attributes: + * @li strides: A tuple/list of 4 integers. The stride of the sliding window + * for H/W dimension. The index of H/W is same as data_format. + * @li pads: A tuple/list of 4 integers, [top, bottom, left, right] pads + * on feature map + * @li dilations: A tuple/list of 4 integers, The dilation factor for each + * dimension of input, defaults to [1,1,1,1]. + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to + * "NHWC". Specify the data format of the input and output data. + *\n + *\n + * The following value range restrictions must be met: +*@verbatim + | Name | Field | Scope + -------------------|----------|-------------- + | input_size | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Filter | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | out_backprop | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | y(fmap) | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Stride | H | [1, 63] + | | W | [1, 63] + -------------------|----------|-------------- + | Padding | Top | [0, 255] + | | Bottom | [0, 255] + | | Left | [0, 255] + | | Right | [0, 255] + -------------------|----------|-------------- + | Dilation | H | [1, 255] + | | W | [1, 255] + +@endverbatim + * In Ascend910, fmap or out_backprop's H and W not support 1 when + * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + *\n + * +*@par Outputs: + * y: A Tensor. 
Has the same type as filter,and has same format as input_size. + *\n + * out_backprop_height = (fmap_height + pad_top + pad_bottom - + * (dilation_h * (filter_height - 1) + 1)) + * / stride_h + 1 + *\n + * out_backprop_width = (fmap_width + pad_left + pad_right - + * (dilation_w * (filter_width - 1) + 1)) + * / stride_w + 1 + *\n + * +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv2d_backprop_input +*/ +REG_OP(Conv2DBackpropInput) + .INPUT(input_size, TensorType({DT_INT32})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(Conv2DBackpropInput) + +/** +*@brief Computes the gradients of convolution with respect to the input. +*@par Inputs: + * Two inputs: + * @li filter: A Tensor. Types is float16. + * 4-D with shape [filter_height, filter_width, in_channels, out_channels] + * or [out_channels, filter_height, filter_width, in_channels] + * or [out_channels, in_channel, filter_height, filter_width]. + * @li out_backprop: A Tensor. Must have the same type as filter. + * 4-D with shape [batch, out_height, out_width, out_channels] + * or [batch, out_channels, out_height, out_width]. + * Gradients with respect to the output of the convolution. +*@par Attributes: + * Six attributes: + * @li input_size A Tensor of type int32. An integer vector representing the + * shape of input, where input is a 4-D tensor [batch, height, width, channels] + * or [batch, channels, height, width]. + * @li strides: A tuple/list of 4 integers. The stride of the sliding window + * for H/W dimension. The index of H/W is same as data_format. + * @li pads: A tuple/list of 4 integers, [top, bottom, left, right] pads on + * feature map + * @li dilations: A tuple/list of 4 integers, The dilation factor for each + * dimension of input, defaults to [1,1,1,1]. + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to + * "NHWC". Specify the data format of the input and output data. +*@par Outputs: + * y: A Tensor. Has the same type as filter,4-D tensor [batch, height, width, + * channels] or [batch, channels, height, width]. +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv2d_backprop_input +*@par Restrictions: + * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DBackpropInput instead. +*/ +REG_OP(Conv2DBackpropInputD) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .REQUIRED_ATTR(input_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(Conv2DBackpropInputD) + +/** +*@brief Computes the Deconvolution with respect to the input. +*@par Inputs: + * Three inputs: + * @li x: A Tensor of type float16 or int8. 4D with shape + * [batch, out_channels, out_height, out_width]. Gradients with respect + * to the output of the convolution. + * @li filter: A Tensor. Must have the same type as "x". 
+ * 4D with shape [out_channels, in_channel, filter_height, filter_width].\n + * Two optional inputs: + * @li bias: An optional tensor. Must have the same type as "y". + * @li offset_w: An optional 1D tensor for quantized deconvolution. + * Type is int8. Reserved.\n + *\n + *\n + * The following are the supported data types and data formats: +*@verbatim + | Tensor | x | filter | bias | y + ------------|---------|---------|---------|-------- + | Data Type | float16 | float16 | float16 | float16 + | |---------|---------|---------|-------- + | | int8 | int8 | int32 | int32 + ------------|---------|---------|---------|-------- + | Format | NCHW | NCHW | ND | NCHW +@endverbatim + * For int8, a dequant or requant operator must be followed. + *\n + * +*@par Attributes: + * Six attributes: + * @li strides: A tuple or list of 2 integers. The stride of the sliding window + * for H/W dimension, defaults to [1,1]. + * @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] + * padding on the feature map, defaults to [0,0,0,0]. + * @li dilations: A tuple or list of 4 integers. The dilation factor for each + * dimension of input, defaults to [1,1,1,1]. + * @li groups: Number of blocked connections from input channels to + output channels. Defaults to "1". + * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n + Specify the data format of the input and output data. + * @li offset_x: An optional integer for quantized deconvolution. + * Defaults to "0". + *\n + *\n + * The following value range restrictions must be met: +*@verbatim + | Name | Field | Scope + -------------------|----------|-------------- + | x (out_backprop) | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Filter | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | y (fmap) | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Stride | H | [1, 63] + | | W | [1, 63] + -------------------|----------|-------------- + | Padding | Top | [0, 255] + | | Bottom | [0, 255] + | | Left | [0, 255] + | | Right | [0, 255] + -------------------|----------|-------------- + | Dilation | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | Offset_x | | [-128, 127] + +@endverbatim + * In Ascend910, fmap or out_backprop's H and W not support 1 when + * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + *\n + * +*@par Outputs: + * y: A Tensor. 4D tensor with shape [batch, channels, height, width]. + *\n + * out_backprop_height = (fmap_height + pad_top + pad_bottom - + * (dilation_h * (filter_height - 1) + 1)) + * / stride_h + 1 + *\n + * out_backprop_width = (fmap_width + pad_left + pad_right - + * (dilation_w * (filter_width - 1) + 1)) + * / stride_w + 1 + *\n + * + * When type of x is float16, the type of y must be float16. + * When type of x is int8, the type of y must be int32. 
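+*
+*@par Example:
+* Illustrative only: a minimal sketch assuming the ge::op::Deconvolution class
+* and the set_input_xxx and set_attr_xxx accessors generated from the REG_OP
+* registration below. The node name and the upstream operators (grad_op,
+* filter_op) are hypothetical.
+*@verbatim
+    ge::op::Deconvolution deconv("deconv_1");
+    deconv.set_input_x(grad_op)              // float16 out_backprop, NCHW
+          .set_input_filter(filter_op);      // float16 filter, NCHW
+    deconv.set_attr_strides({2, 2});         // strides for H/W
+    deconv.set_attr_pads({0, 0, 0, 0});      // top, bottom, left, right
+    deconv.set_attr_data_format("NCHW");
+@endverbatim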
+*/ +REG_OP(Deconvolution) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .ATTR(strides, ListInt, {1, 1}) + .ATTR(pads, ListInt, {0, 0, 0, 0}) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NCHW") + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Deconvolution) +/** +*@brief Computes the gradients of convolution with respect to the filter +*@par Inputs: + * Three inputs: + * @li x: A Tensor. Must be one of the following types: float16, float32, + * float64.4-D with shape [batch, in_height, in_width, in_channels] or + * [batch, in_channels, in_height, in_width]. + * @li filter_size: A const Tensor of type int32. Currently does not support + * data tensor. An integer vector representing the tensor shape of filter, + * where filter is a 4-D tensor [filter_height, filter_width, in_channels, + * out_channels] or [out_channels, filter_height, filter_width, in_channels] + * or [out_channels, in_channel, filter_height, filter_width]. + * @li out_backprop: A Tensor. Must have the same type as x. 4-D with shape + * [batch, out_height, out_width, out_channels] or [batch, out_channels, + * out_height, out_width]. Gradients with respect to the output of the + * convolution. + *\n + *\n + * The following are the supported data types and data formats: +*@verbatim + | Tensor | x | out_backprop | y + ------------|---------|--------------|--------- + | Data Type | float16 | float16 | float16 + | |---------|--------------|--------- + | | float32 | float32 | float32 + | |---------|--------------|--------- + | | float64 | float64 | float64 + |-----------|---------|--------------|--------- + | Format | NCHW | NCHW | NCHW + | | NHWC | NHWC | HWCN +@endverbatim + * For float32 and float64 type of x and outbackprop, the actual calculation on the chip + * is based on float16. + *\n + * +*@par Attributes: + * Five attributes: + * @li strides: A tuple/list of 4 integers. The stride of the sliding window + * for H/W dimension. The index of H/W is same as data_format. + * @li pads: A tuple/list of 4 integers, [top, bottom, left, right] pads on + * feature map. + * @li dilations: A tuple/list of 4 integers, The dilation factor for each + * dimension of input, defaults to [1,1,1,1]. + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to + * "NHWC". Specify the data format of the input and output data. 
+ *\n +*\n +* The following value range restrictions must be met: +*@verbatim + | Name | Field | Scope + -------------------|----------|-------------- + | x(fmap) | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Filter Size | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | out_backprop | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | y | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Stride | H | [1, 63] + | | W | [1, 63] + -------------------|----------|-------------- + | Padding | Top | [0, 255] + | | Bottom | [0, 255] + | | Left | [0, 255] + | | Right | [0, 255] + -------------------|----------|-------------- + | Dilation | H | [1, 255] + | | W | [1, 255] + +@endverbatim + * In Ascend910, out_backprop's H and W not support 1 when + * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + *\n + * +*@par Outputs: + * y: A Tensor. Has the same type as x, has the same format as filter_size. + *\n + * out_backprop_height = (in_height + pad_top + pad_bottom - + * (dilation_h * (filter_height - 1) + 1)) + * / stride_h + 1 + *\n + * out_backprop_width = (in_width + pad_left + pad_right - + * (dilation_w * (filter_width - 1) + 1)) + * / stride_w + 1 + *\n + * +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv2d_backprop_filter +*/ +REG_OP(Conv2DBackpropFilter) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(filter_size, TensorType({DT_INT32})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(Conv2DBackpropFilter) + +/** +*@brief Computes the gradients of convolution with respect to the filter. +*@par Inputs: + * Two inputs: + * @li x: A Tensor. Type is float16. + * 4-D with shape [batch, in_height, in_width, in_channels] or [batch, + * in_channels, in_height, in_width]. + * @li out_backprop: A Tensor. Must have the same type as x. 4-D with shape + * [batch, out_height, out_width, out_channels] or [batch, out_channels, + * out_height, out_width]. Gradients with respect to the output of the + * convolution. +*@par Attributes: + * Six attributes: + * @li filter_size: A Tensor of type integers. An integer vector representing + * the tensor shape of filter, + * where filter is a 4-D tensor [filter_height, filter_width, in_channels, + * out_channels] or [out_channels, filter_height, filter_width, in_channels] + * or [out_channels, in_channel, filter_height, filter_width]. + * @li strides: A tuple/list of 4 integers. The stride of the sliding window + * for H/W dimension. The index of H/W is same as data_format. + * @li pads: A tuple/list of 4 integers, [top, bottom, left, right] pads on + * feature map + * @li dilations: A tuple/list of 4 integers, The dilation factor for each + * dimension of input, defaults to [1,1,1,1]. + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to + * "NHWC". Specify the data format of the input and output data. +*@par Outputs: + * y: A Tensor. 
Type is float32, a 4-D tensor [filter_height, filter_width, + * in_channels, out_channels] or [out_channels, filter_height, filter_width, + * in_channels] or [out_channels, in_channel, filter_height, filter_width]. + * Compatible with Tensorflow's conv2d_backprop_filter +*@par Restrictions: + * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DBackpropFilter instead. +*/ +REG_OP(Conv2DBackpropFilterD) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(filter_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(Conv2DBackpropFilterD) + +/** +*@brief Computes a 2D convolution given 4D "x" and "filter" tensors. +*@par Inputs: +*@li x: A 4D tensor of input image. With the format "NHWC", the data is stored +* in the order of: [batch, in_height, in_width, in_channels]. +*@li filter: A 4D tensor of learnable filters. Must have the same type as "x". +* With the format "HWCN" , the data is stored in the order of: [filter_height, +* filter_width, in_channels / groups, out_channels]. +*@li bias: An optional 1D tensor of additive biases to the filter outputs. +* The data is stored in the order of: [out_channels]. +*@li offset_w: Reserved. +*\n +*\n +* The following are the supported data types and data formats: +*@verbatim + | Tensor | x | filter | bias | y + ------------|---------|---------|---------|-------- + | Data Type | float16 | float16 | float16 | float16 + | |---------|---------|---------|-------- + | | float32 | float32 | float32 | float32 + | |---------|---------|---------|-------- + | | int8 | int8 | int32 | int32 + ------------|---------|---------|---------|-------- + | Format | NCHW | NCHW | ND | NCHW + | | NHWC | HWCN | | NHWC +@endverbatim +* For float32 type, the actual calculation on the chip is based on +* float16. For int8, a dequant or requant operator must be followed. +*\n +* +*@par Attributes: +*@li strides: Required. A list of 4 integers. The stride of the sliding window +* for each dimension of input. The dimension order is determined by the data +* format of "x". The N and C dimensions must be set to 1. +*@li pads: Required. A list of 4 integers. The number of pixels to add to each +* (top, bottom, left, right) side of the input. +*@li dilations: Optional. A list of 4 integers. The dilation factor for each +* dimension of input. The dimension order is determined by the data format of +* "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1]. +*@li groups: Optional. An integer of type int32. The number of blocked +* connections from input channels to output channels. In_channels and +* out_channels must both be divisible by "groups". Defaults to 1. +*@li offset_x: Optional. An integer of type int32. The negative offset added +* to the input image for int8 type. Ensure that the output is within the +* effective range. Defaults to 0. +*@li data_format: Reserved. 
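+*
+* As a worked example of how these attributes combine (illustrative values
+* only): for a 224 x 224 input, a 7 x 7 filter, strides of 2 for H/W, pads of
+* [3, 3, 3, 3] and dilations of 1, each spatial output dimension is
+* (224 + 3 + 3 - 7) / 2 + 1 = 112, per the output shape formula given under
+* "Outputs" below.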
+*\n +*\n +* The following value range restrictions must be met: +*@verbatim + | Name | Field | Scope + -------------------|----------|-------------- + | Input Image Size | H | [1, 100000] + | | W | [1, 4096] + -------------------|----------|-------------- + | Filter Size | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | Stride | H | [1, 63] + | | W | [1, 63] + -------------------|----------|-------------- + | Padding | Top | [0, 255] + | | Bottom | [0, 255] + | | Left | [0, 255] + | | Right | [0, 255] + -------------------|----------|-------------- + | Dilation | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | Offset_x | | [-128, 127] + +@endverbatim +* The W dimension of the input image supports cases exceeding 4096, but it may +* cause compilation errors. +*\n +* +*@par Outputs: +*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the +* format "NHWC", the data is stored in the order of: [batch, out_height, +* out_width, out_channels]. +*\n +* out_height = (in_height + pad_top + pad_bottom - +* (dilation_h * (filter_height - 1) + 1)) +* / stride_h + 1 +*\n +* out_width = (in_width + pad_left + pad_right - +* (dilation_w * (filter_width - 1) + 1)) +* / stride_w + 1 +*\n +* +*@par Quantization supported or not +*@li Yes +* +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator "conv2d". +*@li Compatible with the Caffe operator 2D "Convolution". +*/ +REG_OP(Conv2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv2D) + +/** +*@brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors. +*@par Inputs: +* @li x: A 4D tensor of input images. +* @li filter_compress: A 4D tensor of compressed filters. +* @li compress_index: A 1D Tensor dtype of int8. +* @li bias: An optional 1D tensor. +* @li offset_w: An optional 1D tensor for quantized convolution. Reserved. +* +* The input and output tensor attributes are listed as follows: +* @verbatim + |Tensor | x | filter_compress | bias | offset_w | y + -----------|---------|---------|---------|----------|-------- + |Data Type | float16 | float16 | float16 | _ | float16 + | |---------|---------|---------|----------|-------- + | | float32 | float32 | float32 | _ | float32 + | |---------|---------|---------|----------|-------- + | | int8 | int8 | int32 | int8 | int32 + -----------|---------|---------|---------|----------|-------- + |Format | NCHW | NCHW | ND | ND | NCHW + | | NHWC | NHWC | | | NHWC + | | | HWCN | | | +@endverbatim +* It should be noted that the data types must correspond to each other, but the +* format does not need to . \n + +*@par Attributes: +* @li strides: A list of 4 integers. Specifying the strides of the +* convolution along the height and width. The dimension order is determined +* by the data format of "x". By default the N and C dimensions are set to 1. +* @li pads: A list of 4 integers. Specifying the top, bottom, left and right +* padding. +* @li dilations: A list of 4 integers. 
Specifying the dilation rate to use +* for dilated convolution. Has the same dimension order and value as "strides". +* @li groups: Number of blocked connections from input channels to output +* channels. Input channels and output channels must both be divisible by +* "groups".Type is int32. +* @li offset_x: An optional integer for quantized convolution. Type is int32. +* Defaults to "0". +* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the +* data format of the input and output images. Type is string. +* Defaults to "NHWC". Reserved . \n + +*@par Outputs: +* @li y: A 4D Tensor of output images . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. +*/ +REG_OP(Conv2DCompress) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(compress_index, TensorType({DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv2DCompress) + +/** +*@brief Computes a 2D deformable convolution given 4D "x", "filter" and +* "offsets" tensors. +*@par Inputs: +*@li x: A 4D tensor of input image. With the format "NHWC", the data is stored +* in the order of: [batch, in_height, in_width, in_channels]. +*@li filter: A 4D tensor of learnable filters. Must have the same type as "x". +* With the format "HWCN" , the data is stored in the order of: [filter_height, +* filter_width, in_channels / groups, out_channels]. +*@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format +* "NHWC", the data is stored in the order of: [batch, out_height, out_width, +* deformable_groups * filter_height * filter_width * 3]. +*@li bias: An optional 1D tensor of additive biases to the filter outputs. +* The data is stored in the order of: [out_channels]. +*\n +*\n +* The following are the supported data types and data formats: +*@verbatim + | Tensor | x | filter | offsets | bias | y + ------------|---------|---------|---------|----------|-------- + | Data Type | float16 | float16 | float16 | float16 | float16 + | |---------|---------|---------|----------|-------- + | | float32 | float32 | float32 | float32 | float32 + ------------|---------|---------|---------|----------|-------- + | Format | NCHW | NCHW | NCHW | ND | NCHW + | | NHWC | HWCN | NHWC | | NHWC +@endverbatim +* For float32 type, the actual convolution calculation part on the chip is +* based on float16. +*\n +* +*@par Attributes: +*@li strides: Required. A list of 4 integers. The stride of the sliding window +* for each dimension of input. The dimension order is interpreted according to +* the data format of "x". The N and C dimensions must be set to 1. +*@li pads: Required. A list of 4 integers. The number of pixels to add to each +* (top, bottom, left, right) side of the input. +*@li dilations: Optional. A list of 4 integers. The dilation factor for each +* dimension of input. The dimension order is interpreted according to the data +* format of "x". The N and C dimensions must be set to 1. Defaults to +* [1, 1, 1, 1]. +*@li groups: Optional. An integer of type int32. The number of blocked +* connections from input channels to output channels. 
In_channels and +* out_channels must both be divisible by "groups". Defaults to 1. +*@li data_format: Reserved. +*@li deformable_groups: Optional. An integer of type int32. The number of +* deformable group partitions. In_channels must be divisible by +* "deformable_groups". Defaults to 1. +*\n +*\n +* The following value range restrictions must be met: +*@verbatim + | Name | Field | Scope + --------------------|--------|---------------------------- + | Input Image Size | H | [1, 100000 / filter_height] + | | W | [1, 4096 / filter_width] + --------------------|--------|---------------------------- + | Filter Size | H | [1, 63] + | | W | [1, 63] +@endverbatim +*\n +* +*@par Outputs: +*@li y: A 4D Tensor of output feature map. Has the same type as "x". With the +* format "NHWC", the data is stored in the order of: [batch, out_height, +* out_width, out_channels]. +*\n +* out_height = (in_height + pad_top + pad_bottom - +* (dilation_h * (filter_height - 1) + 1)) +* / stride_h + 1 +*\n +* out_width = (in_width + pad_left + pad_right - +* (dilation_w * (filter_width - 1) + 1)) +* / stride_w + 1 +*\n +* +*@par Quantization supported or not +*@li No +* +*@par Third-party framework compatibility +*@li Compatible with the Mxnet operator "DeformableConvolution". +*@li Compatible with the Paddlepaddle operator "deformable_conv". +*@li Compatible with the Mmcv operator "deform_conv". +*/ +REG_OP(DeformableConv2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .ATTR(deformable_groups, Int, 1) + .OP_END_FACTORY_REG(DeformableConv2D) + +/** +*@brief Computes a 3D convolution given 5D "x" and "filter" tensors. + *@par Inputs: + * @li x: A 5D tensor. Must be one of the following types: float16, + * (Currently does not support int8). The format of x is NCDHW or NDHWC. + * @li filter: A 5D tensor of the same type as "x". + * (Currently does not support int8). + * The format is NCDHW, NDHWC or DHWCN . \n + +*@par Optional input: + * @li bias: An optional 1D tensor of the same type as "x". + * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n + +*@par Required Attributes: + * @li strides: A list of 5 integers. Specifies the stride of the sliding window + * for each dimension of "x". + * The N and C dimensions must be 1. Has the same format as "x". + * @li pads: A list of 6 integers. + * Supports only padding along the D, H and W dimensions in sequence of head, + * tail, top, bottom, left and right . \n + +*@par Attributes: + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li data_format: An optional string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. + * @li dilations: A list of 5 integers. Specifies the dilation factor for each + * dimension of "x". + * The N, C and D dimensions must be 1. Has the same format as "x". + * @li offset_x: An optional int. Input offset, used for quantized inference. + * Defaults to 0. Reserved . \n + +*@par Outputs: + *y: A Tensor. Has the same type and data format as "x". \n + +*@attention Constraints: + *The image size after padding is greater than the filter size . 
\n + +*@par Third-party framework compatibility + * @li Compatible with the TensorFlow operator conv3d. + * @li Compatible with the Caffe operator Convolution. +*/ +REG_OP(Conv3D) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(filter, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv3D) + + +/** +*@brief Computes the gradients of convolution 3d with respect to the input. +*@par Inputs: + * Three inputs: + * @li input_size: A Tensor of type int32, int64. An integer vector representing + * the shape of input, where input is a 5-D tensor + * [batch, depth, height, width, channels] or + * [batch, channels, depth, height, width]. + * @li filter: A Tensor. Must be one of the following types: float16, float32. + * Currently does not support double. + * @li out_backprop: A Tensor. Must have the same type as filter. + * 5-D with shape [batch, depth, out_height, out_width, out_channels] + * or [batch, out_channels, depth, out_height, out_width]. Gradients with + * respect to the output of the convolution . \n + +*@par Required Attributes: + * @li strides: A list of 5 integers. Specifies the stride of the sliding window + * for each dimension of "out_backprop". + * The N and C dimensions must be 1. Has the same format as "out_backprop". + * @li pads: A list of 6 integers. + * Supports only padding along the D, H and W dimensions in sequence of head, + * tail, top, bottom, left and right . \n + +*@par Attributes: + * Three attributes: + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li data_format: An optional string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. + * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * dimension of the input. + * The N, C and D dimensions must be 1. Has the same format as "out_backprop". + +*@par Outputs: + * y: A Tensor. Has the same type as filter,and has same format as "input_size" + +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv3d_backprop_input +*/ +REG_OP(Conv3DBackpropInput) + .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(Conv3DBackpropInput) + +/** +*@brief Computes the gradients of convolution 3d with respect to the input. +*@par Inputs: + * Two inputs: + * @li filter: A Tensor whose type is float16. The format of filter is NCDHW, + * NDHWC or DHWCN. + * @li out_backprop: A Tensor. Must have the same type as filter. The format is + * NDHWC or NCDHW. \n + +*@par Required Attributes: + * @li strides: A list of 5 integers. Specifies the stride of the sliding window + * for each dimension of "out_backprop". + * The N and C dimensions must be 1. Has the same format as "out_backprop". + * @li pads: A list of 6 integers. 
Supports only padding along the D, H and W + * dimensions in sequence of head, tail, top, bottom, left and right. + * @li input_size: A tuple/list of type int32, int64. An integer vector + * representing the shape of input, where input is a 5-D tensor + * [batch, depth, height, width, channels] or + * [batch, channels, depth, height, width] . \n + +*@par Attributes: + * Three attributes: + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li data_format: An optional string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. + * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * dimension of input. + * The N, C and D dimensions must be 1. Has the same format as "out_backprop". +*@par Outputs: + * y: A Tensor. Has the same type and data format as "out_backprop". +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv3d_backprop_input + +*@par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropInput instead. +*/ +REG_OP(Conv3DBackpropInputD) + .INPUT(filter, TensorType({DT_FLOAT16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(input_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(Conv3DBackpropInputD) + +/** +*@brief Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence . \n + +*@par Inputs: +* @li x: A Tensor dtype of float16. +* @li cont: A Tensor dtype of float16, float32. +* @li w_x: A Tensor dtype of float16. +* @li bias: A Tensor dtype of int16, int32, float16, float32. +* @li w_h: A Tensor dtype of float16. +* @li x_static: A optinal Tensor dtype of float16. +* @li h_0: A optinal Tensor dtype of float16, float32. +* @li c_0: A optinal Tensor dtype of float16, float32. +* @li w_x_static: A optinal Tensor dtype of float16 . \n + +*@par Attributes: +*@li num_output: A Scalar of output size dtype of int. +*@li expose_hidden: A Scalar(bool) of features hidden . \n + +*@par Outputs: +*@li h: A Tensor dtype of float16, float32. +* @li h_t: A optinal Tensor dtype of float16, float32. The hidden state at time t. +* @li c_t: A optinal Tensor dtype of float16, float32. The cell state at time t . \n + +*@par Third-party framework compatibility: +* Compatible with the Caffe operator LSTM. +*/ +REG_OP(LSTM) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(cont, TensorType({DT_FLOAT32,DT_FLOAT16})) + .INPUT(w_x, TensorType({DT_FLOAT16})) + .INPUT(bias, TensorType({DT_FLOAT16,DT_FLOAT32,DT_INT16,DT_INT32})) + .INPUT(w_h, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(x_static, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(h_0, TensorType({DT_FLOAT16,DT_FLOAT32})) + .OPTIONAL_INPUT(c_0, TensorType({DT_FLOAT16,DT_FLOAT32})) + .OPTIONAL_INPUT(w_x_static, TensorType({DT_FLOAT16})) + .OUTPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(h_t, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(c_t, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(num_output, Int, 0) + .ATTR(expose_hidden, Bool, false) + .OP_END_FACTORY_REG(LSTM) + +/** +*@brief Computes the gradients of convolution3D with respect to the filter +*@par Inputs: + * Three inputs: + * @li x: A Tensor. Must be one of the following types: float16, float32. + * Currently does not support double. 
+ * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] + * or [batch, in_channels, in_depth, in_height, in_width]. + * @li filter_size: A Tensor of type int32. An integer vector representing the + * tensor shape of filter, where filter is a 5-D tensor + * [filter_depth, filter_height, filter_width, in_channels, out_channels] + * [out_channels, in_channels, filter_depth, filter_height, filter_width] + * or [out_channels, filter_depth, filter_height, filter_width, in_channels]. + * @li out_backprop: A Tensor. Must have the same type as x. + * 5-D with shape [batch, out_depth, out_height, out_width, out_channels] + * or [batch, out_channels, out_depth, out_height, out_width]. + * Gradients with respect to the output of the convolution. \n + +*@par Required Attributes: + * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding + * window for each dimension of "x". The N and C dimensions must be 1. + * Has the same format as "x". + * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right] + * pads on feature map . \n + +*@par Attributes: + * Three attributes: + * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * dimension of input. + * The N, C and D dimensions must be 1. Has the same format as "x". + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li data_format: An optional string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. + +*@par Outputs: + * y: A Tensor that has the same type as "x" + * and the format is NDHWC, NCDHW or DHWCN. +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv3d_backprop_filter +*/ +REG_OP(Conv3DBackpropFilter) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(filter_size, TensorType({DT_INT32})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(Conv3DBackpropFilter) + +/** +*@brief Computes the gradients of convolution with respect to the filter. +*@par Inputs: + * Two inputs: + * @li x: A Tensor of type float16. + * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] + * or [batch, in_channels, in_depth, in_height, in_width]. + * @li out_backprop: A Tensor. Must have the same type as x. + * 5-D with shape [batch, out_depth, out_height, out_width, out_channels] + * or [batch, out_channels, out_depth, out_height, out_width]. + * Gradients with respect to the output of the convolution. \n + +*@par Required Attributes: + * @li filter_size: A tuple/list of type integers. An integer vector + * representing the tensor shape of filter, where filter is a 5-D tensor + * [filter_depth, filter_height, filter_width, in_channels, out_channels], + * [out_channels, filter_depth, filter_height, filter_width, in_channels] + * or [out_channels, in_channels, filter_depth, filter_height, filter_width]. + * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding + * window for each dimension of "x". + * The N and C dimensions must be 1. Has the same format as "x". + * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right] + * pads on feature map. 
\n + +*@par Attributes: + * Three attributes: + * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * dimension of input. + * The N, C and D dimensions must be 1. Has the same format as "x". + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li data_format: An optional string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. + +*@par Outputs: + * y: A Tensor of type float32 and the format is NDHWC, NCDHW or DHWCN. +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv3d_backprop_filter +*@par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DBackpropFilter instead. +*/ + + +REG_OP(Conv3DBackpropFilterD) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(filter_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(Conv3DBackpropFilterD) + +/** +*@brief Computes the transpose of convolution 3d with respect to the input. +*@par Inputs: + * Three inputs: + * @li input_size: A Tensor of type int32. An integer vector representing the + * shape of input. + * @li x: A Tensor of type float16, currently does not support int8. The format + * is NDHWC or NCDHW. + * @li filter: A Tensor of type float16, currently does not support int8. + * The format is NDHWC, NCDHW or DHWCN. + +*@par Optional input: + * Two optional inputs + * @li bias: An optional 1D tensor of the same type as "x". Reserved. + * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n + +*@par Required Attributes: + * @li strides: A tuple/list of 5 integers. Specifies the stride of the sliding + * window for each dimension of "x". + * The N and C dimensions must be 1. Has the same format as "x". + * @li pads: A tuple/list of 6 integers + +*@par Attributes: + * Five attributes: + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li dilations: A tuple/list of 5 integers, + * The dilation factor for each dimension of input. + * The N, C and D dimensions must be 1. Has the same format as "x". + * @li data_format: An optional string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. + * @li output_padding: The size will be added in the output shape. + * @li offset_x: Input offset_x value. Reserved. +*@par Outputs: + * y: A Tensor. Has the same type and format as "x". +*/ +REG_OP(Conv3DTranspose) + .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(filter, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .ATTR(output_padding, ListInt, {0, 0, 0, 0, 0}) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv3DTranspose) + +/** +*@brief Computes the transpose of convolution 3d with respect to the input. +*@par Inputs: + * @li x: A Tensor of type float16, currently does not support int8. + * The format is NDHWC or NCDHW. 
+ * @li filter: A Tensor of type float16, currently does not support int8. + * The format is NDHWC, NCDHW or DHWCN. + +*@par Optional inputs: + * @li bias: An optional 1D tensor of the same type as "x". Reserved. + * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved . \n + +*@par Required Attributes: + * @li input_size: A tuple/list of type int32. + * An integer vector representing the shape of input + * @li strides: A tuple/list of 5 integers. + * Specifies the stride of the sliding window for each dimension of "x". + * The N and C dimensions must be 1. Has the same format as "x". + * @li pads: A tuple/list of 6 integers . \n + +*@par Attributes: + * Five attributes: + * @li dilations: A tuple/list of 5 integers, The dilation factor for each + * dimension of input. + * The N, C and D dimensions must be 1. Has the same format as "x". + * @li groups: Number of blocked connections from input channels to output + * channels. + * @li data_format: An optional string from: "NDHWC", "NCDHW". + * Defaults to "NDHWC". Specify the data format of the input and output data. + * @li output_padding: The size will be added in the output shape. + * @li offset_x: Input offset_x value. Reserved. +*@par Outputs: + * y: A Tensor. Has the same type and format as "x". +*@par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. +*/ +REG_OP(Conv3DTransposeD) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(filter, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(input_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .ATTR(output_padding, ListInt, {0, 0, 0, 0, 0}) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv3DTransposeD) + +/** +*@brief Computes the transpose of convolution 2d with respect to the input. +*@par Inputs: + * Five inputs: + * @li input_size: A Tensor of type int32 or int64. An integer vector + * representing the shape of input, where input is a 4-D tensor + * [batch, height, width, channels] or [batch, channels, height, width]. + * @li x: A Tensor of type float16, int8. 4-D with shape [batch, out_height, + * out_width, out_channels] or [batch, out_channels, out_height, out_width]. + * @li filter: A Tensor of type float16, int8. Must have the same type as "x". + * 4-D with shape [filter_height, filter_width, in_channels, out_channels] + * or [out_channels, filter_height, filter_width, in_channels] + * or [out_channels, in_channel, filter_height, filter_width]. + * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". + * @li offset_w: An optional 1D tensor for quantized inference. Reserved. + *\n + *\n + * The following are the supported data types and data formats: +*@verbatim + | Tensor | x | filter | bias | y + ------------|---------|---------|---------|-------- + | Data Type | float16 | float16 | float16 | float16 + | |---------|---------|---------|-------- + | | int8 | int8 | int32 | int32 + ------------|---------|---------|---------|-------- + | Format | NCHW | NCHW | ND | NCHW + | | NHWC | HWCN | | NHWC +@endverbatim + * For int8, a dequant or requant operator must be followed. + *\n + * +*@par Required Attributes: + * @li strides: A required tuple/list of 4 integers. The stride of the sliding + * window for H/W dimension. 
The index of H/W is same as data_format. + * @li pads: A required tuple/list of 4 integers, [top, bottom, left, right] + * pads on feature map. +*@par Attributes: + * Five attributes: + * @li groups: Number of blocked connections from input channels to output + * channels. + * Defaults to "1". + * @li dilations: A tuple/list of 4 integers, The dilation factor for each + * dimension of input. Must be [1, 1, 1, 1]. + * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". + * Specify the data format of the input and output data. + * @li output_padding: The size will be added in the output shape. Defaults + * to [0, 0, 0, 0]. + * @li offset_x: An optional int. Input offset, used for quantized inference. + * Defaults to "0". + *\n + *\n + * The following value range restrictions must be met: +*@verbatim + | Name | Field | Scope + -------------------|----------|-------------- + | input_size | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | x (out_backprop) | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | filter | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | y (fmap) | H | [1, 4096] + | | W | [1, 4096] + -------------------|----------|-------------- + | Stride | H | [1, 63] + | | W | [1, 63] + -------------------|----------|-------------- + | Padding | Top | [0, 255] + | | Bottom | [0, 255] + | | Left | [0, 255] + | | Right | [0, 255] + -------------------|----------|-------------- + | Dilation | H | [1, 255] + | | W | [1, 255] + -------------------|----------|-------------- + | Offset_x | | [-128, 127] + +@endverbatim + * In Ascend910, fmap or out_backprop's H and W not support 1 when + * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 + *\n + * +*@par Outputs: + * y: A Tensor. A Tensor of type float16 or int32, and has same format as + * input_size. + *\n + * out_backprop_height = (fmap_height + pad_top + pad_bottom - + * (dilation_h * (filter_height - 1) + 1)) + * / stride_h + 1 + *\n + * out_backprop_width = (fmap_width + pad_left + pad_right - + * (dilation_w * (filter_width - 1) + 1)) + * / stride_w + 1 + *\n + * +*/ +REG_OP(Conv2DTranspose) + .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .ATTR(output_padding, ListInt, {0, 0, 0, 0}) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv2DTranspose) + +/** +*@brief Computes the transpose of convolution 2d with respect to the input. +*@par Inputs: + * Four inputs: + * @li x: A Tensor of type float16, int8. + * @li filter: A Tensor of type float16, int8. Must have the same type as "x". + * @li bias: An optional 1D tensor of the same type as "x". + * @li offset_w: An optional 1D tensor for quantized inference. Type is int8. Reserved. +*@par Required Attributes: + * @li input_size: A Tensor of type int32 or int64. An integer vector representing the + * shape of input. + * @li strides: A required list or tuple. The stride of the sliding window for + * height and width for H/W dimension. + * @li pads: A required list or tuple of int32. 
Padding added to each dimension + * of the input. +*@par Attributes: + * Five attributes: + * @li groups: Number of blocked connections from input channels to output channels. + * Defaults to "1". + * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension + * of input. Must be [1, 1, 1, 1]. + * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". + * Specify the data format of the input and output data. + * @li output_padding: The size will be added in the output shape. Defaults + * to [0, 0, 0, 0]. + * @li offset_x: An optional int. Input offset, used for quantized inference. + * Defaults to "0". +*@par Outputs: + * y: A Tensor. Has the same type as "filter". +*@par Restrictions: + * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DTranspose instead. +*/ +REG_OP(Conv2DTransposeD) + .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) + .REQUIRED_ATTR(input_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .ATTR(output_padding, ListInt, {0, 0, 0, 0}) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv2DTransposeD) + +/** +*@brief Computes the deformed convolution output with the expected input +*@par Inputs: + * Two inputs: + * @li x: A Tensor of type float16,float32 + * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. +*@par Required Attributes: + * @li strides: A tuple/list of 4 integers.The stride of the sliding window for + * height and width for H/W dimension. + * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension + * of the input. + * @li ksize: A tuple/list of 2 integers.kernel size. +*@par Attributes: + * Four attributes: + * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension + * of input. Defaults to [1, 1, 1, 1] + * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. + * @li deformable_groups: Specify the c-axis grouping number of input x. + * @li modulated: Specify version of DeformableConv2D, true means v2, false means v1 +*@par Outputs: + * y: A Tensor. A Tensor of type float16, float32. +*/ +REG_OP(DeformableOffsets) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(data_format, String, "NCHW") + .ATTR(deformable_groups, Int, 1) + .ATTR(modulated, Bool, true) + .OP_END_FACTORY_REG(DeformableOffsets) + +/** +*@brief Computes the gradients of DeformableOffsets with respect to input and offsets +*@par Inputs: + * Three inputs: + * @li grad: A Tensor of type float16,float32. gradients with respect to DeformableOffsets output + * @li x: A Tensor of type float16,float32. + * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. +*@par Required Attributes: + * @li strides: A tuple/list of 4 integers.The stride of the sliding window for + * height and width for H/W dimension. + * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension + * of the input. 
+ * @li ksize: A tuple/list of 2 integers.kernel size. +*@par Attributes: + * Three attributes: + * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension + * of input. Defaults to [1, 1, 1, 1] + * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. + * @li deformable_groups: Specify the c-axis grouping number of input x. + * @li modulated: Specify version of DeformableConv2D, true means v2, false means v1. +*@par Outputs: + * grad_x: A Tensor of type float16, float32. Gradients with respect to input_x + * grad_offsets: A Tensor of type float16, float32. Gradients with respect to input_offsets +*/ +REG_OP(DeformableOffsetsGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad_offsets, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(data_format, String, "NCHW") + .ATTR(deformable_groups, Int, 1) + .ATTR(modulated, Bool, true) + .OP_END_FACTORY_REG(DeformableOffsetsGrad) + +/** +*@brief Computes the deformed dilation output with the expected input +*@par Inputs: + * One inputs: + * @li x: A Tensor of type int8, float16, float32 +*@par Required Attributes: + * @li dilations: A tuple/list of integers. +*@par Attributes: + * Two attributes: + * @li padding_value: default value filling in blank + * @li pads: A tuple/list of integers. +*@par Outputs: + * y: A Tensor. A Tensor of type int8, float16, float32. +*/ +REG_OP(Dilation) + .INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(dilations, ListInt) + .ATTR(pads, ListInt, {}) + .ATTR(padding_value, Float, 0.0) + .OP_END_FACTORY_REG(Dilation) + +} // namespace ge +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/inc/ops/nn_detect_ops.h new file mode 100644 index 00000000..af59b4e2 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/nn_detect_ops.h @@ -0,0 +1,1654 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file nn_detect_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief Generates bounding boxes based on "rois" and "deltas". +* It is a customized FasterRcnn operator . \n + +*@par Inputs: +* Two inputs, including: +*@li rois: Region of interests (ROIs) generated by the region proposal +* network (RPN). A 2D Tensor of type float32 or float16 with shape (N, 4). 
+* "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1", +* "y0", and "y1". +*@li deltas: Absolute variation between the ROIs generated by the RPN and +* ground truth boxes. A 2D Tensor of type float32 or float16 with shape (N, 4). +* "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh" . \n + +*@par Attributes: +*@li means: An index of type int. Defaults to [0,0,0,0]. +* "deltas" = "deltas" x "stds" + "means". +*@li stds: An index of type int. Defaults to [1.0,1.0,1.0,1.0]. +* "deltas" = "deltas" x "stds" + "means". +*@li max_shape: Shape [h, w], specifying the size of the image transferred to +* the network. Used to ensure that the bbox shape after conversion does not +* exceed "max_shape". +*@li wh_ratio_clip: Defaults to "16/1000". The values of "dw" and "dh" fall +* within (-wh_ratio_clip, wh_ratio_clip) . \n + +*@par Outputs: +*bboxes: Bboxes generated based on "rois" and "deltas". Have the same format +* and type as "rois". +*/ +REG_OP(BoundingBoxDecode) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0}) + .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0}) + .REQUIRED_ATTR(max_shape, ListInt) + .ATTR(wh_ratio_clip, Float, 0.016) + .OP_END_FACTORY_REG(BoundingBoxDecode) + +/** +*@brief Computes the coordinate variations between bboxes and ground truth +* boxes. It is a customized FasterRcnn operator . \n + +*@par Inputs: +* Two inputs, including: +*@li anchor_box: Anchor boxes. A 2D Tensor of float32 with shape (N, 4). +* "N" indicates the number of bounding boxes, and the value "4" refers to +* "x0", "x1", "y0", and "y1". +*@li ground_truth_box: Ground truth boxes. A 2D Tensor of float32 with +* shape (N, 4). "N" indicates the number of bounding boxes, and the value "4" +* refers to "x0", "x1", "y0", and "y1" . \n + +*@par Attributes: +*@li means: An index of type int. Defaults to [0,0,0,0]. +* "deltas" = "deltas" x "stds" + "means". +*@li stds: An index of type int. Defaults to [1.0,1.0,1.0,1.0]. +* "deltas" = "deltas" x "stds" + "means" . \n + +*@par Outputs: +*delats: A 2D Tensor of type float32 with shape (N, 4), specifying the variations between all anchor boxes and ground truth boxes. +*/ +REG_OP(BoundingBoxEncode) + .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(ground_truth_box, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(delats, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0}) + .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0}) + .OP_END_FACTORY_REG(BoundingBoxEncode) + +/** +*@brief Judges whether the bounding box is valid. It is a customized +* FasterRcnn operator . \n + +*@par Inputs: +* Two inputs, including: +*@li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4). +* "N" indicates the number of bounding boxes, the value "4" indicates "x0", +* "x1", "y0", and "y1". +*@li img_metas: Valid boundary value of the image. A 1D Tensor of type float16 +* with shape (16,) + +*@par Outputs: +*valid_tensor: A bool with shape (N, 1), specifying whether an input anchor is +* in an image. "1" indicates valid, while "0" indicates invalid . \n + +*@attention Constraints: +* 16 "img_metas" are input. The first three numbers (height, width, ratio) are +* valid, specifying the valid boundary (heights x ratio, weights x ratio). 
+*/ +REG_OP(CheckValid) + .INPUT(bbox_tensor, TensorType({DT_FLOAT16})) + .INPUT(img_metas, TensorType({DT_FLOAT16})) + .OUTPUT(valid_tensor, TensorType({DT_INT8})) + .OP_END_FACTORY_REG(CheckValid) + +/** +*@brief Computes the intersection over union (iou) or the intersection over +* foreground (iof) based on the ground-truth and predicted regions . \n + +*@par Inputs: +* Two inputs, including: +*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with +* shape (N, 4). "N" indicates the number of bounding boxes, and the value +* "4" refers to "x0", "x1", "y0", and "y1". +*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 +* with shape (M, 4). "M" indicates the number of ground truth boxes, and +* the value "4" refers to "x0", "x1", "y0", and "y1" . \n + +*@par Attributes: +*mode: Computation mode, a character string with the value range of [iou, iof] . \n + +*@par Outputs: +*overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying +* the IoU or IoF ratio . \n + +*@attention Constraints: +* Only computation of float16 data is supported. To avoid overflow, the input +* length and width are scaled by 0.2 internally. +*/ +REG_OP(Iou) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(mode, String, "iou") + .OP_END_FACTORY_REG(Iou) + +/** +*@brief Performs the backpropagation of ROIAlign for training scenarios . \n + +*@par Inputs: +* Three inputs, including: +*@li ydiff: A 5HD gradient input of type float32. +*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs, +the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1". +*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved . \n + +*@par Attributes: +*@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign. +*@li pooled_width: A required attribute of type int, specifying the W dimension. +*@li pooled_height: A required attribute of type int, specifying the H dimension. +*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. +*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical +sampling frequency of each output. If this attribute is set to "0", the sampling frequency is +equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n + +*@par Outputs: +*xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". +*/ +REG_OP(ROIAlignGrad) + .INPUT(ydiff, TensorType({DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32})) + .OUTPUT(xdiff, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(xdiff_shape, ListInt) + .REQUIRED_ATTR(pooled_width, Int) + .REQUIRED_ATTR(pooled_height, Int) + .REQUIRED_ATTR(spatial_scale, Float) + .ATTR(sample_num, Int, 2) + .OP_END_FACTORY_REG(ROIAlignGrad) + +/** +*@brief Obtains the ROI feature matrix from the feature map. It is a customized FasterRcnn operator . \n + +*@par Inputs: +* Three inputs, including: +*@li features: A 5HD Tensor of type float32 or float16. +*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). 
"N" indicates the number of ROIs, +the value "5" indicates the indexes of images where the ROIs are located, +* "x0", "y0", "x1", and "y1". +*@li rois_n: An optional input of type int32, specifying the number of valid ROIs. This parameter is reserved . \n + +*@par Attributes: +*@li spatial_scale: A required attribute of type float32, specifying the scaling ratio of "features" to the original image. +*@li pooled_height: A required attribute of type int32, specifying the H dimension. +*@li pooled_width: A required attribute of type int32, specifying the W dimension. +*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", +* the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2". +*@li roi_end_mode: An optional attribute of type int32. Defaults to "1" . \n + +*@par Outputs: +* output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16. +The axis N is the number of input ROIs. Axes H, W, and C are consistent +* with the values of "pooled_height", +* "pooled_width", and "features", respectively. +*/ +REG_OP(ROIAlign) + .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(spatial_scale, Float) + .REQUIRED_ATTR(pooled_height, Int) + .REQUIRED_ATTR(pooled_width, Int) + .ATTR(sample_num, Int, 2) + .ATTR(roi_end_mode, Int, 1) + .OP_END_FACTORY_REG(ROIAlign) + +/** +*@brief Performs SSD prior box detection . \n + +*@par Inputs: +* Two inputs, including: +*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16. +*@li img: source image. Has the same type and format as "x" . \n + +*@par Attributes: +*@li min_size: A required float32, specifying the minimum edge length of a square prior box. +*@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size) +*@li aspect_ratio: An required float32, specifying the aspect ratio for generated rectangle boxes. The height +is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaults to "1.0". +*@li img_h: An optional int32, specifying the source image height. Defaults to "0". +*@li img_w: An optional int32, specifying the source image width. Defaults to "0". +*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. Defaults to "0.0". +*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. Defaults to "0.0". +*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True". +*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False". +*@li offset: An optional float32, specifying the offset. Defaults to "0.5". +*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n + +*@par Outputs: +*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n + +*@attention Constraints: +* This operator applies only to SSD networks. +*@see SSDDetectionOutput() +*@par Third-party framework compatibility +* It is a custom operator. 
It has no corresponding operator in Caffe. +*/ + REG_OP(PriorBox) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(min_size, ListFloat) + .REQUIRED_ATTR(max_size, ListFloat) + .REQUIRED_ATTR(aspect_ratio, ListFloat) + .ATTR(img_h, Int, 0) + .ATTR(img_w, Int, 0) + .ATTR(step_h, Float, 0.0) + .ATTR(step_w, Float, 0.0) + .ATTR(flip, Bool, true) + .ATTR(clip, Bool, false) + .ATTR(offset, Float, 0.5) + .ATTR(variance, ListFloat, {0.1}) + .OP_END_FACTORY_REG(PriorBox); + +/** +*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n + +*@par Inputs: +* Six inputs, including: +*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16. +*@li img: source image. Has the same type and format as "x". +*@li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height. +*@li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width. +*@li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the height of each prior box. +*@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box . \n + +*@par Attributes: +*@li min_size: A required float32, specifying the minimum edge length of a square prior box. +*@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size) +*@li img_h: An optional int32, specifying the height of the source image. +*@li img_w: An optional int32, specifying the width of the source image. +*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. +*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. +*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True". +*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False". +*@li offset: An optional float32, specifying the offset. Defaults to "0.5". +*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n + +*@par Outputs: +*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n + +*@attention Constraints: +* This operator applies only to SSD networks. +*@see SSDDetectionOutput() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. 
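+*@par Quick reference
+* An illustrative sketch of the prior-box geometry shared by the PriorBox
+* family, assumed from the "step"/"offset" semantics above rather than an
+* exact specification of the kernel. For the feature-map cell at (row, col),
+* with the box size taken from the "box_height"/"box_width" inputs:
+*@verbatim
+  center_x = (col + offset) * step_w
+  center_y = (row + offset) * step_h
+  box      = [center_x - box_width / 2, center_y - box_height / 2,
+              center_x + box_width / 2, center_y + box_height / 2]
+@endverbatim
+* Coordinates are typically normalized by the source image size; when "clip"
+* is "True", they are clipped to [0, 1].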
+*/ + REG_OP(PriorBoxD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(min_size, ListFloat) + .REQUIRED_ATTR(max_size, ListFloat) + .ATTR(img_h, Int, 0) + .ATTR(img_w, Int, 0) + .ATTR(step_h, Float, 0.0) + .ATTR(step_w, Float, 0.0) + .ATTR(flip, Bool, true) + .ATTR(clip, Bool, false) + .ATTR(offset, Float, 0.5) + .ATTR(variance, ListFloat, {0.1}) + .OP_END_FACTORY_REG(PriorBoxD); + +/** +*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n + +*@par Inputs: +* Six inputs, including: +*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16. +*@li img: source image. Has the same type and format as "x". +*@li boxes: An ND tensor of type float32 or float16, specifying the prior box information. Same as output y + +*@par Attributes: +*@li min_size: A required float32, specifying the minimum edge length of a square prior box. +*@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size) +*@li img_h: An optional int32, specifying the height of the source image. +*@li img_w: An optional int32, specifying the width of the source image. +*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. +*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. +*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True". +*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False". +*@li offset: An optional float32, specifying the offset. Defaults to "0.5". +*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n + +*@par Outputs: +*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n + +*@attention Constraints: +* This operator applies only to SSD networks. +*@see SSDDetectionOutput() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead. +*/ + REG_OP(PriorBoxDV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(min_size, ListFloat) + .REQUIRED_ATTR(max_size, ListFloat) + .ATTR(img_h, Int, 0) + .ATTR(img_w, Int, 0) + .ATTR(step_h, Float, 0.0) + .ATTR(step_w, Float, 0.0) + .ATTR(flip, Bool, true) + .ATTR(clip, Bool, false) + .ATTR(offset, Float, 0.5) + .ATTR(variance, ListFloat, {0.1}) + .OP_END_FACTORY_REG(PriorBoxDV2); + +/** +*@brief Performs Position Sensitive ROI Pooling . \n + +*@par Inputs: +* Two inputs, including: +*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature +* map, dimension C1 must be equal to +* (int(output_dim+15)/C0))*group_size*group_size. 
+*@li rois: A tensor of type float16 or float32, with shape
+* [batch, 5, rois_num], describing the ROIs. Each ROI consists of five
+* elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
+* the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
+* greater than or equal to "0.0" . \n
+
+*@par Attributes:
+*@li output_dim: A required int32, specifying the number of output channels,
+* must be greater than 0.
+*@li group_size: A required int32, specifying the number of groups to encode
+* position-sensitive score maps, must be within the range (0, 128).
+*@li spatial_scale: A required float32, scaling factor for mapping the input
+* coordinates to the ROI coordinates . \n
+
+*@par Outputs:
+*y: An NC1HWC0 tensor of type float16 or float32, describing the result
+* feature map . \n
+
+*@attention Constraints:
+* NC1HWC0: the channel must be group_size squared, and rois_num must be a multiple of 16
+*/
+REG_OP(PSROIPooling)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(output_dim, Int)
+    .REQUIRED_ATTR(group_size, Int)
+    .REQUIRED_ATTR(spatial_scale, Float)
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(PSROIPooling)
+
+/**
+*@brief Returns detection result . \n
+
+*@par Inputs:
+* Five inputs, including:
+*@li rois: An NCHW tensor of type float16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
+*@li bbox_delta: An NC1HWC0 tensor of type float16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
+*@li score: An NC1HWC0 tensor of type float16 or float32, specifying the probability of each class. Class 0 is the background class.
+*@li im_info: An ND tensor of type float16 or float32, specifying the image information.
+*@li actual_rois_num: An optional NCHW tensor of type int32, specifying the number of valid boxes per batch.
+*@par Attributes:
+*@li batch_rois: An optional int32, specifying the number of images to be predicted. Defaults to "1".
+*@li num_classes: A required int32, specifying the number of classes to be predicted. The value must be greater than 0.
+*@li score_threshold: A required float32, specifying the threshold for box filtering. The value range is [0.0, 1.0].
+*@li iou_threshold: A required float32, specifying the confidence threshold for box filtering, which is the output "obj" of operator Region. The value range is (0.0, 1.0).
+*@par Outputs:
+*@li box: A tensor of type float16 or float32 holding the output proposals, with output shape [batch, numBoxes, 8].
+* 8 means [x1, y1, x2, y2, score, label, batchID, NULL]. The maximum value of numBoxes is 1024,
+* that is, numBoxes = min(number of input boxes, 1024).
+*@li actual_bbox_num: A tensor of type int32 with shape [batch, num_classes], specifying the number of output boxes . \n
+
+*@attention Constraints:
+*@li totalnum < max_rois_num * batch_rois.
+*@li "score" must be with shape (total_num, (num_classes+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
+*@li "bbox_delta" must be with shape (total_num, (num_classes*4+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe.
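+*@par Quick reference
+* A hedged reading of the output layout described above (illustrative only,
+* not an exact specification of the kernel): each output box is a record of
+* eight float values, and "actual_bbox_num" holds the per-image, per-class
+* count of valid boxes:
+*@verbatim
+  box[b][i]             = [x1, y1, x2, y2, score, label, batchID, NULL]
+  actual_bbox_num[b][c] = number of valid boxes of class c in image b
+@endverbatim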
+*/
+REG_OP(FSRDetectionOutput)
+    .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(im_info, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OPTIONAL_INPUT(actual_rois_num, TensorType({DT_INT32}))
+    .OUTPUT(actual_bbox_num, TensorType({DT_INT32}))
+    .OUTPUT(box, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(batch_rois, Int, 1)
+    .REQUIRED_ATTR(num_classes, Int)
+    .REQUIRED_ATTR(score_threshold, Float)
+    .REQUIRED_ATTR(iou_threshold, Float)
+    .OP_END_FACTORY_REG(FSRDetectionOutput)
+
+/**
+*@brief Returns detection result . \n
+
+*@par Inputs:
+* Three inputs, including:
+*@li bbox_delta: An ND tensor of type float16 or float32, specifying the box location predictions, used as the input of operator SSDDetectionOutput.
+*@li score: An ND tensor of type float16 or float32, specifying the box confidence data, used as the input of operator SSDDetectionOutput.
+*@li anchors: An ND tensor of type float16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput.
+*@par Attributes:
+*@li num_classes: An optional int32, specifying the number of classes to be predicted. Defaults to "2". The value must be greater than 1 and less than 1025.
+*@li share_location: An optional bool, specifying whether box locations are shared among classes. Defaults to "True".
+*@li background_label_id: An optional int32, specifying the background label ID. Must be 0.
+*@li iou_threshold: An optional float32, specifying the NMS threshold. Defaults to "0.3".
+*@li top_k: An optional int32, specifying the top-k value. Defaults to "200".
+*@li eta: An optional float32, specifying the eta value. Defaults to "1.0".
+*@li variance_encoded_in_target: An optional bool, specifying whether the variance is encoded in the target. Defaults to "False".
+*@li code_type: An optional int32, specifying the code type: corner is 1, center_size is 2, corner_size is 3. Defaults to "1" (only "2" is supported).
+*@li keep_top_k: An optional int32, specifying the top-k value after NMS. Defaults to "-1".
+*@li confidence_threshold: An optional float32, specifying the confidence filter threshold. Only detections with a confidence greater than the threshold are considered. Defaults to "0.0".
+*@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output".
+*@par Outputs:
+*@li out_boxnum: A tensor of type int32, specifying the number of output boxes.
+*@li y: A tensor of type float16 or float32 with shape [batch, keep_top_k, 8], describing the information of each output box.
+* In output shape, 8 means (batchID, label(classID), score (class probability), xmin, ymin, xmax, ymax, null)
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe.
+*/
+REG_OP(SSDDetectionOutput)
+    .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(anchors, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(out_boxnum, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(num_classes, Int, 2)
+    .ATTR(share_location, Bool, true)
+    .ATTR(background_label_id, Int, 0)
+    .ATTR(iou_threshold, Float, 0.3)
+    .ATTR(top_k, Int, 200)
+    .ATTR(eta, Float, 1.0)
+    .ATTR(variance_encoded_in_target, Bool, false)
+    .ATTR(code_type, Int, 1)
+    .ATTR(keep_top_k, Int, -1)
+    .ATTR(confidence_threshold, Float, 0.0)
+    .OP_END_FACTORY_REG(SSDDetectionOutput)
+
+/**
+*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n
+
+*@par Inputs:
+*x: An NCHW tensor of type float16 or float32.
The data is with shape (N, boxes*(coords+obj+classes), H, W), +where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged +as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n + +*@par Attributes: +*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3. +*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h). +*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024]. +*@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3".Defaults to "V3" +*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false". +*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false". +*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n + +*@par Outputs: +*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. +*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. +*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2], +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n + +*@attention Constraints: +*@li This operator applies to YOLO v2 and v3 networks. +*@li The succeeding layer of the Yolo operator must be operator Yolov3DetectionOutput. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(Yolo) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(yolo_version, String, "V3") + .ATTR(softmax, Bool, false) + .ATTR(background, Bool, false) + .ATTR(softmaxtree, Bool, false) + .OP_END_FACTORY_REG(Yolo) + +/** +*@brief Performs YOLO V2 detection . \n + +*@par Inputs: +* Four inputs, including: +*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov3DetectionOutput. +* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +*@li img_info: A float16 or float32, describing the image information including the required image height and width +* and the actual image height and width. +* +*@par Attributes: +*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. +*@li coords: Specifies the number of coordinate parameters. Must be 4. +*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20]. 
+*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, +* which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n + +*@li post_nms_topn: An optional int32. This attribute is reserved. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, + which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. +*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". +* +*@par Outputs: +*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn]. describing the information of each output box, +* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. +*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid, +* the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 +* +*@attention Constraints: +*@li This operator applies only to the YOLO v2 network. +*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator. +* +*@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(YoloV2DetectionOutput) + .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) + .REQUIRED_ATTR(biases, ListFloat) + .ATTR(boxes, Int, 5) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 20) + .ATTR(relative, Bool, true) + .ATTR(obj_threshold, Float, 0.5) + .ATTR(post_nms_topn, Int, 512) + .ATTR(score_threshold, Float, 0.5) + .ATTR(iou_threshold, Float, 0.45) + .ATTR(pre_nms_topn, Int, 512) + .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out_num, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(YoloV2DetectionOutput) + +/** +*@brief Performs YOLO V2 detection . \n + +*@par Inputs: +*Six inputs, including: +*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput. +* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +*@li imginfo: A float16, describing the image information including the required image height and width +* and the actual image height and width. +*@li windex: A windex tensor with shape [height, weight]. Has the same type as the inputs. +* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed. + +*@li hindex: A hindex tensor with shape [height, weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]]. + +* +*@par Attributes: +*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. +*@li coords: Specifies the number of coordinate parameters. Must be 4. 
+*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20]. +*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. +*@li post_nms_topn: An optional int32. This attribute is reserved. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0] . \n + +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. +*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". +* +*@par Outputs: +*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn]. describing the information of each output box, +* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. +*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid, +* the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 +* +*@attention Constraints: +*@li This operator applies only to the YOLO v2 network. +*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator . \n + +*@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead. +*/ +REG_OP(YoloV2DetectionOutputD) + .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) + .REQUIRED_ATTR(biases, ListFloat) + .ATTR(boxes, Int, 5) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 20) + .ATTR(relative, Bool, true) + .ATTR(obj_threshold, Float, 0.5) + .ATTR(post_nms_topn, Int, 512) + .ATTR(score_threshold, Float, 0.5) + .ATTR(iou_threshold, Float, 0.45) + .ATTR(pre_nms_topn, Int, 512) + .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out_num, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(YoloV2DetectionOutputD) + +/** +*@brief Performs YOLO V3 detection . \n + +*@par Inputs: +*Ten inputs, including: +*@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". +* There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. +*@li img_info: A float16 or float32, describing the image information including the required image height and width +* and the actual image height and width. + +*@par Attributes: +*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. +*@li coords: Specifies the number of coordinate parameters. Must be 4. +*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. 
+*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n + +*@li post_nms_topn: An optional int32. This attribute is reserved. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0] . \n + +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. + +*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". + +*@par Outputs: +*@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box. +* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. +*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. +* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 + +*@attention Constraints: +*@li This operator applies only to the YOLO v3 network. +*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators . \n + +*@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(YoloV3DetectionOutput) + .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) + .REQUIRED_ATTR(biases_low, ListFloat) + .REQUIRED_ATTR(biases_mid, ListFloat) + .REQUIRED_ATTR(biases_high, ListFloat) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(relative, Bool, true) + .ATTR(obj_threshold, Float, 0.5) + .ATTR(post_nms_topn, Int, 512) + .ATTR(score_threshold, Float, 0.5) + .ATTR(iou_threshold, Float, 0.45) + .ATTR(pre_nms_topn, Int, 512) + .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out_num, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(YoloV3DetectionOutput) + +/** +*@brief Performs YOLO V3 detection . \n + +*@par Inputs: +*16 Input, including: +*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. +* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +*@li imginfo: A float16, describing the image information including the required image height and width +* and the actual image height and width. +*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. +* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . 
 \n
+
+*@li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs.
+* [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
+*@par Attributes:
+*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
+*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
+*@li coords: Specifies the number of coordinate parameters. Must be 4.
+*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
+*@li relative: An optional bool. Defaults to and must be "true".
+*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo. The value range is [0.0, 1.0].
+*@li post_nms_topn: An optional int32. This attribute is reserved.
+*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo. The value range is [0.0, 1.0].
+*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
+*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
+
+*@par Outputs:
+*@li boxout: A tensor of type float16 or float32 with shape [batch, 6*post_nms_topn], describing the information of each output box.
+* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
+*@li boxoutnum: A tensor of type int32 with shape [batch, 8], specifying the number of output boxes.
+* Only the first of the 8 numbers is valid, namely the number of valid boxes in each batch. The maximum number of valid boxes in each batch is 1024
+
+*@attention Constraints:
+*@li This operator applies only to the YOLO v3 network.
+*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
+*@see Yolo()
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead.
+*/ +REG_OP(YoloV3DetectionOutputD) + .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(windex1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(windex2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(windex3, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(hindex1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(hindex2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(hindex3, TensorType({DT_FLOAT16,DT_FLOAT})) + .REQUIRED_ATTR(biases_low, ListFloat) + .REQUIRED_ATTR(biases_mid, ListFloat) + .REQUIRED_ATTR(biases_high, ListFloat) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(relative, Bool, true) + .ATTR(obj_threshold, Float, 0.5) + .ATTR(post_nms_topn, Int, 512) + .ATTR(score_threshold, Float, 0.5) + .ATTR(iou_threshold, Float, 0.45) + .ATTR(pre_nms_topn, Int, 512) + .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out_num, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(YoloV3DetectionOutputD) + +/** +*@brief Performs YOLO V3 detection . \n + +*@par Inputs: +*Ten inputs, including: +*@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n +There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. +*@li img_info: A float16 or float32, describing the image information including the required image height and width \n +* and the actual image height and width. + +*@par Attributes: +*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. +*@li coords: Specifies the number of coordinate parameters. Must be 4. +*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. +*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. + +*@li post_nms_topn: An optional int32. This attribute is reserved. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. + +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n + +*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". + +*@par Outputs: +*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), +* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. 
+*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
+* Only the first of the 8 values is valid; it holds the number of valid boxes in each batch. The maximum number of valid boxes per batch is 1024.
+
+*@attention Constraints:\n
+*@li This operator applies only to the YOLO v3 network.
+*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
+
+*@see Yolo()
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe.
+*/
+REG_OP(YoloV3DetectionOutputV2)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .REQUIRED_ATTR(biases, ListFloat)
+    .ATTR(boxes, Int, 3)
+    .ATTR(coords, Int, 4)
+    .ATTR(classes, Int, 80)
+    .ATTR(relative, Bool, true)
+    .ATTR(obj_threshold, Float, 0.5)
+    .ATTR(post_nms_topn, Int, 512)
+    .ATTR(score_threshold, Float, 0.5)
+    .ATTR(iou_threshold, Float, 0.45)
+    .ATTR(pre_nms_topn, Int, 512)
+    .ATTR(N, Int, 10)
+    .ATTR(resize_origin_img_to_net, Bool, false)
+    .ATTR(out_box_dim, Int, 3)
+    .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(box_out_num, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(YoloV3DetectionOutputV2)
+
+/**
+*@brief Performs YOLO V3 detection.
+
+*@par Inputs:
+*16 inputs, including:
+*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput.
+* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
+*@li imginfo: A float16, describing the image information including the required image height and width
+* and the actual image height and width.
+*@li windex: A windex tensor with shape [height,width]. Has the same type as the inputs.
+* [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]] consisting of height groups of [0, 1, 2...(width-1)]
+* is formed for the three Yolo outputs, respectively. It is a dynamic input. \n
+
+*@li hindex: A hindex tensor with shape [height,width]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively. \n
+
+*@par Attributes:
+*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
+*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
+*@li coords: Specifies the number of coordinate parameters. Must be 4.
+*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
+*@li relative: An optional bool. Defaults to and must be "true".
+*@li obj_threshold: A required float, specifying the confidence threshold for box filtering (the output "obj" of operator Yolo). The value range is [0.0, 1.0].
+*@li post_nms_topn: An optional int32. This attribute is reserved.
+*@li score_threshold: A required float, specifying the class score threshold for box filtering (the output "class" of operator Yolo). The value range is [0.0, 1.0].
+*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
+*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
+*
+*@par Outputs:
+*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2),
+* describing the information of each output box.
+* In the output shape, 6 means x1, y1, x2, y2, score, and label (class). The number of valid boxes is given by "box_out_num".
+*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
+* Only the first of the 8 values is valid; it holds the number of valid boxes in each batch. The maximum number of valid boxes per batch is 1024.
+*
+*@attention Constraints:
+*@li This operator applies only to the YOLO v3 network.
+*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
+*@see Yolo()
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead.
+*/
+REG_OP(YoloV3DetectionOutputV2D)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .REQUIRED_ATTR(biases, ListFloat)
+    .ATTR(boxes, Int, 3)
+    .ATTR(coords, Int, 4)
+    .ATTR(classes, Int, 80)
+    .ATTR(relative, Bool, true)
+    .ATTR(obj_threshold, Float, 0.5)
+    .ATTR(post_nms_topn, Int, 512)
+    .ATTR(score_threshold, Float, 0.5)
+    .ATTR(iou_threshold, Float, 0.45)
+    .ATTR(pre_nms_topn, Int, 512)
+    .ATTR(N, Int, 10)
+    .ATTR(resize_origin_img_to_net, Bool, false)
+    .ATTR(out_box_dim, Int, 3)
+    .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(box_out_num, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D)
+
+/**
+*@brief Spatial Pyramid Pooling, multi-level pooling.
+* Pooling out(n, sigma(c*2^i*2^i)) tensor, i in range [0, pyramid_height) . \n
+
+*@par Inputs:
+*x: An NCHW tensor, supporting float16 or float32 type . \n
+
+*@par Attributes:
+* @li pyramid_height: A required int32.
+* Multi-level pooling out from 2^0 to 2^(pyramid_height-1).
+* @li pool_method: An optional int32, pooling method: 0-MAX, 1-AVE.
+* Defaults to "0" . \n
+
+*@par Outputs:
+*y: An NCHW tensor, supporting float16 or float32 type . \n
+
+*@attention Constraints:
+* @li pyramid_height: pyramid_height should be in range [0, 7).
+* Pooling parameter should satisfy the Caffe pooling param(pad= -1.
+*@li If "scale_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "scale" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).
+* If "axis < 0", the ith axis of "scale" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis).
+*@li If "scale_from_blob = true" and "num_axes = 0", "scale" is a scalar with shape length 1 and dimension size 1.
+*@li If "scale_from_blob = true", "num_axes > 0", and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "scale" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).
+* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "scale" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes).
+*@li If "scale_from_blob = false", "scale" is not a scalar, and "axis >= 0", "axis + m" must be less than or equal to "n" and the ith axis of "scale" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).
+* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "scale" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). +*@li If "bias" is not None, the constraints for "bias" is the same as that for "scale". +*@par Third-party framework compatibility +* Compatible with the Caffe operator Scale. +*/ +REG_OP(Scale) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */ + .INPUT(scale, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Second operand." */ + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Third operand." */ + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as x" */ + .ATTR(axis, Int, 1) + .ATTR(num_axes, Int, 1) + .ATTR(scale_from_blob, Bool, true) + .OP_END_FACTORY_REG(Scale) + +/** +*@brief Local Response Normalization . \n + +*@par Inputs: +*One input, including: +*@li x: A Tensor. Must be 4-D shape, and only support the following types: float16, float32 . \n + +*@par Attributes: +*@li depth_radius: An optional int32, specifying the half-width of the normalization window. Defaults to "5". +* under the caffe framework, if local_size is provided and is an odd number, +* depth_radius = (local_size - 1) / 2. local_size is the number of channels to sum over (for ACROSS_CHANNELS) +* or the side length of the square region to sum over (for WITHIN_CHANNEL). +*@li bias: An optional float32. An offset, usually > 0 to avoid dividing by 0. +* Defaults to "1.0". +*@li alpha: An optional float32. A scaling factor, usually positive. +* Defaults to "1.0". +*@li beta: An optional float32. An exponent. Defaults to "0.75" for the caffe framework, Defaults to "0.5" for others. +*@li norm_region: An optional string. A mode option. "ACROSS_CHANNELS":0. Defaults to "ACROSS_CHANNELS" . \n + +*@par Outputs: +*y: A Tensor. Has the same data type and shape as "x" . \n + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator LRN. +*/ +REG_OP(LRN) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(depth_radius, Int, 5) + .ATTR(bias, Float, 1.0) + .ATTR(alpha, Float, 1.0) + .ATTR(beta, Float, 0.5) + .ATTR(norm_region, String, "ACROSS_CHANNELS") + .OP_END_FACTORY_REG(LRN) + +/** +* @brief Computes the gradient for Local Response Normalization . \n + +* @par Inputs: +* @li grads: A 4D Tensor of type float16 or float32. +* @li x: A 4D Tensor of type float16 or float32. +* @li y: A 4D Tensor of type float16 or float32 . \n + +* @par Attributes: +* @li depth_radius: An optional int, specifying the half-width of the +* normalization window. Defaults to "5". +* @li bias: An optional float32. An offset, usually > 0 to avoid dividing by 0. +* Defaults to "1". +* @li alpha: An optional float32. A scaling factor, usually positive. +* Defaults to "1". +* @li beta: An optional float32. An exponent. Defaults to "0.5" . \n + +* @par Outputs: +* z: A Tensor. Has the same type and shape as "grads" . \n + +* @attention Constraints: +* "x" and "y" must have the same shape and type as "grads" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator LRNGrad. 
+*/ +REG_OP(LRNGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(z, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(depth_radius, Int, 5) + .ATTR(bias, Float, 1.0) + .ATTR(alpha, Float, 1.0) + .ATTR(beta, Float, 0.5) + .OP_END_FACTORY_REG(LRNGrad) + + /** + *@brief Calculates the RNNT Loss (log probability) for each batch entry. + Also calculates the gradient. + + *@par Inputs: + *@li acts: 4-D, shape: `(batch x seqLength x labelLength x outputDim)`, the logits. + *@li labels: 2-D Tensor containing all the targets of the batch with zero padded. + *@li input_lengths: Tensor of size (batch) containing size of each output sequence. + *@li label_lengths: Tensor of (batch) containing label length of each example. + + *@par Outputs: + *@li costs: 1-D Tensor, the cost of each example in the batch. + *@li grads: A Tensor. Has the same type as acts. + + *@par Attributes: + *@li blank_label: An optional attribute. Defaults to 0. + + *@par Third-party framework compatibility + * Compatible with TensorFlow RNNTLoss operator. + */ +REG_OP(RNNTLoss) + .INPUT(acts, TensorType({DT_FLOAT})) + .INPUT(labels, TensorType({DT_INT32})) + .INPUT(input_lengths, TensorType({DT_INT32})) + .INPUT(label_lengths, TensorType({DT_INT32})) + .ATTR(blank_label, Int, 0) + .OUTPUT(costs, TensorType({DT_FLOAT})) + .OUTPUT(grads, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(RNNTLoss) + +/** +*@brief Performs group normalization . \n + +*@par Inputs: +* Five inputs, including: (NHWC, NCHW supported) +*@li x: A 4D Tensor of type float16 or float32, with format NHWC or +NCHW for 4D. +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format +NHWC or NCHW. Specifies the scaling factor. +*@li offset: A Tensor of type float32. Must be 1D if input "x" is with +format NHWC or NCHW. Specifies the offset. +*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format +NHWC or NCHW. Reserved. Mu +st be "None" if the operation is used for training. +*@li variance: A Tensor of type float32. Must be 1D if input "x" is with +format NHWC or NCHW. Specifies the variance used for inference. Reserved . \n + +*@par Attributes: +*@li epsilon: An optional float32, specifying the small value added to +variance to avoid dividing by zero. Defaults to "0.0001". +*@li data_format: An optional string, specifying the format of "x". +Defaults to "NHWC". +*@li is_training: An optional bool, specifying if the operation is used for +training or inference. Defaults to "True" . \n + +*@par Outputs: +* Five outputs, including: (NHWC, NCHW supported) +*@li y: A 4D Tensor of type float16 or float32 for the normalized "x", +with format NHWC or NCHW for 4D. +*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with +format NHWC or NCHW. Specifies the mean of "x". +*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is +with format NHWC or NCHW. Specifies the variance of "x". +*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if +input "x" is with format NHWC or NCHW. Specifies the mean o +f "x" for gradient computation. Pass "None" to skip this output. +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if +input "x" is with format NHWC or NCHW. Specifies the varian +ce of "x" for gradient computation. Pass "None" to skip this output . 
\n + +*@attention Constraints: +*@li If the operation is used for inference and outputs "reserve_space_1" +and "reserve_space_2" are available, then "reserve_space_1" has the same +value as "mean" and "reserve_spa +ce_2" has the same value as "variance". +*@li For Ascend 310, the result accuracy fails due to the square root +instruction . \n + +*@par Third-party framework compatibility +*@li Compatible with the PyTorch operator GroupNorm. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(GroupNorm) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT,})) + .INPUT(offset, TensorType({DT_FLOAT,})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NHWC") + .ATTR(is_training, Bool, true) + .ATTR(num_groups, Int, 2) + .OP_END_FACTORY_REG(GroupNorm) + +/** +*@brief Performs instance normalization . \n + +*@par Inputs: +* Five inputs, including: (NC1HWC0, supported) +*@li x: A 5D Tensor of type float16 or float32, NC1HWC0. +*@li gamma: A Tensor of type float32. +A 5D Tensor for scaling factor, to scale the normalized x. +*@li beta: A Tensor of type float32. +A 5D Tensor for offset, to shift to the normalized x. +*@li mean: A Tensor of type float32. +A 5D Tensor Specifies the mean used for inference. Reserved. +*@li variance: A Tensor of type float32. +A 5D Tensor Specifies the variance used for inference. Reserved . \n + +*@par Attributes: +*@li is_training: An optional bool, specifying if the operation is used for +training or inference. Defaults to "True". +*@li momentum: An optional float32, +the value used for the running_mean and running_var computation. Default: "0.1". +*@li epsilon: An optional float32, specifying the small value added to +variance to avoid dividing by zero. Defaults to "0.00001" . \n + +*@par Outputs: +* Three outputs, including: (NHWC, NCHW NC1HWC0 supported) +*@li y: A 5D tensor of type float16 or float32 for the normalized "x", +*@li batch_mean: A Tensor of type float32. +Specifies the mean of "x". +*@li batch_variance: A Tensor of type float32. +Specifies the variance of "x" . \n + +*@par Third-party framework compatibility +*@li Compatible with the PyTorch operator InstanceNorm. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(InstanceNormV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(beta, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + + .ATTR(is_training, Bool, true) + .ATTR(momentum, Float, 0.1) + .ATTR(epsilon, Float, 0.00001) + .OP_END_FACTORY_REG(InstanceNormV2) + +/** +*@brief Performs instance normalization for inference. + +*@par Inputs:\n +* Five inputs, including: (NC1HWC0 supported) +*@li x: A Tensor of type float16 or float32. +*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. +*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. 
+*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the mean.
+*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.
+*@li variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
+
+*@par Outputs:\n
+*y: A Tensor of type float16 or float32 for the normalized "x".
+*batch_mean: A Tensor of type float32 for the result mean.
+*batch_variance: A Tensor of type float32 for the result variance.
+
+*@attention Constraints:
+*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use INInferV2 instead.
+*/
+REG_OP(INInferV2D)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(beta, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(variance_sqrt, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(INInferV2D)
+
+/**
+*@brief Performs instance normalization for inference of the InHost part.
+
+*@par Inputs:\n
+* One input, including: (NC1HWC0 supported)
+* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.
+
+*@par Attributes:
+* epsilon: An optional float32, specifying the small value added to
+variance to avoid dividing by zero. Defaults to "0.00001" . \n
+
+*@par Outputs:\n
+* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
+*/
+REG_OP(InHost)
+    .INPUT(variance, TensorType({DT_FLOAT}))
+    .OUTPUT(variance_sqrt, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.00001)
+    .OP_END_FACTORY_REG(InHost)
+
+/**
+* @brief Performs instance normalization on "x". \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32, format is NC1HWC0.
+* @li gamma: A Tensor. Must be one of the following types: float16, float32, format is ND.
+* @li beta: A Tensor. Must be one of the following types: float16, float32, format is ND.
+
+* @par Attributes:
+* @li data_format: An attribute of type String. \n
+* @li epsilon: An attribute of type Float. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x", format is NC1HWC0. \n
+* @li mean: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n
+* @li variance: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n
+
+* @par Third-party framework compatibility
+* Can be used by the ONNX InstanceNormalization operator.
+*/
+REG_OP(InstanceNorm)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(data_format, String)
+    .REQUIRED_ATTR(epsilon, Float)
+    .OP_END_FACTORY_REG(InstanceNorm)
+
+REG_OP(KlDivLossGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .ATTR(log_target, Bool, false)
+    .OP_END_FACTORY_REG(KlDivLossGrad)
+
+/**
+* @brief Computes l1_loss_grad or l1_loss_backward.
\n + +* @par Inputs: +* Three inputs, including: +* @li grads: A Tensor. Must be one of the following types: float16, float32. +* Required. +* @li predict: A Tensor. Has the same type as "grads". Required. +* @li label: A Tensor. Has the same type as "grads". Required. \n + +* @par Attributes: +* @li reduction: An optional attribute of type String. Defaults to "mean". \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator L1LossGrad. +*/ +REG_OP(L1LossGrad) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(L1LossGrad) + +/** +* @brief Computes loss of lp, p=1,2,3.... + +* @par Inputs: +* @li predict: An ND tensor of type float16, float32. +* @li label: An ND tensor of type float16, float32. \n + +* @par Attributes: +* @li p: A required int attribute that decides which loss to compute, now the p only can be 1 to compute l1_loss. +* @li reduction: An optional string.Defaults to "mean". \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same shape and type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpLoss. +*/ +REG_OP(LpLoss) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(p, Int) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(LpLoss) + +/** +* @brief Computes gradients of mse loss. + +* @par Inputs: +* @li predict: An ND tensor of type float16, float32. +* @li label: An ND tensor of type float16, float32. +* @li dout: An ND tensor of type float16, float32. \n + +* @par Attributes: +* @li reduction: An optional string.Defaults to "mean". \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same shape and type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator MseLossGrad. +*/ +REG_OP(MseLossGrad) + .INPUT(predict, TensorType({DT_FLOAT32, DT_FLOAT16})) + .INPUT(label, TensorType({DT_FLOAT32, DT_FLOAT16})) + .INPUT(dout, TensorType({DT_FLOAT32, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT32, DT_FLOAT16})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(MseLossGrad) + +/** +* @brief Computes mse loss. +* @par Inputs: +* two inputs, including: +* @li predict: An ND Tensor of dtype float16 or float32. +* @li label: An ND Tensor of dtype float16 or float32.\n +* +* @par Attributes: +* @li reduction:An optional str from sum, none, mean, Defaults to "mean".\n +* +* @par Outputs: +* @li y: when reduction=sum/mean, y is scale. when reduction=none, y has +* same type and shape as "predict".\n +*/ +REG_OP(MseLoss) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(MseLoss) + +/** +* @brief Calculates the reversed outputs of the function "smooth_l1_loss_v2". \n + +* @par Inputs: +* Three Inputs, including: +* @li predict: A Tensor. Must be one of the following types: +* float16, float32. +* @li label: A Tensor. Has the same type as "predict". +* @li dout: A Tensor. Has the same type as "predict". 
\n + +* @par Attributes: +* Two Attributes, including: +* @li sigma: An optional float. Defaults to 1.0. \n + +* @li reduction: An optional string. Defaults to "mean", +* Must be one of the following: "none", "mean", "sum". \n + +* @par Outputs: +* @li gradient: A Tensor. Has the same type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SmoothL1LossBackward. +*/ +REG_OP(SmoothL1LossGradV2) + .INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(gradient, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(sigma, Float, 1.0) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SmoothL1LossGradV2) + +/** +* @brief Creates a criterion that uses a squared term if the absolute +* element-wise error falls below beta and an L1 term otherwise. It is +* less sensitive to outliers than the MSELoss and in some cases prevents +* exploding gradients. + +* @par Inputs: +* @li predict: A multi-dimensional Tensor of type float16 or float32, +* specifying the predictive value. \n +* @li label: A multi-dimensional Tensor of type float16 or float32, +* specifying the target value. \n + +* @par Attributes: +* @li sigma: An optional int. Specifies the threshold of loss. Defaults +* to "1.0". \n +* @li reduction: An optional str. Specifies the reduction to apply to +* the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, +* 'mean': the sum of the output will be divided by the number of elements in +* the output,'sum': the output will be summed. Default: 'mean'. \n + +* @par Outputs: +* @li loss: Indicates the loss between the predictive value and target value. +* Has the same dimensions as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator smooth_l1_loss. \n +*/ +REG_OP(SmoothL1LossV2) + .INPUT(predict, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(label, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(loss, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(sigma, Float, 1.0) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SmoothL1LossV2) + +/** +* @brief Computes Centralization. result = x - mean(x, axes) + +* @par Inputs: +* @li x: An ND tensor of type float16, float32. +* @par Attributes: +* @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. +* Must be in the range [-rank(x), rank(x)). +* @par Outputs: +* @li y: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* custom operator \n +*/ +REG_OP(Centralization) + .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(axes, ListInt, {-1}) + .OP_END_FACTORY_REG(Centralization) + +/** +* @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2. + +* @par Inputs: +* @li predict: An ND tensor of type float16, float32. +* @li target: An ND tensor of type float16, float32. +* @li dout: An ND tensor of type float16, float32. +* @li weight: An optional ND tensor of type float16, float32. +* @li pos_weight: An optional ND tensor of type float16, float32. \n + +* @par Attributes: +* @li reduction: An optional string.Defaults to "mean". \n + +* @par Outputs: +* @li gradient: An ND tensor tensor with the same shape and type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad. 
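+
+* @par Example
+* A brief usage sketch (illustrative only; the optional inputs "weight" and
+* "pos_weight" are simply left unset here, and the set_input_*/set_attr_*
+* accessors are assumed to follow the pattern generated by REG_OP):
+* @code
+*   ge::op::Data predict("predict");
+*   ge::op::Data target("target");
+*   ge::op::Data dout("dout");
+*   auto grad_op = ge::op::SigmoidCrossEntropyWithLogitsGradV2("sce_grad");
+*   grad_op.set_input_predict(predict)
+*          .set_input_target(target)
+*          .set_input_dout(dout)
+*          .set_attr_reduction("mean");
+* @endcode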
+*/ +REG_OP(SigmoidCrossEntropyWithLogitsGradV2) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) +/** + * @brief Calculate the PoissonNllLoss function. + * target∼Poisson(input)loss(input,target)=input−target∗log(input)+log(target!) \n + + * @par Inputs: + * Two inputs, including: + * @li input_x: A tensor. Must be one of the following types: + * float16, float32. \n + * + * @par Inputs: + * @li target: A tensor. Must be one of the following types: + * float16, float32. \n + + * @par Attributes: + * four Attributes, including: + * @li log_input: An optional bool. Defaults to "True" \n + * + * @par Attributes: + * @li full: An optional bool. Defaults to "False" \n + * + * @par Attributes: + * @li eps: An optional float. Defaults to "1e-8" \n + * + * @par Attributes: + * @li reduction: An optional string. Defaults to "mean" \n + + * @par Outputs: + * loss: A Tensor has same element type as two inputs. \n + + * @par Third-party framework compatibility + * Compatible with the Pytorch operator PoissonNllLoss. \n + */ +REG_OP(PoissonNllLoss) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(log_input, Bool, true) + .ATTR(full, Bool, false) + .ATTR(eps, Float, 1e-8) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(PoissonNllLoss) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/inc/ops/nn_ops.h new file mode 100644 index 00000000..820aa00d --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/nn_ops.h @@ -0,0 +1,53 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file nn_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ +#include "graph/operator_reg.h" +#include "nn_pooling_ops.h" + +namespace ge { +/** +* @brief Says whether the targets are in the top "k" predictions . \n + +* @par Inputs: +* Three inputs, including: +* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor. +* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. +* @li k: A 1D Tensor of the same type as "targets". +* Specifies the number of top elements to look at for computing precision . \n + +* @par Outputs: +* precision: A Tensor of type bool . \n + +* @attention Constraints: +* @li targets must be non-negative tensor. + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator InTopKV2. 
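+
+* @par Example
+* A brief usage sketch (illustrative only; the set_input_* accessors are assumed
+* to follow the pattern generated by REG_OP):
+* @code
+*   ge::op::Data predictions("predictions");  // [batch_size, classes], float32
+*   ge::op::Data targets("targets");          // [batch_size], class ids
+*   ge::op::Data k("k");                      // scalar, same type as "targets"
+*   auto in_top_k = ge::op::InTopKV2("in_top_k");
+*   in_top_k.set_input_predictions(predictions)
+*           .set_input_targets(targets)
+*           .set_input_k(k);
+* @endcode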
+*/ +REG_OP(InTopKV2) + .INPUT(predictions, TensorType({DT_FLOAT})) + .INPUT(targets, TensorType(IndexNumberType)) + .INPUT(k, TensorType({IndexNumberType})) + .OUTPUT(precision, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(InTopKV2) +}// namespace ge +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/inc/ops/nn_pooling_ops.h new file mode 100644 index 00000000..9f191ebe --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/nn_pooling_ops.h @@ -0,0 +1,1608 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file nn_pooling_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief Performs pooling on the input. +*@par Inputs: +*@li x: An NCHW tensor of type float16, float32, int8. +*@par Attributes: +*@li mode: An optional int32, specifying the pooling algorithm, either "0" (max pooling) or "1" (avg pooling). Defaults to "0". +*@li global_pooling: An optional bool. Defaults to "false". +*@li window: Optional, including: +*window[0]: An optional int32, specifying the window size along in the H dimension. The value range is [1, 32768]. Defaults to "1". +*window[1]: An optional int32, specifying the window size along in the W dimension. The value range is [1, 32768]. Defaults to "1". +*@li stride: Optional, including: +*stride[0]: An optional int32, specifying the stride along in the H dimension. The value range is [1, 63]. Defaults to "1". +*stride[1]: An optional int32, specifying the stride along in the W dimension. The value range is [1, 63]. Defaults to "1". +*@li pad: Optional, including: +*pad[0]: An optional int32, specifying the up padding. Defaults to "0". +*pad[1]: An optional int32, specifying the bottom padding. Defaults to "0". +*pad[2]: An optional int32, specifying the left padding. Defaults to "0". +*pad[3]: An optional int32, specifying the right padding. Defaults to "0". +*@li dilation: Optional, including: +*dilation[0]: An optional int32, specifying the up dilation. Defaults to "1". +*dilation[1]: An optional int32, specifying the bottom dilation. Defaults to "1". +*dilation[2]: An optional int32, specifying the left dilation. Defaults to "1". +*dilation[3]: An optional int32, specifying the right dilation. Defaults to "1". +*@li ceil_mode: An optional int32, either "0" (ceil mode) or "1" (floor mode). Defaults to "0". +*@par Outputs: +*y: An NCHW tensor of type float16, float32, int32. +*@attention Constraints: +*@li window[0] * window[1] < 256; +*@li 1<=input_h<=4096,1<=input_w<=4096 +*@li If input tensor N is a prime number, it should be less than 65535. +*@par Third-party framework compatibility +*@li Compatible with the Caffe operator Pooling. +*@li Compatible with the TensorFlow operator Pooling. 
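+
+*@par Example:
+* A minimal usage sketch (illustrative only; the set_input_*/set_attr_* accessors
+* are assumed to follow the pattern generated by REG_OP):
+* @code
+*   ge::op::Data x("x");                      // NCHW feature map, e.g. float16
+*   auto pool = ge::op::Pooling("pooling");
+*   pool.set_input_x(x)
+*       .set_attr_mode(0)                     // 0: max pooling, 1: avg pooling
+*       .set_attr_window({3, 3})
+*       .set_attr_stride({2, 2})
+*       .set_attr_pad({0, 0, 0, 0});
+* @endcode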
+*/ +REG_OP(Pooling) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT32})) + .ATTR(mode, Int, 0) // 0:max pooling or 1:avg pooling + .ATTR(global_pooling, Bool, false) + .ATTR(window, ListInt, {1,1}) // kernel size + .ATTR(stride, ListInt, {1,1}) // stride size + .ATTR(pad, ListInt, {0,0,0,0}) // pad size + .ATTR(dilation, ListInt, {1,1,1,1}) + .ATTR(ceil_mode, Int, 0) + .ATTR(data_format, String, "NCHW") + .OP_END_FACTORY_REG(Pooling) + +/** +*@brief Performs average pooling on the input . \n + +*@par Inputs: +*x: A tensor of type float16, float32, double . \n + +*@par Attributes: +*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 255]. +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63]. +*@li padding: A required string, specifying the padding algorithm, either "VALID" or "SAME". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding. +*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default) . \n + +*@par Outputs: +*y: The average pooled output tensor. Has the same type and format as input "x" . \n + +*@attention Constraints: +*@li This operator applies only to a TensorFlow network. +*@li Only single input and single output are supported. +*@li Global pooling is supported. +*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 +*@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPool. +*/ +REG_OP(AvgPool) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(padding, String) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(AvgPool) + +/** +*@brief Performs average pooling on the input. + +*@par Inputs: +*x: A tensor of type float16, float32, double. + +*@par Attributes: +*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 255]. +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63]. +*@li padding_mode: A required string, specifying the padding algorithm, either "VALID", "SAME" and "CALCULATED". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding. +*@li pads: Pad value when padding_mode is "CALCULATED". +*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default). +*@li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] +*@li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". 
+*@li exclusive: Ignore padding area or not when calculating average. + +*@par Outputs: +*y: The average pooled output tensor. Has the same type and format as input "x". + +*@attention Constraints: +*@li Only single input and single output are supported. +*@li Global pooling is supported. +*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 +*@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPoolV2. +*/ +REG_OP(AvgPoolV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0, 0, 0, 0}) + .ATTR(data_format, String, "NCHW") + .ATTR(global_pooling, Bool, false) + .ATTR(ceil_mode, Bool, false) + .ATTR(exclusive, Bool, true) + .OP_END_FACTORY_REG(AvgPoolV2) + +/** +*@brief Performs average pooling on the input. + +*@par Inputs: +*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. + +*@par Attributes: +*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. +*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. +*@li pads: List of ints, implicit zero paddings on both sides of the input. +*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. +*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. +*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. +*@li data_format: A string, format of input data . \n + +*@par Outputs: +*y: The average pooled output tensor . \n + +*@attention Constraints: +*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPool3D. +*/ +REG_OP(AvgPool3D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3D) + + +/** +*@brief Performs average pooling on the input. + +*@par Inputs: +*@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +*@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. +*@li multiplier: An optional tensor of float16, float32, double. + +*@par Attributes: +*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. +*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. +*@li pads: List of ints, implicit zero paddings on both sides of the input. +*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. +*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. 
+*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. +*@li data_format: A string, format of input data . \n + +*@par Outputs: +*y: The average pooled output tensor . \n + +*@attention Constraints: +*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPool3D. +*/ +REG_OP(AvgPool3DD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3DD) + +/** +* @brief Computes AvgPool3DGrad function. + +* @par Inputs: +* @li orig_input_shape: An NDHWC tensor of type float16, float32, or double. +* @li grads: An NDHWC tensor of type int32. + +* @par Attributes: +* @li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. +* @li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. +* @li pads: List of ints, implicit zero paddings on both sides of the input. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. +* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. +* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. +* @li data_format: A string, format of input data . + +* @par Outputs: +* @output: A mutable tensor with the same shape and type as "orig_input". + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator AvgPoolGrad. +*/ + +REG_OP(AvgPool3DGrad) + .INPUT(orig_input_shape, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .INPUT(grads, TensorType({DT_INT32})) + .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3DGrad) + +/** +* @brief Performs average pooling on the input. + +* @par Inputs: +* @li grads: An NDHWC tensor of type float16. +* @li filter: An optional tensor of type float16, fractal_z_3d layout. +* @li multiplier: An optional tensor of float16. + +* @par Attributes: +* @li orig_input_shape: List of ints that has length 5. The size of the window for each dimension of the input tensor. +* @li ksize: List of ints that has length 3. The size of the window for each dimension of the input tensor. +* @li strides:List of ints that has length 3. The stride of the sliding window for each dimension of the input tensor. +* @li pads: List of ints, implicit zero paddings on both sides of the input. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. +* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. 
+* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. +* @li data_format: A string, format of input data . \n + +* @par Outputs: +* @output: The average pooled output tensor . \n + +* @attention Constraints: +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPool3DGradD. +*/ +REG_OP(AvgPool3DGradD) + .INPUT(grads, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16})) + .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(orig_input_shape, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3DGradD) + +/** +*@brief Performs max_pool_ext2 on the input . \n + +*@par Inputs: +* One input: +*x: An NC1HWC0 Tensor of type float16. + + +*@par Attributes: +*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. +*@li padding: A required string. No default value. +*@li data_format: An optional string. Defaults to "NC1HWC0" . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@attention Constraints: +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +*@li "padding" is either "SAME" or "VALID" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolV2. +*/ +REG_OP(MaxPoolExt2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE, DT_INT8, + DT_INT16, DT_INT32, DT_INT64, DT_UINT8, + DT_UINT16, DT_QINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE, DT_INT8, + DT_INT16, DT_INT32, DT_INT64, DT_UINT8, + DT_UINT16, DT_QINT8})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(padding, String) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(MaxPoolExt2) + +/** +*@brief Performs max pooling on the input . \n + +*@par Inputs: +* One input: +*x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int8, int16, +* int32, int64, uint8, uint16, qint8 + +*@par Attributes: +*@li ksize: A required list of int8, int16, int32, or int64 values, +* specifying the size of the window for each dimension of the input tensor. +* No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, +* specifying the stride of the sliding window for each dimension of +* the input tensor. No default value. +*@li padding: A required string. No default value. +*@li data_format: An optional string. Defaults to "NHWC" . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@attention Constraints: +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, +* ksize[1] * ksize[2] <= 255. 
+*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, +* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +*@li "padding" is either "SAME" or "VALID". + + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPool. +*/ +REG_OP(MaxPool) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE, DT_INT8, + DT_INT16, DT_INT32, DT_INT64, DT_UINT8, + DT_UINT16, DT_QINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE, DT_INT8, + DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_QINT8})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(padding, String) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(MaxPool) + +/** +*@brief Performs max 3d pooling on the input . \n + +*@par Inputs: +*x: An NC1HWC0 Tensor. Supported type float16, float32, double . \n + +*@par Attributes: +*@li ksize: A required list of int8, int16, int32, or int64 values, +specifying the size of the window for each dimension of the input tensor. +No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, +specifying the stride of the sliding window for each dimension of +the input tensor. No default value. +*@li padding: A required string type of float16. +*@li pads: A list type of int32. Default value {0, 0, 0}. +*@li dilation: A list type of int32. Default value {1, 1, 1}. +*@li ceil_mode: A ceil mode number of int32 . Default value 0. +*@li data_format: An optional string. Defaults to "NDHWC" . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@attention Constraints: +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, + * ksize[1] * ksize[2] <= 255. +*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, + * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +*@li "padding" is either "SAME" or "VALID" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPool3D. +*/ +REG_OP(MaxPool3D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(padding, String) + .ATTR(pads, ListInt, {0,0,0}) + .ATTR(dilation, ListInt, {1,1,1}) + .ATTR(ceil_mode, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(MaxPool3D) + +/** +*@brief Applies a 2D adaptive max pooling over an input signal conposed of several input planes. \n +* The output is of size H x W, for any input size. + +* @par Inputs: +* One input, including: +* @li x: A Tensor. Must be one of the following data types: +* float16, float32, float64. \n + +* @par Attributes: +* @li output_size: A required list of 2 ints +* specifying the size (H,W) of the output tensor. \n + +* @par Outputs: +* @li y: A Tensor. Has the same data type as "x" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator AdaptiveMaxPool2d. +*/ +REG_OP(AdaptiveMaxPool2d) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(argmax, TensorType::IndexNumberType()) + .REQUIRED_ATTR(output_size, ListInt) + .OP_END_FACTORY_REG(AdaptiveMaxPool2d) + +/** +* @brief Computes second-order gradients of the maxpooling3d function . 
\n + +* @par Inputs: +* @li orig_x: Original forward input tensor(NDC1HWC0) of type float16 +* @li orig_y: Original forward output tensor(NDC1HWC0) of type float16 +* @li grads: Gradient tensor(NDC1HWC0) of type float16 +* @li assist: Assist tensor(NDC1HWC0) of type float16 + +* @par Attributes: +* @li ksize: A required list or tuple, +* specifying the size of the sliding window. +* @li strides: A required list or tuple, +* specifying the stride of the sliding window. +* @li pads: A required list or tuple +* @li padding: A required string, window sliding mode. Either SAME or VALID. +* @li data_format: An optional string. +* Format of the original input, either NCDHW or NDHWC. Defaults to NDHWC . \n + +* @attention Constraints: +* @li Only the Ascend 910 platform is supported. +* @li "orig_x" and "grads" must have the same shape. +* @li "orig_y" and "y" must have the same shape. Otherwise, an error is reported. +* @li "orig_x", "orig_y", "grads", and "y" must be NDC1HWC0 tensors . \n + +* @par Outputs: +* @li y: Result tensor of type float16 + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator MaxPool3DGradGrad. +*/ + +REG_OP(MaxPool3DGradGrad) + .INPUT(orig_x, TensorType::RealNumberType()) + .INPUT(orig_y, TensorType::RealNumberType()) + .INPUT(grads, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(MaxPool3DGradGrad) + + +/** +* @brief Computes gradients of the maxpooling function . \n + +* @par Inputs: +* @li x1: A mutable NC1HWC0 tensor of type RealNumberType. +* @li x2: A mutable NC1HWC0 tensor of type RealNumberTypex. +* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n + +* @par Attributes: +* @li ksize: A required tuple or list, specifying the size of the window for +* each dimension of the input tensor. +* @li strides: A required tuple or list, specifying the stride of the sliding +* window for each dimension of the input tensor. +* @li padding: A required string, specifying the type of padding algorithm +* to use. +* @li data_format: An optional string, Specify the data format of the input and +* output data. With the default format "NHWC" . \n + +* @par Outputs: +* y: A mutable tensor. Has the same shape and type as "x1" . \n + +* @attention Constraints: +* @li Computing gradients of global pooling is not supported, which means +* "ksize < x1". +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolGrad. +*/ +REG_OP(MaxPoolGrad) + .INPUT(x1, TensorType::RealNumberType()) + .INPUT(x2, TensorType::RealNumberType()) + .INPUT(grad, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(padding, String) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(MaxPoolGrad) + +/** +* @brief Computes second-order gradients of the maxpooling function . \n + +* @par Inputs: +* @li x1: Original forward input tensor. Supported type:float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. +* @li x2: Has the same type and format as input "x1". +* @li grad:Has the same type and format as input "x1" . \n + +* @par Attributes: +* @li ksize: A required list or tuple, +* specifying the size of the sliding window. 
+* @li strides: A required list or tuple, +* specifying the stride of the sliding window. +* @li padding: A required string, window sliding mode. Either SAME or VALID. +* @li data_format: An optional string. +* Format of the original input, either NCHW or NHWC. Defaults to NHWC . \n + +* @attention Constraints: +* @li Only the Ascend 910 platform is supported. +* @li "x1" and "grads" must have the same shape. +* @li "x2" and "y" must have the same shape. Otherwise, an error is reported. +* @li "x1", "x2", "grads", and "y" must be 5D tensors. +* @li ksize[H] and ksize[W] is in the range [1, 255]. +* @li strides[H] and strides[W] is in the range [1, 63]. +* @li Other dimensions of ksize and strides is 1 . \n + +* @par Outputs: +* @li y: Has the same type and format as input "x1" . \n + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator MaxPoolGradGrad. +*/ +REG_OP(MaxPoolGradGrad) + .INPUT(x1, TensorType::RealNumberType()) + .INPUT(x2, TensorType::RealNumberType()) + .INPUT(grad, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(padding, String) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(MaxPoolGradGrad) + +/** +*@brief Performs max_pool_ext2 on the input . \n + +*@par Inputs: +* Two inputs: +*@li x: An NC1HWC0 Tensor of type float16. +*@li strides: A required type of int32 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. +*@li ksize: A required type of int32 values, specifying the size of the window for each dimension of the input tensor. No default value. + + +*@par Attributes: +*@li padding: A required string. No default value. +*@li data_format: An optional string. Defaults to "NC1HWC0" . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@attention Constraints: +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +*@li "padding" is either "SAME" or "VALID" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolV2. +*/ +REG_OP(MaxPoolV2) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(ksize, TensorType({DT_INT32})) + .INPUT(strides, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(padding, String) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(MaxPoolV2) + +/** +*@brief Performs max pooling on the input and outputs both max values and + * indices . \n + +*@par Inputs: +* One input: +*x: An 4D Tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. + * Must set the format, supported format list ["NCHW, NHWC"]. \n + +*@par Attributes: +*@li ksize: A required list of int8, int16, int32, or int64 values, + * specifying the size of the window for each dimension of the input tensor. + * No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, + * specifying the stride of the sliding window for each dimension of + * the input tensor. No default value. +*@li padding: A required string. No default value . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x". +*argmax: A Tensor. Has the same type and format as input "x". 
+*@attention Constraints: +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, + * ksize[1] * ksize[2] <= 255. +*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, + * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +*@li "padding" is either "SAME" or "VALID" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolWithArgmax. +*/ +REG_OP(MaxPoolWithArgmax) + .INPUT(x, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .OUTPUT(argmax, TensorType::IndexNumberType()) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(padding, String) + .ATTR(Targmax, Int, 7) + .OP_END_FACTORY_REG(MaxPoolWithArgmax) + +/** +*@brief Performs the backpropagation of MaxPoolWithArgmax . \n + +*@par Inputs: +* Three inputs, including: +*@li x: An 4d tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. + * Must set the format, supported format list ["NCHW, NHWC"] +*@li grad: An 4d tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. + * Must set the format, supported format list ["NCHW, NHWC"] +*@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n + +*@par Attributes: +*@li ksize: A required list of int8, int16, int32, or int64 values, + * specifying the size of the window for each dimension of the input tensor. + * No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, + * specifying the stride of the sliding window for each dimension of + * the input tensor. No default value. +*@li padding: A required string. No default value . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@attention Constraints: +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, + * ksize[1] * ksize[2] <= 255. +*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 +*@li "padding" is either "SAME" or "VALID". + + +*@see max_pool_with_argmax +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolGradWithArgmax. +*/ +REG_OP(MaxPoolGradWithArgmax) + .INPUT(x, TensorType::RealNumberType()) + .INPUT(grad, TensorType::RealNumberType()) + .INPUT(argmax, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(padding, String) + .OP_END_FACTORY_REG(MaxPoolGradWithArgmax) + +/** +*@brief Performs transform mask to argmax . \n + +*@par Inputs: +* Two input: +*x: An NC1HWC0 Tensor of type float16. +*mask: An NC1HWC0 Tensor of type uint16 . \n + +*@par Attributes: +*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. +*@li padding: A required string. No default value . \n + +*@par Outputs: +*argmax: An NC1HWC0 Tensor of type int32 . \n + +*@attention Constraints: +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +*@li "padding" is either "SAME" or "VALID" . 
\n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Mask2Argmax.
+*/
+REG_OP(Mask2Argmax)
+    .INPUT(x, TensorType::RealNumberType())
+    .INPUT(mask, TensorType::IndexNumberType())
+    .OUTPUT(argmax, TensorType::IndexNumberType())
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(padding, String)
+    .REQUIRED_ATTR(originshape, ListInt)
+    .OP_END_FACTORY_REG(Mask2Argmax)
+
+/**
+* @brief Computes second-order gradients of the maxpooling function . \n
+
+* @par Inputs:
+* @li x: Original forward input tensor. Supported type: float, double, int32,
+ * uint8, int16, int8, int64, uint16, half, uint32, uint64.
+* @li grad: Gradient tensor. Supported type: float, double, int32,
+ * uint8, int16, int8, int64, uint16, half, uint32, uint64.
+* @li argmax: A tensor of type int32 or int64.
+* @par Attributes:
+* @li ksize: A required list, specifying the size of the sliding window.
+* @li strides: A required list, specifying the stride of the sliding window.
+* @li padding: A required string, window sliding mode. Either SAME or VALID.
+* @par Outputs:
+* @li y: Result tensor. Supported type: float, double, int32,
+ * uint8, int16, int8, int64, uint16, half, uint32, uint64
+
+* @attention Constraints:
+* @li Only the cloud platform is supported.
+* @li "x" and "grad" must have the same shape.
+* @li The shapes of "x", "grad", "argmax", and "y" must be 5-D.
+* @li shape of argmax must be (fmap_n, fmap_c1, kernel_h * kernel_w,
+* (shape_max_pool[2] * shape_max_pool[3] + 15) // 16 * 16, 1),
+* or (fmap_n, fmap_c1, kernel_h * kernel_w,
+* (shape_max_pool[2] * shape_max_pool[3] + 31) // 16, 16); otherwise, an error is reported . \n
+
+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax.
+*/
+REG_OP(MaxPoolGradGradWithArgmax)
+    .INPUT(x, TensorType::RealNumberType())
+    .INPUT(grad, TensorType::RealNumberType())
+    .INPUT(argmax, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::RealNumberType())
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(padding, String)
+    .OP_END_FACTORY_REG(MaxPoolGradGradWithArgmax)
+
+/**
+* @brief Computes the AvgPoolGrad function . \n
+
+* @par Inputs:
+* @li orig_input_shape: An NHWC tensor of type int32.
+* @li input_grad: An NHWC tensor of type float16, float32, or double . \n
+
+* @par Attributes:
+* @li ksize: A required tuple or list, specifying the size of the window for
+* each dimension of the input tensor.
+* @li strides: A required tuple or list, specifying the stride of the sliding
+* window for each dimension of the input tensor.
+* @li padding: A required string, specifying the type of
+* the padding algorithm to use.
+* @li data_format: An optional string. Defaults to "NHWC" . \n
+
+* @par Outputs:
+* @out_grad: A mutable tensor with the same shape and type as "orig_input" . \n
+
+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator AvgPoolGrad.
+*/
+REG_OP(AvgPoolGrad)
+    .INPUT(orig_input_shape, TensorType({DT_INT32}))
+    .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
+    .OUTPUT(out_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(padding, String)
+    .ATTR(data_format, String, "NHWC")
+    .OP_END_FACTORY_REG(AvgPoolGrad)
+
+/**
+* @brief Computes gradients of the average pooling function . \n
+
+* @par Inputs:
+* @input_grad: An NHWC tensor of type float16.
+* @mean_matrix: Assist matrix, an NHWC tensor of type float16. +* @kernel_matrix: Assist matrix, an NHWC tensor of type float16. + +* @par Attributes: +* @li orig_input_shape: A required Original input dimensions. +* @li ksize: A required tuple or list, specifying the size of the window +* for each dimension of the input tensor. +* @li strides: A required tuple or list, specifying the stride of +* the sliding window for each dimension of the input tensor. +* @li padding: A required string, specifying the type of the padding algorithm +* to use. +* @li data_format: An optional string. Defaults to "NHWC" . \n + +* @par Outputs: +* @out_grad: A mutable tensor with the same shape and type as "orig_input". +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use AvgPoolGrad instead. +*/ +REG_OP(AvgPoolGradD) + .INPUT(input_grad, TensorType({DT_FLOAT16})) + .INPUT(mean_matrix, TensorType({DT_FLOAT16})) + .INPUT(kernel_matrix, TensorType({DT_FLOAT16})) + .OUTPUT(out_grad, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(orig_input_shape, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(padding, String) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(AvgPoolGradD) + +/** +* @brief Computes avgpoolv2grad function. + +* @par Inputs: +* @li orig_input_shape: An NHWC tensor of type int32. +* @li input_grad: An NHWC tensor of type float16, float32, or double. + +* @par Attributes: +* @li ksize: A required tuple or list, specifying the size of the window for +* each dimension of the input tensor. +* @li strides: A required tuple or list, specifying the stride of the sliding +* window for each dimension of the input tensor. +* @li padding_mode: A required string, specifying the type of +* the padding algorithm to use. +* @li global_pooling: Whether to use the global pooling. If global_pooling=true, +* ksize and pads will be ignored. Default False. +* @li ceil_mode: Whether to use the ceil function to calculate output height and +* width. Default False. +* @li exclusive: Whether to exclude padding points. default is true. +* @li data_format: An optional string. Defaults to "NHWC". + +* @par Outputs: +* @out_grad: A mutable tensor with the same shape and type as "orig_input". + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator AvgPoolGrad. +*/ +REG_OP(AvgPoolV2Grad) + .INPUT(orig_input_shape, TensorType({DT_INT32})) + .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(out_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0,0,0,0}) + .ATTR(data_format, String, "NCHW") + .ATTR(global_pooling, Bool, false) + .ATTR(ceil_mode, Bool, false) + .ATTR(exclusive, Bool, true) + .OP_END_FACTORY_REG(AvgPoolV2Grad) +/** +* @brief Computes gradients of averagev2 pooling function. + +* @par Inputs: +* @li input_grad: An NHWC tensor of type float16, float32, or double. + +* @par Attributes: +* @li orig_input_shape: A required tuple or list of type int32. +* @li ksize: A required tuple or list, specifying the size of the window for +* each dimension of the input tensor. +* @li strides: A required tuple or list, specifying the stride of the sliding +* window for each dimension of the input tensor. +* @li padding_mode: A required string, specifying the type of +* the padding algorithm to use. +* @li global_pooling: Whether to use the global pooling. 
If global_pooling=true, +* ksize and pads will be ignored. Default False. +* @li ceil_mode: Whether to use the ceil function to calculate output height and +* width. Default False. +* @li exclusive: Whether to exclude padding points. default is true. +* @li data_format: An optional string. Defaults to "NHWC". + +* @par Outputs: +* @out_grad: A mutable tensor with the same shape and type as "orig_input". + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator AvgPoolGrad. +*/ +REG_OP(AvgPoolV2GradD) + .INPUT(input_grad, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(mean_matrix, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(kernel_matrix, TensorType({DT_FLOAT16})) + .OUTPUT(out_grad, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(orig_input_shape, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0,0,0,0}) + .ATTR(data_format, String, "NCHW") + .ATTR(global_pooling, Bool, false) + .ATTR(ceil_mode, Bool, false) + .ATTR(exclusive, Bool, true) + .OP_END_FACTORY_REG(AvgPoolV2GradD) + +/** +*@brief :upsample the layer + +*@par Inputs: +* one input, including: +*@li x: A tensor of type float16 or float32. +*@par Attributes: +*@li scale: A optional float32, scale factor of x. Defaults to "1.0". +*@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2". +*@li stride_w: An optional int32, broadcast the axis of w. Defaults to "2". +*@par Outputs: +*y: A tensor of type float16 or float32. +*/ +REG_OP(Upsample) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(scale, Float, 1) + .ATTR(stride_h, Int, 2) + .ATTR(stride_w, Int, 2) + .OP_END_FACTORY_REG(Upsample) + +/** +*@brief Computes gradient of the FractionalMaxPool function . \n + +*@par Inputs: +*Inputs include: +* @li orig_input: A Tensor. Must be one of the following types: float32, float64, int32, int64. +* @li orig_output: A Tensor. Must have the same type as orig_input. +* @li out_backprop: A Tensor. Must have the same type as orig_input. + 4-D with shape [batch, height, width, channels]. +* @li row_pooling_sequence: A Tensor of type int64. +* @li col_pooling_sequence: A Tensor of type int64 . \n + +*@par Attributes: +*overlapping: An optional bool. Defaults to False . \n + +*@par Outputs: +*y: A Tensor. Has the same type as orig_input . \n + +*@attention Constraints: +*The implementation for FractionalMaxPoolGrad on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow FractionalMaxPoolGrad operator. +*/ +REG_OP(FractionalMaxPoolGrad) + .INPUT(orig_input, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .INPUT(orig_output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .INPUT(out_backprop, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .INPUT(row_pooling_sequence, TensorType({ DT_INT64 })) + .INPUT(col_pooling_sequence, TensorType({ DT_INT64 })) + .OUTPUT(y, TensorType({ DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64 })) + .ATTR(overlapping, Bool, false) + .OP_END_FACTORY_REG(FractionalMaxPoolGrad) + +/** +*@brief Performs fractional average pooling on the input . \n + +*@par Inputs: +*Inputs include: +*x: A Tensor. Must be one of the following types: float32, float64, int32, int64. + 4-D with shape [batch, height, width, channels] . \n + +*@par Attributes: +*@li pooling_ratio: A list of floats that has length >= 4. +*@li pseudo_random: An optional bool. 
Defaults to False. +*@li overlapping: An optional bool. Defaults to False. When set to True, it means when pooling. +*@li deterministic: An optional bool. Defaults to False. +*@li seed: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n + +*@par Outputs: +*@li y: A Tensor. Has the same type as x. +*@li row_pooling_sequence: A Tensor of type int64. +*@li col_pooling_sequence: A Tensor of type int64 . \n + +*@attention Constraints: +*The implementation for FractionalAvgPool on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow FractionalAvgPool operator. +*/ +REG_OP(FractionalAvgPool) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .OUTPUT(row_pooling_sequence, TensorType({DT_INT64})) + .OUTPUT(col_pooling_sequence, TensorType({DT_INT64})) + .ATTR(pooling_ratio, ListFloat, {}) + .ATTR(pseudo_random, Bool, false) + .ATTR(overlapping, Bool, false) + .ATTR(deterministic, Bool, false) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(FractionalAvgPool) + +/** +*@brief Performs fractional max pooling on the input . \n + +*@par Inputs: +*Inputs include: +*x: A Tensor. Must be one of the following types: float32, float64, int32, int64. + 4-D with shape [batch, height, width, channels] . \n + +*@par Attributes: +*@li pooling_ratio: A list of floats that has length >= 4. Pooling ratio for each dimension of value. +*@li pseudo_random: An optional bool. Defaults to False. +*@li overlapping: An optional bool. Defaults to False. +*@li deterministic: An optional bool. Defaults to False. +*@li seed: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n + +*@par Outputs: +*@li y: A Tensor. Has the same type as x. +*@li row_pooling_sequence: A Tensor of type int64. +*@li col_pooling_sequence: A Tensor of type int64 . \n + +*@attention Constraints: +*The implementation for FractionalMaxPool on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow FractionalMaxPool operator. +*/ +REG_OP(FractionalMaxPool) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .OUTPUT(row_pooling_sequence, TensorType({DT_INT64})) + .OUTPUT(col_pooling_sequence, TensorType({DT_INT64})) + .ATTR(pooling_ratio, ListFloat, {}) + .ATTR(pseudo_random, Bool, false) + .ATTR(overlapping, Bool, false) + .ATTR(deterministic, Bool, false) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(FractionalMaxPool) + +/** +*@brief Finds values of the n-th order statistic for the last dimension . \n + +*@par Inputs: +*Inputs include: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, + int16, int8, int64, bfloat16, uint16, half, uint32, uint64. +* @li n: A Tensor of type int32. 0-D . \n + +*@par Attributes: +*reverse: An optional bool. Defaults to False . \n + +*@par Outputs: +*y: A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The implementation for NthElement on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow NthElement operator. 
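+
+*@par Example:
+* A rough host-side illustration (not the device implementation) of the n-th
+* order statistic along the last dimension, assuming "n" selects the n-th
+* smallest value (0-based) when "reverse" is false; names are illustrative:
+* @code
+*   #include <algorithm>
+*   #include <vector>
+*   // "x" holds "rows" contiguous slices of length "last_dim"; one value per slice.
+*   void NthElementRef(std::vector<float> x, int rows, int last_dim, int n,
+*                      std::vector<float> &y) {
+*     for (int r = 0; r < rows; ++r) {
+*       auto first = x.begin() + r * last_dim;
+*       std::nth_element(first, first + n, first + last_dim);
+*       y.push_back(*(first + n));
+*     }
+*   }
+* @endcode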
+*/ +REG_OP(NthElement) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .INPUT(n, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .ATTR(reverse, Bool, false) + .OP_END_FACTORY_REG(NthElement) + +/** +*@brief Computes gradient of the FractionalAvgPool function . \n + +*@par Inputs: +*Inputs include: +* @li orig_input_tensor_shape: A Tensor of type int64. +* @li out_backprop: A Tensor. Must be one of the following types: float32, float64, + int32, int64. 4-D with shape [batch, height, width, channels]. +* @li row_pooling_sequence: A Tensor of type int64. +* @li col_pooling_sequence: A Tensor of type int64 . \n + +*@par Attributes: +*overlapping: An optional bool. Defaults to False . \n + +*@par Outputs: +*y: A Tensor. Has the same type as out_backprop . \n + +*@attention Constraints: +*The implementation for FractionalAvgPoolGrad on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow FractionalAvgPoolGrad operator. +*/ +REG_OP(FractionalAvgPoolGrad) + .INPUT(orig_input_tensor_shape, TensorType({DT_INT64})) + .INPUT(out_backprop, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .INPUT(row_pooling_sequence, TensorType({DT_INT64})) + .INPUT(col_pooling_sequence, TensorType({DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .ATTR(overlapping, Bool, false) + .OP_END_FACTORY_REG(FractionalAvgPoolGrad) + +/** +*@brief Returns the permuted vector/tensor in the destination data format given the . \n + +*@par Inputs: +*Inputs include: +*x: A Tensor. Must be one of the following types: int32, int64. Vector of size 4 + or Tensor of shape (4, 2) in source data format . \n + +*@par Attributes: +*@li src_format: An optional string. Defaults to "NHWC". source data format. +*@li dst_format: An optional string. Defaults to "NCHW". destination data format . \n + +*@par Outputs: +*y: A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The implementation for DataFormatVecPermute on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow DataFormatVecPermute operator. +*/ +REG_OP(DataFormatVecPermute) + .INPUT(x, TensorType({ DT_INT32, DT_INT64 })) + .OUTPUT(y, TensorType({ DT_INT32, DT_INT64 })) + .ATTR(src_format, String, "NHWC") + .ATTR(dst_format, String, "NCHW") + .OP_END_FACTORY_REG(DataFormatVecPermute) + +/** +* @brief Computes gradients of the MaxPool3D function . \n + +* @par Inputs: +* @li orig_x: A mutable NDC1HWC0 tensor of type float16. +* @li orig_y: A mutable NDC1HWC0 tensor of type float16. +* @li grads: A mutable NDC1HWC0 tensor of type float16 . \n + +* @par Attributes: +* @li ksize: A required tuple or list, specifying the size of the window for +* each dimension of the input tensor. +* @li strides: A required tuple or list, specifying the stride of the sliding +* window for each dimension of the input tensor. +* @li pads: A list of 6 ints. Supports only padding along the D, +* H and W dimensions in sequence of head, tail, top, bottom, left and right. +* to use. +* @li data_format: An optional string, Specify the data format of the input and +* output data. With the default format "NDHWC" . \n + +* @par Outputs: +* y: A mutable tensor. Has the same shape as "orig_x", but type is float32 . 
\n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPool3DGrad. +*/ +REG_OP(MaxPool3DGrad) + .INPUT(orig_x, TensorType::RealNumberType()) + .INPUT(orig_y, TensorType::RealNumberType()) + .INPUT(grads, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(MaxPool3DGrad) + +/** +*@brief Performs AvgPool1D on the input . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64 . \n + +*@par Attributes: +*@li ksize: An required int, specifying the size of the window. +*@li strides: An required int. +*@li pads: A required tuple or list. +*@li ceil_mode: An optional bool. Defaults to False. +*@li count_include_pad: An optional bool. Defaults to False . \n + +*@par Outputs: +*y: A Tensor. Has the same type as x . \n + +*@par Third-party framework compatibility +*@li compatible with pytorch AvgPool1D operator. +*/ +REG_OP(AvgPool1D) + .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, Int) + .REQUIRED_ATTR(strides, Int) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, false) + .OP_END_FACTORY_REG(AvgPool1D) + +/** +*@brief Performs AvgPool1D on the input . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64 . \n + +*@par Attributes: +*@li ksize: An required int, specifying the size of the window. +*@li strides: An required int. +*@li pads: A required tuple or list. +*@li ceil_mode: An optional bool. Defaults to False. +*@li count_include_pad: An optional bool. Defaults to False . \n + +*@par Outputs: +*y: A Tensor. Has the same type as x . \n + +*@par Third-party framework compatibility +*@li compatible with pytorch AvgPool1D operator. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use AvgPool1D instead. +*/ +REG_OP(AvgPool1DD) + .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(assist_matrix, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, Int) + .REQUIRED_ATTR(strides, Int) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, false) + .OP_END_FACTORY_REG(AvgPool1DD) +/** +*@brief Performs max pooling on the input and outputs both max values and indices . \n + +*@par Inputs: +* One input: +*x: An 4d Tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"]. +*@par Attributes: +*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for +* each dimension of the input tensor. No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for +* each dimension of the input tensor. No default value. +*@li pads: A required string. No default value. +*@li dtype: A optional int. default value is 3. 
+*@li dilation: A optional list of int8, int16, int32, or int64 values. +*@li ceil_mode: A optional bool. default value is false . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x". +*argmax: A Tensor. type:uint16, format:NC1HWC0. +*@attention Constraints: +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +*@li "strides is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, +* strides[2] <= 63, strides[2] >= 1. +*@li "dilation" is a list that has length 4. +*@li "ceil_mode" is a bool, default is false . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolWithArgmax. +*/ +REG_OP(MaxPoolWithArgmaxV2) + .INPUT(x, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(argmax, TensorType({DT_UINT16})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dtype, Int, 3) + .ATTR(dilation, ListInt, {1, 1, 1, 1}) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolWithArgmaxV2) + +/** +*@brief Performs the backpropagation of MaxPoolWithArgmaxV2 . \n + +*@par Inputs: +* Three inputs, including: +*@li x: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"] +*@li grad: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"] +*@li argmx: An 4d tensor of type uint16 or int64. Must set the format, supported format list ["NCHW, NHWC"] \n + +*@par Attributes: +*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for + * each dimension of the input tensor. No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for + * each dimension of the input tensor. No default value. +*@li pads: A required string. No default value. +*@li dtype: A optional int. default value is 3. +*@li dilation: A optional list of int8, int16, int32, or int64 values. +*@li ceil_mode: A optional bool. default value is false . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@attention Constraints: +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 +*@li "dilation" is a list that has length 4. +*@li "ceil_mode" is a bool, default is false . \n + +*@see max_pool_grad_with_argmaxv2 +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV2. +*/ + +REG_OP(MaxPoolGradWithArgmaxV2) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(grad, TensorType({DT_FLOAT16})) + .INPUT(argmax, TensorType({DT_UINT16})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dtype, Int, 3) + .ATTR(dilation, ListInt, {1,1,1,1}) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV2) + +/** +* @brief Performs max pooling on the input . \n + +* @par Inputs: +* One input: +* x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int32, int64, +* uint8, int16, int8, uint16, qint8 + +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, +* specifying the size of the window for each dimension of the input tensor. +* No default value. 
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of
+* the input tensor. No default value.
+* @li padding_mode: An optional string. Defaults to "CALCULATED".
+* @li pads: An optional list of int8, int16, int32, or int64 values,
+* specifying the pad values used when padding_mode is "CALCULATED".
+* @li data_format: An optional string. Defaults to "NCHW" .
+* @li global_pooling: An optional bool, whether to use global pooling.
+* If global_pooling = true, kernel size and paddings will be ignored.
+* Defaults to False.
+* @li ceil_mode: Whether to use the ceil function to calculate output
+* height and width. If set to False, the floor function is used.
+* Defaults to False. \n
+
+* @par Outputs:
+* y: A Tensor. Has the same type and format as input "x" . \n
+
+* @attention Constraints:
+* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1,
+* ksize[1] * ksize[2] <= 255.
+* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1,
+* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
+* @li "padding_mode" is "SAME", "VALID" or "CALCULATED" .
+
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator MaxPool.
+*/
+REG_OP(MaxPoolV3)
+    .INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16, DT_QINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16, DT_QINT8}))
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .ATTR(padding_mode, String, "CALCULATED")
+    .ATTR(pads, ListInt, {0,0,0,0})
+    .ATTR(data_format, String, "NCHW")
+    .ATTR(global_pooling,Bool,false)
+    .ATTR(ceil_mode, Bool, false)
+    .OP_END_FACTORY_REG(MaxPoolV3)
+
+/**
+* @brief Computes gradients of the maxpooling function . \n
+
+* @par Inputs:
+* @li orig_input: A mutable NC1HWC0 tensor of type RealNumberType.
+* @li orig_output: A mutable NC1HWC0 tensor of type RealNumberType.
+* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n
+
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values,
+* specifying the size of the window for each dimension of the input tensor.
+* No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values,
+* specifying the stride of the sliding window for each dimension of
+* the input tensor. No default value.
+* @li padding_mode: An optional string. Defaults to "CALCULATED".
+* @li pads: An optional list of int8, int16, int32, or int64 values,
+* specifying the pad values used when padding_mode is "CALCULATED".
+* @li data_format: An optional string. Defaults to "NCHW" .
+* @li global_pooling: An optional bool, whether to use global pooling.
+* If global_pooling = true, kernel size and paddings will be ignored.
+* Defaults to False.
+* @li ceil_mode: Whether to use the ceil function to calculate output
+* height and width. If set to False, the floor function is used.
+* Defaults to False. \n
+
+* @par Outputs:
+* out_grad: A mutable tensor. Has the same shape and type as "orig_input" . \n
+
+* @attention Constraints:
+* @li Computing gradients of global pooling is not supported, which means
+* "ksize < orig_input".
+* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63].
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator MaxPoolGrad.
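+
+* @par Example:
+* A simplified host-side sketch (illustrative only) of how a max-pool gradient
+* is typically formed: each upstream gradient value is routed back to the input
+* position that held the window maximum. Assumes a single HxW plane, no padding,
+* and first-maximum tie breaking; names are illustrative:
+* @code
+*   #include <vector>
+*   void MaxPoolGradRef(const std::vector<float> &x, const std::vector<float> &dy,
+*                       int h, int w, int kh, int kw, int sh, int sw,
+*                       std::vector<float> &dx) {
+*     dx.assign(x.size(), 0.0f);
+*     int o = 0;  // walks dy in the same order the forward pass produced it
+*     for (int oh = 0; oh + kh <= h; oh += sh) {
+*       for (int ow = 0; ow + kw <= w; ow += sw, ++o) {
+*         int best = oh * w + ow;
+*         for (int i = oh; i < oh + kh; ++i) {
+*           for (int j = ow; j < ow + kw; ++j) {
+*             if (x[i * w + j] > x[best]) best = i * w + j;
+*           }
+*         }
+*         dx[best] += dy[o];
+*       }
+*     }
+*   }
+* @endcode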
+*/ +REG_OP(MaxPoolV3Grad) + .INPUT(orig_input, TensorType::RealNumberType()) + .INPUT(orig_output, TensorType::RealNumberType()) + .INPUT(grad, TensorType::RealNumberType()) + .OUTPUT(out_grad, TensorType::RealNumberType()) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0, 0, 0, 0}) + .ATTR(data_format, String, "NCHW") + .ATTR(global_pooling, Bool, false) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolV3Grad) + +/** +*@brief Performs dilation2d on the input . \n + +*@par Inputs: +*x: A tensor of shape is 4d, format is support NHWC. +*filter: A tensor of shape is 3d, the type is same with x, +and the c dimension is same with x. \n + +*@par Attributes: +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. +*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1. +*@li padding_mode: A optional string. Defaults to "SAME", it support SAME and VALID. +*@li pads: An optional list of 4 ints. +*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n + +*@par Outputs: +*y: The output tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Dilation2D. +*/ +REG_OP(Dilation2D) + .INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(filter,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .OUTPUT(y,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(rates, ListInt) + .ATTR(padding_mode, String, "SAME") + .ATTR(pads, ListInt, {0,0,0,0}) + .ATTR(ceil_mode, Bool, false) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(Dilation2D) + +/** +* @brief Applies a 2D adaptive average pooling over +* an input signal composed of several input planes. \n + +* @par Inputs: +* One input, including: +* @li x: A Tensor. Must be one of the following data types: +* float16, float32. \n + +* @par Attributes: +* @li output_size: A required list of 2 ints +* specifying the size (H,W) of the output tensor. \n + +* @par Outputs: +* @li y: A Tensor. Has the same data type as "x" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator AdaptiveAvgPool2d. +*/ +REG_OP(AdaptiveAvgPool2d) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(output_size, ListInt) + .OP_END_FACTORY_REG(AdaptiveAvgPool2d) + +/** +* @brief Compute gradients of adaptive averagev2 pooling function. + +* @par Inputs: +* @li input_grad: A NCHW Tensor. Must be one of the following data types: +* float16, float32. + +* @par Attributes: +* @li orig_input_shape: A required tuple or list of type int32. + +* @par Outputs: +* @li output_grad: A tensor with the same shape and type as "orig_input_shape". + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator AdaptiveAvgPool2dGrad. 
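+
+* @par Example:
+* For orientation, PyTorch-style adaptive average pooling derives each output
+* cell's window from the input and output sizes instead of explicit ksize and
+* strides, and the gradient spreads each upstream value uniformly over that
+* window. A 1-D sketch of the assumed window computation (illustrative only):
+* @code
+*   #include <cmath>
+*   // Window [start, end) of output index i when mapping a length "in" axis to "out".
+*   inline void AdaptiveWindow(int i, int in, int out, int &start, int &end) {
+*     start = static_cast<int>(std::floor(static_cast<double>(i) * in / out));
+*     end   = static_cast<int>(std::ceil(static_cast<double>(i + 1) * in / out));
+*   }
+* @endcode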
+*/ +REG_OP(AdaptiveAvgPool2dGrad) + .INPUT(input_grad, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(output_grad, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(orig_input_shape, ListInt) + .OP_END_FACTORY_REG(AdaptiveAvgPool2dGrad) + +/** +* @brief Performs the backpropagation of MaxPoolWithGradArgmaxV1. + +* @par Inputs: +* Three inputs, including: +* @li x: An NC1HWC0 tensor of type float16. +* @li grad: An NC1HWC0 tensor of type float16. +* @li argmax: An NC1HWC0 tensor of type uint16 or int64. \n + +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for +* each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for +* each dimension of the input tensor. No default value. +* @li pads: A required listint. \n + +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x". \n + +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 +* @li "pads" is listint. +* @li "ceil_mode" defaults to False. +* @li "data_format" defaults to "NC1HWC0". \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1. +*/ + +REG_OP(MaxPoolGradWithArgmaxV1) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(grad, TensorType({DT_FLOAT16})) + .INPUT(argmax, TensorType({DT_UINT16})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dtype, Int, 3) + .ATTR(dilation, ListInt, {1, 1, 1, 1}) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV1) + +/** +* @brief Performs max pooling on the input and outputs both max values and indices. + +* @par Inputs: +* One input: +* x: An NC1HWC0 Tensor of type float16. \n + +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for +* each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for +* each dimension of the input tensor. No default value. +* @li pads: A required string. No default value. \n + +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x". +* argmax: A Tensor. type:uint16, format:NC1HWC0. \n + +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, +* strides[2] <= 63, strides[2] >= 1. +* @li "pads" is listint. +* @li "ceil_mode" defaults to False. +* @li "data_format" defaults to "NC1HWC0". \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1. 
+*/ +REG_OP(MaxPoolWithArgmaxV1) + .INPUT(x, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(argmax, TensorType({DT_UINT16})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dtype, Int, 3) + .ATTR(dilation, ListInt, {1, 1, 1, 1}) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1) + +} // namespace ge +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H diff --git a/third_party/fwkacllib/inc/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/inc/ops/nn_training_ops.h new file mode 100644 index 00000000..92074872 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/nn_training_ops.h @@ -0,0 +1,2598 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file nn_training_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ + +#include "graph/operator_reg.h" +namespace ge { +/** +*@brief Updates "var" according to the AdaMax algorithm. +* t-1 mean previous period. +* m_t <- beta1 * m{t-1} + (1 - beta1) * grad +* v_t <- max(beta2 * v{t-1}, abs(grad)) +* var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon) +* +*@attention Constraints: +* the input tensors must have the same shape. +* +*@par Inputs: +*@li var: A mutable tensor. Must be one of the following types: TensorType::NumberType(). +* Should be from a Variable(). +*@li m: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li v: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li beta1_power: A scalar. Has the same type as "var". +*@li lr: learning_rate. A scalar. Has the same type as "var". +*@li beta1: A scalar. Has the same type as "var". +*@li beta2: A scalar. Has the same type as "var". +*@li epsilon: A scalar. Has the same type as "var". +*@li grad: A tensor for the gradient. Has the same type as "var". +* +*@par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "ms", and "mom" tensors is protected +* by a lock; otherwise the behavior is undefined, but may exhibit less +* contention. +* +*@par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdaMax. +* +*/ +REG_OP(ApplyAdaMax) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(v, TensorType::NumberType()) + .INPUT(beta1_power, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(beta1, TensorType::NumberType()) + .INPUT(beta2, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAdaMax) + +/** +*@brief Updates "var" according to the AdaMax algorithm. +* t-1 mean previous period. 
+* m_t <- beta1 * m{t-1} + (1 - beta1) * grad +* v_t <- max(beta2 * v{t-1}, abs(grad)) +* var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon) +* +*@attention Constraints: +* the input tensors must have the same shape. +* +*@par Inputs: +*@li var: A mutable tensor. Must be one of the following types: TensorType::NumberType(). +* Should be from a Variable(). +*@li m: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li v: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li beta1_power: A scalar. Has the same type as "var". +*@li lr: learning_rate. A scalar. Has the same type as "var". +*@li beta1: A scalar. Has the same type as "var". +*@li beta2: A scalar. Has the same type as "var". +*@li epsilon: A scalar. Has the same type as "var". +*@li grad: A tensor for the gradient. Has the same type as "var". +* +*@par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "ms", and "mom" tensors is protected +* by a lock; otherwise the behavior is undefined, but may exhibit less +* contention. +* +*@par Outputs: +*@li var: A mutable tensor. Has the same type as input "var". +*@li m: A mutable tensor. Has the same type as input "m". +*@li v: A mutable tensor. Has the same type as input "v". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdaMax. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdaMax instead. +*/ +REG_OP(ApplyAdaMaxD) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(v, TensorType::NumberType()) + .INPUT(beta1_power, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(beta1, TensorType::NumberType()) + .INPUT(beta2, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(m, TensorType::NumberType()) + .OUTPUT(v, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAdaMaxD) + +/** +*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme . \n + +*@par Inputs: +* Five inputs, including: +*@li var: An NCHW, NHWC, or ND Tensor of type float32. +*@li accum: An NCHW, NHWC, or ND Tensor of type float32. +*@li lr: An NCHW, NHWC, or ND Tensor of type float32. +*@li grad: An NCHW, NHWC, or ND Tensor of type float32. +*@li indices: An NCHW, NHWC, or ND Tensor of type float32 . \n + +*@par Attributes: +*@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*@li update_slots: An optional bool. Defaults to "True". If "True", the calcution will be different as "False" . \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyAdagrad. +*/ +REG_OP(SparseApplyAdagrad) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(lr, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .ATTR(use_locking, Bool, false) + .ATTR(update_slots, Bool, true) + .OP_END_FACTORY_REG(SparseApplyAdagrad) + +/** +*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme . \n + +*@par Inputs: +* Four inputs, including: +*@li var: An NCHW, NHWC, or ND Tensor of type float32. 
+*@li accum: An NCHW, NHWC, or ND Tensor of type float32. +*@li grad: An NCHW, NHWC, or ND Tensor of type float32. +*@li indices: An NCHW, NHWC, or ND Tensor of type int32 . \n + +*@par Attributes: +*@li lr: Required, used for computation. +*@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*@li update_slots: An optional bool. Defaults to "True". If "True", the calcution will be different as "False" . \n + +*@par Outputs: +*@li var: A Tensor. Has the same type and format as input "var". +*@li accum: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyAdagrad. \n +* +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyAdagrad instead. +*/ +REG_OP(SparseApplyAdagradD) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .OUTPUT(accum, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(lr, Float) + .ATTR(use_locking, Bool, false) + .ATTR(update_slots, Bool, true) + .OP_END_FACTORY_REG(SparseApplyAdagradD) + +/** +*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme . \n + +*@par Inputs: +*Six inputs, including: +*@li var: An NCHW, NHWC, or ND Tensor of type float32. +*@li accum: An NCHW, NHWC, or ND Tensor of type float32. +*@li lr: An NCHW, NHWC, or ND Tensor of type float32. +*@li epsilon: An NCHW, NHWC, or ND Tensor of type float32. +*@li grad: An NCHW, NHWC, or ND Tensor of type float32. +*@li indices: An NCHW, NHWC, or ND Tensor of type float32 . \n + +*@par Attributes: +*@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different . \n + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SparseApplyAdagradV2. +*/ +REG_OP(SparseApplyAdagradV2) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(lr, TensorType({DT_FLOAT})) + .INPUT(epsilon, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .ATTR(use_locking, Bool, false) + .ATTR(update_slots, Bool, true) + .OP_END_FACTORY_REG(SparseApplyAdagradV2) + +/** +*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme . \n + +*@par Inputs: +*Four inputs, including: +*@li var: An NCHW, NHWC, or ND Tensor of type float32. +*@li accum: An NCHW, NHWC, or ND Tensor of type float32. +*@li grad: An NCHW, NHWC, or ND Tensor of type float32. +*@li indices: An NCHW, NHWC, or ND Tensor of type int32 . \n + +*@par Attributes: +*@li lr: Required, used for computation. +*@li epsilon: Required, used for computation. +*@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different . \n + +*@par Outputs: +*@li var: A Tensor. Has the same type and format as input "var". +*@li accum: A Tensor. Has the same type and format as input "accum" . 
\n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SparseApplyAdagradV2. \n +* +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyAdagradV2 instead. +*/ +REG_OP(SparseApplyAdagradV2D) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .OUTPUT(accum, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(lr, Float) + .REQUIRED_ATTR(epsilon, Float) + .ATTR(use_locking, Bool, false) + .ATTR(update_slots, Bool, true) + .OP_END_FACTORY_REG(SparseApplyAdagradV2D) + +/** +*@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you +* want to use Nesterov momentum. +* computing process: +* accum = accum * momentum + grad +* var -= lr * accum +* +*@attention Constraints: +* the input tensors must have the same shape. +* +*@par Inputs: +*@li var: A mutable tensor. Should be from a Variable(). +*@li accum: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li lr: A scalar. Has the same type as "var". +*@li grad: A tensor for the gradient. Has the same type as "var". +* +*@par Attributes: +*@li use_nesterov: An optional bool. Defaults to "False". +* If "True", the tensor passed to compute grad will be +* var - lr * momentum * accum, so in the end, the var you get is actually +* var - lr * momentum * accum. +* +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "ms", and "mom" tensors is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +*@par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyMomentum. +* +*/ + +REG_OP(ApplyMomentum) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_nesterov, Bool, false) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyMomentum) + + +/** +*@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you +* want to use Nesterov momentum. +* computing process: +* accum = accum * momentum + grad +* var -= lr * accum +* +*@attention Constraints: +* the input tensors must have the same shape. +* +*@par Inputs: +*@li var: A mutable tensor. Should be from a Variable(). +*@li accum: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li lr: A scalar. Has the same type as "var". +*@li grad: A tensor for the gradient. Has the same type as "var". +* +*@par Attributes: +*@li use_nesterov: An optional bool. Defaults to "False". +* If "True", the tensor passed to compute grad will be +* var - lr * momentum * accum, so in the end, the var you get is actually +* var - lr * momentum * accum. +* +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "ms", and "mom" tensors is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +*@par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* accum: A mutable tensor. Has the same type as input "accum". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyMomentum. 
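+*
+*@par Example:
+* An element-wise sketch of the update rule above (illustrative only; the
+* Nesterov branch follows the TensorFlow ApplyMomentum kernel):
+* @code
+*   #include <vector>
+*   void ApplyMomentumRef(std::vector<float> &var, std::vector<float> &accum,
+*                         float lr, float momentum, const std::vector<float> &grad,
+*                         bool use_nesterov) {
+*     for (size_t i = 0; i < var.size(); ++i) {
+*       accum[i] = accum[i] * momentum + grad[i];
+*       if (use_nesterov) {
+*         var[i] -= grad[i] * lr + accum[i] * momentum * lr;
+*       } else {
+*         var[i] -= lr * accum[i];
+*       }
+*     }
+*   }
+* @endcode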
+* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyMomentum instead. +*/ + +REG_OP(ApplyMomentumD) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .ATTR(use_nesterov, Bool, false) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyMomentumD) + +/** +*@brief Updates '*var' according to the momentum scheme. +* accum = accum * momentum - grad * lr +* if use_nesterov is True: +* var += accum * momentum - grad * lr +* else: +* var += accum +* +*@par Inputs: +*@li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +*@li accum: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li lr: A tensor for the learning rate. Has the same type as "var". Should be +* from a Variable(). +*@li grad: A tensor for the gradient. Has the same type as "var". Should be +* from a Variable(). +*@li momentum: A scalar. Has the same type as "var". +* +*@par Attributes: +*@li use_nesterov: An optional bool. Defaults to "False". +* If "True", var will be updated by using Nesterov momentum. +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" tensor is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +*@par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +*@attention Constraints: +* The input tensors must have the same shape. +* +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyKerasMomentum. +* +*/ +REG_OP(ApplyKerasMomentum) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .ATTR(use_nesterov, Bool, false) + .OP_END_FACTORY_REG(ApplyKerasMomentum) + + +/** +*@brief Updates '*var' according to the momentum scheme. +* accum = accum * momentum - grad * lr +* if use_nesterov is True: +* var += accum * momentum - grad * lr +* else: +* var += accum +* +*@par Inputs: +*@li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +*@li accum: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li lr: A tensor for the learning rate. Has the same type as "var". Should be +* from a Variable(). +*@li grad: A tensor for the gradient. Has the same type as "var". Should be +* from a Variable(). +*@li momentum: A scalar. Has the same type as "var". Should be from a +* Variable(). +* +*@par Attributes: +*@li use_nesterov: An optional bool. Defaults to "False". +* If "True", var will be updated by using nesterov momentum +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" tensor is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +*@par Outputs: +*@li var: A mutable tensor. Has the same type as input "var". +*@li accum: A mutable tensor. Has the same type as input "var" +* +*@attention Constraints: +* The input tensors must have the same shape. 
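+*
+*@par Example:
+* The Keras-style rule differs from ApplyMomentum in that the learning rate is
+* folded into "accum" itself. An element-wise sketch of the formulas above
+* (illustrative only):
+* @code
+*   #include <vector>
+*   void ApplyKerasMomentumRef(std::vector<float> &var, std::vector<float> &accum,
+*                              float lr, float momentum,
+*                              const std::vector<float> &grad, bool use_nesterov) {
+*     for (size_t i = 0; i < var.size(); ++i) {
+*       accum[i] = accum[i] * momentum - grad[i] * lr;
+*       var[i] += use_nesterov ? (accum[i] * momentum - grad[i] * lr) : accum[i];
+*     }
+*   }
+* @endcode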
+* +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyKerasMomentum. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyKerasMomentum instead. +*/ +REG_OP(ApplyKerasMomentumD) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .ATTR(use_nesterov, Bool, false) + .OP_END_FACTORY_REG(ApplyKerasMomentumD) + + +/** +*@brief Updates '*var' according to the Adam algorithm. +* lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t) +* m_t := beta_1 * m_{t-1} + (1 - beta_1) * g +* v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g +* vhat_t := max{vhat_{t-1}, v_t} +* variable := variable - lr_t * m_t / (sqrt{vhat_t} + epsilon) +* +*@par Inputs: +*@li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +*@li m: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li v: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li vhat: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li beta1_power: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li beta2_power: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li lr: A tensor for the learning rate. Has the same type as "var". Should be +* from a Variable(). +*@li grad: A tensor for the gradient. Has the same type as "var". Should be +* from a Variable(). +* +*@par Attributes: +*@li beta1: A scalar. Has the same type as "var". +*@li beta2: A scalar. Has the same type as "var". +*@li epsilon: A scalar. Has the same type as "var". +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" tensor is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +*@par Outputs: +*@li var: A mutable tensor. Has the same type as input "var". +*@li m: A mutable tensor. Has the same type as input "var" +*@li v: A mutable tensor. Has the same type as input "var" +*@li vhat: A mutable tensor. Has the same type as input "var" +* +*@attention Constraints: +* The input tensors must have the same shape. +* +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyKerasMomentum. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdamWithAmsgrad instead. +* +*/ +REG_OP(ApplyAdamWithAmsgradD) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(v, TensorType::NumberType()) + .INPUT(vhat, TensorType::NumberType()) + .INPUT(beta1_power, TensorType::NumberType()) + .INPUT(beta2_power, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(m, TensorType::NumberType()) + .OUTPUT(v, TensorType::NumberType()) + .OUTPUT(vhat, TensorType::NumberType()) + .REQUIRED_ATTR(beta1, Float) + .REQUIRED_ATTR(beta2, Float) + .REQUIRED_ATTR(epsilon, Float) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAdamWithAmsgradD) + + +/** +*@brief Updates '*var' according to the Adam algorithm.. 
+* lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t) +* m_t := beta_1 * m_{t-1} + (1 - beta_1) * g +* v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g +* vhat_t := max{vhat_{t-1}, v_t} +* variable := variable - lr_t * m_t / (sqrt{vhat_t} + epsilon) +* +*@par Inputs: +*@li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +*@li m: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li v: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li vhat: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li beta1_power: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li beta2_power: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +*@li lr: A tensor for the learning rate. Has the same type as "var". Should be +* from a Variable(). +*@li grad: A tensor for the gradient. Has the same type as "var". Should be +* from a Variable(). +* +*@par Attributes: +*@li beta1: A scalar. Has the same type as "var". +*@li beta2: A scalar. Has the same type as "var". +*@li epsilon: A scalar. Has the same type as "var". +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" tensor is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +*@par Outputs: +*@li var: A mutable tensor. Has the same type as input "var". +*@li m: A mutable tensor. Has the same type as input "var" +*@li v: A mutable tensor. Has the same type as input "var" +*@li vhat: A mutable tensor. Has the same type as input "var" +* +*@attention Constraints: +* The input tensors must have the same shape. +* +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyKerasMomentum. +* +*/ +REG_OP(ApplyAdamWithAmsgrad) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(v, TensorType::NumberType()) + .INPUT(vhat, TensorType::NumberType()) + .INPUT(beta1_power, TensorType::NumberType()) + .INPUT(beta2_power, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(beta1, TensorType::NumberType()) + .INPUT(beta2, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAdamWithAmsgrad) + + +/** +*@brief Updates "var" according to the AddSign update. +* t-1 mean previous period. +* m_t <- beta1 * m_{t-1} + (1 - beta1) * grad +* update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad +* var <- var - lr * update +* +*@attention Constraints: +* the input tensors must have the same shape. +* +*@par Inputs: +*@li var: A mutable tensor. Should be from a Variable(). +*@li m: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li lr: A scalar. Has the same type as "var". +*@li logbase: A scalar. Has the same type as "var". +*@li sign_decay: A scalar. Has the same type as "var". +*@li beta: A scalar. Has the same type as "var". +*@li grad: A tensor for the gradient. Has the same type as "var". +* +*@par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "ms", and "mom" tensors is protected +* by a lock; otherwise the behavior is undefined, but may exhibit less +* contention. +* +*@par Outputs: +* var: A mutable tensor. 
Has the same type as input "var". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyPowerSign. +* +*/ +REG_OP(ApplyPowerSign) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(logbase, TensorType::NumberType()) + .INPUT(sign_decay, TensorType::NumberType()) + .INPUT(beta, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyPowerSign) + +/** +*@brief Updates "var" according to the AddSign update. +* t-1 mean previous period. +* m_t <- beta1 * m_{t-1} + (1 - beta1) * grad +* update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad +* var <- var - lr * update +* +*@attention Constraints: +* the input tensors must have the same shape. +* +*@par Inputs: +*@li var: A mutable tensor. Should be from a Variable(). +*@li m: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li lr: A scalar. Has the same type as "var". +*@li logbase: A scalar. Has the same type as "var". +*@li sign_decay: A scalar. Has the same type as "var". +*@li beta: A scalar. Has the same type as "var". +*@li grad: A tensor for the gradient. Has the same type as "var". +* +*@par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "ms", and "mom" tensors is protected +* by a lock; otherwise the behavior is undefined, but may exhibit less +* contention. +* +*@par Outputs: +*@li var: A mutable tensor. Has the same type as input "var". +*@li m: A mutable tensor. Has the same type as input "var". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyPowerSign. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyPowerSign instead. +*/ +REG_OP(ApplyPowerSignD) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(logbase, TensorType::NumberType()) + .INPUT(sign_decay, TensorType::NumberType()) + .INPUT(beta, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(m, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyPowerSignD) + +/** +*@brief Updates "var" as FOBOS algorithm with fixed learning rate. +* prox_v = var - alpha * delta +* var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} +* +*@attention Constraints: +* the input tensors must have the same shape. +* +*@par Inputs: +*@li var: A mutable tensor. Should be from a Variable(). +*@li alpha: A scalar. Has the same type as "var". +*@li l1: A scalar. Has the same type as "var". +*@li l2: A scalar. Has the same type as "var". +*@li delta: A tensor. Has the same type as "var". The change. +* +*@par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "ms", and "mom" tensors is protected +* by a lock; otherwise the behavior is undefined, but may exhibit less +* contention. +* +*@par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyProximalGradientDescent. 
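+*
+*@par Example:
+* A rough scalar walk-through of the update above, with illustrative
+* values only (not taken from this header). Assume var = 2.0, alpha = 0.5,
+* l1 = 1.0, l2 = 2.0 and delta = 1.0:
+*   prox_v = 2.0 - 0.5 * 1.0 = 1.5
+*   var    = sign(1.5) / (1 + 0.5 * 2.0) * max{|1.5| - 0.5 * 1.0, 0}
+*          = 0.5 * 1.0 = 0.5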
+* +*/ +REG_OP(ApplyProximalGradientDescent) + .INPUT(var, TensorType::NumberType()) + .INPUT(alpha, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(delta, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyProximalGradientDescent) + +/** +*@brief Updates "var" according to the AddSign update . \n + +*@par Inputs: +*Seven inputs, including: +* @li var: A mutable Tensor of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li m: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li alpha: A Tensor of the same type as "var". Must be a scalar. +* @li sign_decay: A Tensor of the same type as "var". Must be a scalar. +* @li beta: A Tensor of the same type as "var". Must be a scalar. +* @li grad: A Tensor of the same type as "var", for the gradient. + + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "m" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +*@par Outputs: +*var: A mutable Tensor. Has the same type as "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAddSign. +*/ +REG_OP(ApplyAddSign) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(alpha, TensorType::NumberType()) + .INPUT(sign_decay, TensorType::NumberType()) + .INPUT(beta, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAddSign) + +/** +*@brief Updates "var" according to the AddSign update . \n + +*@par Inputs: +*Seven inputs, including: +* @li var: A mutable Tensor of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li m: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li alpha: A Tensor of the same type as "var". Must be a scalar. +* @li sign_decay: A Tensor of the same type as "var". Must be a scalar. +* @li beta: A Tensor of the same type as "var". Must be a scalar. +* @li grad: A Tensor of the same type as "var", for the gradient. + + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "m" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +*@par Outputs: +*@li var: A mutable Tensor. Has the same type as "var". +*@li m: A mutable Tensor. Has the same type as "m" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAddSign. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAddSign instead. 
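+*
+*@par Example:
+* For reference, the AddSign update referred to above (following the
+* TensorFlow ApplyAddSign definition; the formula is not spelled out in
+* this header) is roughly:
+*   m      <- beta * m + (1 - beta) * grad
+*   update <- (alpha + sign_decay * sign(grad) * sign(m)) * grad
+*   var    <- var - lr * update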
+*/ +REG_OP(ApplyAddSignD) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(alpha, TensorType::NumberType()) + .INPUT(sign_decay, TensorType::NumberType()) + .INPUT(beta, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(m, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAddSignD) + +/** +*@brief Updates "var" according to the centered RMSProp algorithm. +* The centered RMSProp algorithm uses an estimate of the centered second moment +* (i.e., the variance) for normalization, as opposed to regular RMSProp, which +* uses the (uncentered) second moment. This often helps with training, but is +* slightly more expensive in terms of computation and memory. +* +* t-1 mean previous period. +* mg <- rho * mg{t-1} + (1-rho) * grad +* ms <- rho * ms{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon) +* var <- var - mom +* +*@attention Constraints: +*@li in dense implementation of this algorithm, mg, ms, and mom will +* update even if the grad is zero, but in this sparse implementation, mg, ms, +* and mom will not update in iterations during which the grad is zero. +*@li the input tensors must have the same shape. +* +*@par Inputs: +*@li var: A mutable tensor. Should be from a Variable(). +*@li mg: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li ms: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li mom: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li lr: A scalar. Has the same type as "var". +*@li rho: A scalar. Has the same type as "var". +*@li momentum: A tensor. Has the same type as "var". +*@li epsilon: A scalar. Has the same type as "var". +*@li grad: A tensor for the gradient. Has the same type as "var". +* +*@par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "ms", and "mom" tensors is protected +* by a lock; otherwise the behavior is undefined, but may exhibit less +* contention. +* +*@par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyCenteredRMSProp. +* +*/ +REG_OP(ApplyCenteredRMSProp) + .INPUT(var, TensorType::NumberType()) + .INPUT(mg, TensorType::NumberType()) + .INPUT(ms, TensorType::NumberType()) + .INPUT(mom, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(rho, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyCenteredRMSProp) + +/** +*@brief Updates "var" according to the centered RMSProp algorithm. +* The centered RMSProp algorithm uses an estimate of the centered second moment +* (i.e., the variance) for normalization, as opposed to regular RMSProp, which +* uses the (uncentered) second moment. This often helps with training, but is +* slightly more expensive in terms of computation and memory. +* +* t-1 mean previous period. 
+* mg <- rho * mg{t-1} + (1-rho) * grad +* ms <- rho * ms{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon) +* var <- var - mom +* +*@attention Constraints: +*@li in dense implementation of this algorithm, mg, ms, and mom will +* update even if the grad is zero, but in this sparse implementation, mg, ms, +* and mom will not update in iterations during which the grad is zero. +*@li the input tensors must have the same shape. +* +*@par Inputs: +*@li var: A mutable tensor. Should be from a Variable(). +*@li mg: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li ms: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li mom: A mutable tensor. Has the same type as "var". +* Should be from a Variable(). +*@li lr: A scalar. Has the same type as "var". +*@li rho: A scalar. Has the same type as "var". +*@li momentum: A tensor. Has the same type as "var". +*@li epsilon: A scalar. Has the same type as "var". +*@li grad: A tensor for the gradient. Has the same type as "var". +* +*@par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "ms", and "mom" tensors is protected +* by a lock; otherwise the behavior is undefined, but may exhibit less +* contention. +* +*@par Outputs: +*@li var: A mutable Tensor. Has the same type as "var". +*@li mg: A mutable Tensor. Has the same type as "mg". +*@li ms: A mutable Tensor. Has the same type as "ms". +*@li mom: A mutable Tensor. Has the same type as "mom" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyCenteredRMSPropD. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyCenteredRMSProp instead. +*/ +REG_OP(ApplyCenteredRMSPropD) + .INPUT(var, TensorType::NumberType()) + .INPUT(mg, TensorType::NumberType()) + .INPUT(ms, TensorType::NumberType()) + .INPUT(mom, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(rho, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(mg, TensorType::NumberType()) + .OUTPUT(ms, TensorType::NumberType()) + .OUTPUT(mom, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyCenteredRMSPropD) + +/** +*@brief Updates "var" by subtracting 'alpha' * 'delta' from it. +* var -= delta * alpha +* +*@attention Constraints: +* the input tensors must have the same shape. +* +*@par Inputs: +*@li var: A mutable tensor. Should be from a Variable(). +*@li alpha: A scalar. Has the same type as "var". +*@li delta: A tensor for the change. Has the same type as "var". +* +*@par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" tensors is protected +* by a lock; otherwise the behavior is undefined, but may exhibit less +* contention. +* +*@par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyGradientDescent. +* +*/ +REG_OP(ApplyGradientDescent) + .INPUT(var, TensorType::NumberType()) + .INPUT(alpha, TensorType::NumberType()) + .INPUT(delta, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyGradientDescent) + +/** +*@brief Updates "var" according to the adagrad scheme. 
+* accum += grad * grad
+* var -= lr * grad * (1 / sqrt(accum))
+*
+*@attention Constraints:
+* the input tensors must have the same shape.
+*
+*@par Inputs:
+*@li var: A mutable tensor. Should be from a Variable().
+*@li accum: A mutable tensor. Has the same type as "var".
+* Should be from a Variable().
+*@li lr: A scalar. Has the same type as "var".
+*@li grad: A tensor for the gradient. Has the same type as "var".
+*
+*@par Attributes:
+*@li update_slots: An optional bool. Defaults to "True". If "True", "accum" will be updated.
+*@li use_locking: An optional bool. Defaults to "False".
+* If "True", updating of the "var", "ms", and "mom" tensors is protected
+* by a lock; otherwise the behavior is undefined, but may exhibit less
+* contention.
+*
+*@par Outputs:
+* var: A mutable tensor. Has the same type as input "var".
+*
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator ApplyAdagrad.
+*
+*/
+REG_OP(ApplyAdagrad)
+    .INPUT(var, TensorType::NumberType())
+    .INPUT(accum, TensorType::NumberType())
+    .INPUT(lr, TensorType::NumberType())
+    .INPUT(grad, TensorType::NumberType())
+    .OUTPUT(var, TensorType::NumberType())
+    .ATTR(update_slots, Bool, true)
+    .ATTR(use_locking, Bool, false)
+    .OP_END_FACTORY_REG(ApplyAdagrad)
+
+/**
+*@brief Updates "var" according to the adagrad scheme.
+* accum += grad * grad
+* var -= lr * grad * (1 / sqrt(accum))
+*
+*@attention Constraints:
+* the input tensors must have the same shape.
+*
+*@par Inputs:
+*@li var: A mutable tensor. Should be from a Variable().
+*@li accum: A mutable tensor. Has the same type as "var".
+* Should be from a Variable().
+*@li lr: A scalar. Has the same type as "var".
+*@li grad: A tensor for the gradient. Has the same type as "var".
+*
+*@par Attributes:
+*@li update_slots: An optional bool. Defaults to "True". If "True", "accum" will be updated.
+*@li use_locking: An optional bool. Defaults to "False".
+* If "True", updating of the "var", "ms", and "mom" tensors is protected
+* by a lock; otherwise the behavior is undefined, but may exhibit less
+* contention.
+*
+*@par Outputs:
+*@li var: A mutable tensor. Has the same type as input "var".
+*@li accum: A mutable tensor. Has the same type as input "var".
+*
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator ApplyAdagrad.
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagrad instead.
+*/
+REG_OP(ApplyAdagradD)
+    .INPUT(var, TensorType::NumberType())
+    .INPUT(accum, TensorType::NumberType())
+    .INPUT(lr, TensorType::NumberType())
+    .INPUT(grad, TensorType::NumberType())
+    .OUTPUT(var, TensorType::NumberType())
+    .OUTPUT(accum, TensorType::NumberType())
+    .ATTR(update_slots, Bool, true)
+    .ATTR(use_locking, Bool, false)
+    .OP_END_FACTORY_REG(ApplyAdagradD)
+
+/**
+* @brief Updates "var" according to the adagradv2 scheme.
+* accum += grad * grad
+* var -= lr * grad * (1 / sqrt(accum) + epsilon)
+*
+* @par Inputs:
+* @li var: A mutable tensor. Must be one of the data types defined in
+* TensorType::NumberType(). Should be from a Variable().
+* @li accum: A mutable tensor. Has the same type as "var". Should be from a
+* Variable().
+* @li lr: A tensor for the learning rate. Has the same type as "var". Should be
+* from a Variable().
+* @li grad: A tensor for the gradient. Has the same type as "var". Should be
+* from a Variable().
+* @li epsilon: A scalar. Has the same type as "var".
+* +* @par Attributes: +* @li update_slots: An optional bool. Defaults to "True". +* If "True", "accum" will be updated +* @li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" tensor is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +* @par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +* @attention Constraints: +* The input tensors must have the same shape. +* +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAdagrad. +* +*/ +REG_OP(ApplyAdagradV2) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(update_slots, Bool, true) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAdagradV2) + + +/** +* @brief Updates "var" according to the adagradv2 scheme. +* accum += grad * grad +* var -= lr * grad * (1 / sqrt(accum) + epsilon) +* +* @par Inputs: +* @li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +* @li accum: A mutable tensor. Has the same type as "var". Should be from a +* Variable(). +* @li lr: A tensor for the learning rate. Has the same type as "var". Should be +* from a Variable(). +* @li grad: A tensor for the gradient. Has the same type as "var". Should be +* from a Variable(). +* +* @par Attributes: +* @li epsilon: A scalar. Has the same type as "var". +* @li update_slots: An optional bool. Defaults to "True". +* If "True", "accum" will be updated +* @li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" tensor is protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* +* @par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +* @attention Constraints: +* The input tensors must have the same shape. +* +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAdagrad. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagradV2 instead. +*/ +REG_OP(ApplyAdagradV2D) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .REQUIRED_ATTR(epsilon, Float) + .ATTR(update_slots, Bool, true) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAdagradV2D) + +/** +*@brief Updates "var" according to the proximal adagrad scheme . \n + +*@par Inputs: +*Eight inputs, including: +* @li var: A mutable Tensor. Must be one of the following types: +* TensorType::NumberType(). Should be a Variable Tensor. +* @li gradient_accumulator: A mutable Tensor. Must have the same +* type as "var". Should be a Variable Tensor. +* @li gradient_squared_accumulator: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li lr: A Tensor of the same type as "var". +* Scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var". +* L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var". +* L2 regulariation. Must be a scalar. +* @li global_step: A Tensor of type int32 or int64. 
+* Training step number. Must be a scalar . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the var and accum tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +*@par Outputs: +*var: A mutable Tensor. Has the same type as "var" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdagradDA. +*/ +REG_OP(ApplyAdagradDA) + .INPUT(var, TensorType::NumberType()) + .INPUT(gradient_accumulator, TensorType::NumberType()) + .INPUT(gradient_squared_accumulator, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(global_step, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAdagradDA) + +/** +*@brief Updates "var" according to the proximal adagrad scheme . \n + +*@par Inputs: +*Eight inputs, including: +* @li var: A mutable Tensor. Must be one of the following types: +* TensorType::NumberType(). Should be a Variable Tensor. +* @li gradient_accumulator: A mutable Tensor. Must have the same +* type as "var". Should be a Variable Tensor. +* @li gradient_squared_accumulator: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li lr: A Tensor of the same type as "var". +* Scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var". +* L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var". +* L2 regulariation. Must be a scalar. +* @li global_step: A Tensor of type int32 or int64. +* Training step number. Must be a scalar . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the var and accum tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +*@par Outputs: +*var: A mutable Tensor. Has the same type as "var". +*gradient_accumulator: A mutable Tensor. Has the same type as "var". +*gradient_squared_accumulator: A mutable Tensor. Has the same type as "var" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdagradDA. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagradDA instead. +*/ +REG_OP(ApplyAdagradDAD) + .INPUT(var, TensorType::NumberType()) + .INPUT(gradient_accumulator, TensorType::NumberType()) + .INPUT(gradient_squared_accumulator, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(global_step, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(gradient_accumulator, TensorType::NumberType()) + .OUTPUT(gradient_squared_accumulator, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAdagradDAD) + +/** +*@brief Returns the dimension index in the destination data format given the one in +* the source data format. +* +*@par Inputs: +* x: A tensor of type int32 or int64. +* A Tensor with each element as a dimension index in source data format. +* Must be in the range [-4, 4). +* +*@par Attributes: +*@li src_format: An optional string. Defaults to NHWC. 
+* source data format. Must be of length 4.
+*@li dst_format: An optional string. Defaults to NCHW.
+* destination data format. Must be of length 4.
+*
+*@par Outputs:
+* y: A tensor. Has the same type as "x". Must be in the range [0, 4).
+*
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator DataFormatDimMap.
+*
+*/
+REG_OP(DataFormatDimMap)
+    .INPUT(x, TensorType::IndexNumberType())
+    .ATTR(src_format, String, "NHWC")
+    .ATTR(dst_format, String, "NCHW")
+    .OUTPUT(y, TensorType::IndexNumberType())
+    .OP_END_FACTORY_REG(DataFormatDimMap)
+
+/**
+* @brief Implements stochastic gradient descent (optionally with momentum).
+* Nesterov momentum is based on the formula from
+* "On the importance of initialization and momentum in deep learning".
+
+* @par Inputs:
+* @li parameters: A mutable tensor of type float16 or float32.
+* Specifies the iterable of parameters to optimize or dicts defining parameter
+* groups.
+* @li gradient: A tensor of type float16 or float32.
+* Specifies the gradient of the training step.
+* @li learning_rate: A tensor of type float16 or float32.
+* Specifies the learning rate of the training step.
+* @li accum: A tensor of type float16 or float32.
+* Specifies the velocity of the training step.
+* @li momentum: A tensor of type float16 or float32.
+* Specifies the momentum factor.
+* @li stat: A tensor of type float16 or float32.
+* Specifies the status representing the first step or not . \n
+
+* @par Attributes:
+* @li dampening: An optional float, specifying the dampening for momentum.
+* Defaults to "0.0".
+* @li weight_decay: An optional float, specifying the L2 penalty. Defaults to
+* "0.0".
+* @li nesterov: An optional bool, specifying whether to enable Nesterov
+* momentum. Defaults to "False" . \n
+
+* @par Outputs:
+* parameters: A mutable tensor of the same type as input "parameters" . \n
+
+* @see ApplyMomentum()
+
+* @par Third-party framework compatibility
+* @li Compatible with the PyTorch operator SGD.
+*/
+REG_OP(SGD)
+    .INPUT(parameters, TensorType(DT_FLOAT, DT_FLOAT16))
+    .INPUT(gradient, TensorType(DT_FLOAT, DT_FLOAT16))
+    .INPUT(learning_rate, TensorType(DT_FLOAT, DT_FLOAT16))
+    .INPUT(accum, TensorType(DT_FLOAT, DT_FLOAT16))
+    .INPUT(momentum, TensorType(DT_FLOAT, DT_FLOAT16))
+    .INPUT(stat, TensorType(DT_FLOAT, DT_FLOAT16))
+    .OUTPUT(parameters, TensorType(DT_FLOAT, DT_FLOAT16))
+    .ATTR(dampening, Float, 0.0)
+    .ATTR(weight_decay, Float, 0.0)
+    .ATTR(nesterov, Bool, false)
+    .OP_END_FACTORY_REG(SGD)
+
+/**
+* @brief Updates "var" according to the RMSProp algorithm.
+* mean_square = decay * mean_square + (1-decay) * gradient ** 2
+* Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
+* ms <- rho * ms_{t-1} + (1-rho) * grad * grad
+* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
+* var <- var - mom
+*
+* @par Inputs:
+* @li var: A mutable tensor. Must be one of the data types defined in
+* TensorType::NumberType(). Should be from a Variable().
+* @li ms: A mutable tensor. Must have the same type as "var". Should be from a
+* Variable().
+* @li mom: A mutable tensor. Must have the same type as "var". Should be from a
+* Variable().
+* @li lr: A scalar. Must have the same type as "var".
+* @li rho: A scalar. Must have the same type as "var".
+* @li momentum: A scalar. Must have the same type as "var".
+* @li epsilon: A scalar. Must have the same type as "var".
+* @li grad: A tensor, specifying the gradient. Must have the same type as "var".
+*
+* @par Attributes:
+* use_locking: An optional "bool". Defaults to "False".
If "True", updating of +* the "var", "ms", and "mom" tensors will be protected by a lock; otherwise the +* behavior is undefined, but may exhibit less contention. +* +* @par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +* @attention Constraints: +* @li Note that in dense implementation of this algorithm, "ms" and "mom" will +* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom" +* will not update in iterations during which "grad" is 0. +* @li The input tensors "var", "ms", "mom" and "grad" must have the same shape. +* +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator ApplyRMSProp. +*/ +REG_OP(ApplyRMSProp) + .INPUT(var, TensorType::NumberType()) + .INPUT(ms, TensorType::NumberType()) + .INPUT(mom, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(rho, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyRMSProp) + +/** +* @brief Updates "var" according to the RMSProp algorithm, a const input will be +* considered as an attribute. +* mean_square = decay * mean_square + (1-decay) * gradient ** 2 +* Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +* ms <- rho * ms_{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +* var <- var - mom +* +* @par Inputs: +* @li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +* @li ms: A mutable tensor. Must have the same type as "var". Should be from a +* Variable(). +* @li mom: A mutable tensor. Must have the same type as "var". Should be from a +* Variable(). +* @li lr: A scalar. Must have the same type as "var". +* @li grad: A tensor, specifying the gradient. Must have the same type as "var". +* +* @par Attributes: +* @li use_locking: An optional "bool". Defaults to "False". If "True", updating +* of the "var", "ms", and "mom" tensors will be protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* @li rho: A required scalar. Must have the same type as "var". +* @li momentum: A required scalar. Must have the same type as "var". +* @li epsilon: A required scalar. Must have the same type as "var". +* +* @par Outputs: +* var: A mutable tensor. Must have the same type as input "var". +* +* @attention Constraints: +* @li Note that in dense implementation of this algorithm, "ms" and "mom" will +* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom" +* will not update in iterations during which "grad" is 0. +* @li The input tensors "var", "ms", "mom" and "grad" must have the same shape. +* +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator ApplyRMSProp. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyRMSProp instead. 
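+*
+*@par Example:
+* A rough scalar walk-through of the update above, with illustrative
+* values only (not taken from this header). Assume ms = 0.0, mom = 0.0,
+* rho = 0.75, momentum = 0.9, lr = 0.1, grad = 2.0, var = 1.0 and a
+* negligible epsilon:
+*   ms  = 0.75 * 0.0 + 0.25 * 2.0 * 2.0     = 1.0
+*   mom = 0.9 * 0.0 + 0.1 * 2.0 / sqrt(1.0) = 0.2
+*   var = 1.0 - 0.2                         = 0.8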
+*/ +REG_OP(ApplyRMSPropD) + .INPUT(var, TensorType::NumberType()) + .INPUT(ms, TensorType::NumberType()) + .INPUT(mom, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(ms, TensorType::NumberType()) + .OUTPUT(mom, TensorType::NumberType()) + .REQUIRED_ATTR(rho, Float) + .REQUIRED_ATTR(momentum, Float) + .REQUIRED_ATTR(epsilon, Float) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyRMSPropD) + +/** +*@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate . \n + +*@par Inputs: +*Six inputs, including: +* @li var: A mutable Tensor of type TensorType::NumberType(). +* Should be from a Variable(). +* @li accum: A mutable Tensor of the same type as "var". Should be from a Variable(). +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li grad: A Tensor of the same type as "var", for the gradient . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" *tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less *contention . \n + +*@par Outputs: +*var: A mutable tensor. Must have the same type as input "var" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyProximalAdagrad. +*/ +REG_OP(ApplyProximalAdagrad) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyProximalAdagrad) + +/** +*@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate . \n + +*@par Inputs: +*Six inputs, including: +* @li var: A mutable Tensor of type TensorType::NumberType(). +* Should be from a Variable(). +* @li accum: A mutable Tensor of the same type as "var". Should be from a Variable(). +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li grad: A Tensor of the same type as "var", for the gradient . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" *tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less *contention . \n + +*@par Outputs: +* @li var: A mutable Tensor. Has the same type as "var". +* @li accum: A mutable Tensor. Has the same type as "var" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyProximalAdagradD. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyProximalAdagrad instead. 
+*/ +REG_OP(ApplyProximalAdagradD) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyProximalAdagradD) + +/** +*@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm. +* Compared with op ApplyProximalAdagrad, an additional index tensor is input, +* Only the indices into the first dimensions of "var" and "accum" are updated . \n + +*@par Inputs: +* Seven inputs, including: +* @li var: A mutable Tensor. +* TensorType::NumberType(). Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. Should be greater than or equal to zero. +* Accum and grad cannot be equal to zero at the same time. +* @li lr: A Tensor of the same type as "var". +* Scaling factor. Must be a scalar. Should be greater than zero. +* @li l1: A Tensor of the same type as "var". +* L1 regulariation. Must be a scalar. Should be greater than or equal to zero. +* @li l2: A Tensor of the same type as "var". +* L2 regulariation. Must be a scalar. Should be greater than or equal to zero. +* @li grad: A Tensor. Has the same type as "var". +* The gradient. +* @li indices: A vector of indices into the first dimension of "var" and "accum". +* TensorType::IndexNumberType(). Can contain duplicate values . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the var and accum tensors will be protected by a lock; +* If "False", the behavior is undefined, but may exhibit less contention. + +*@par Outputs: +*var: A mutable Tensor. Has the same type as "var" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SparseApplyProximalAdagrad. +*/ +REG_OP(SparseApplyProximalAdagrad) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(indices, TensorType::IndexNumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyProximalAdagrad) + +/** +*@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm.\ n +* Compared with op ApplyProximalAdagrad, an additional index tensor is input, +* Only the indices into the first dimensions of "var" and "accum" are updated . \n + +*@par Inputs: +* Seven inputs, including: +* @li var: A mutable Tensor. +* TensorType::NumberType(). Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. Should be greater than or equal to zero. +* Accum and grad cannot be equal to zero at the same time. +* @li lr: A Tensor of the same type as "var". +* Scaling factor. Must be a scalar. Should be greater than zero. +* @li l1: A Tensor of the same type as "var". +* L1 regulariation. Must be a scalar. Should be greater than or equal to zero. +* @li l2: A Tensor of the same type as "var". +* L2 regulariation. Must be a scalar. Should be greater than or equal to zero. +* @li grad: A Tensor. Has the same type as "var". +* The gradient. 
+* @li indices: A vector of indices into the first dimension of "var" and "accum". +* TensorType::IndexNumberType(). Can contain duplicate values . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the var and accum tensors will be protected by a lock; +* If "False", the behavior is undefined, but may exhibit less contention . \n + +*@par Outputs: +*@li var: A mutable Tensor. Has the same type as "var". +*@li accum: A mutable Tensor. Has the same type as "var" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SparseApplyProximalAdagrad. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyProximalAdagrad instead. +*/ +REG_OP(SparseApplyProximalAdagradD) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(indices, TensorType::IndexNumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyProximalAdagradD) + +/** +*@brief Updates "var" according to the Ftrl-proximal scheme . \n + +*@par Inputs: +*Eight inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +*@par Outputs: +*var: A mutable Tensor. Has the same type as "var" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyFtrl. +*/ +REG_OP(ApplyFtrl) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(linear, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(lr_power, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyFtrl) + +/** +*@brief Updates "var" according to the Ftrl-proximal scheme . \n + +*@par Inputs: +*Eight inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. 
+* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +*@par Outputs: +*@li var: A mutable Tensor. Has the same type as "var". +*@li accum: A mutable Tensor. Has the same type as "accum". +*@li linear: A mutable Tensor. Has the same type as "linear" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyFtrl. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrl instead. +*/ +REG_OP(ApplyFtrlD) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(linear, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(lr_power, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .OUTPUT(linear, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyFtrlD) + +/** +*@brief Update "var" according to the Ftrl-proximal scheme . \n + +*@par Inputs: +*Nine inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li l2_shrinkage: A Tensor of the same type as "var". +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +*@par Outputs: +*var: A mutable Tensor. Has the same type as "var" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyFtrlV2. +*/ +REG_OP(ApplyFtrlV2) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(linear, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(l2_shrinkage, TensorType::NumberType()) + .INPUT(lr_power, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyFtrlV2) + +/** +*@brief Update "var" according to the Ftrl-proximal scheme . \n + +*@par Inputs: +*Nine inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". 
+* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li l2_shrinkage: A Tensor of the same type as "var". +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +*@par Outputs: +*var: A mutable Tensor. Has the same type as "var". +*accum: A mutable Tensor. Has the same type as "accum". +*linear: A mutable Tensor. Has the same type as "linear" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyFtrlV2. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrlV2 instead. +*/ +REG_OP(ApplyFtrlV2D) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(linear, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(l1, TensorType::NumberType()) + .INPUT(l2, TensorType::NumberType()) + .INPUT(l2_shrinkage, TensorType::NumberType()) + .INPUT(lr_power, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .OUTPUT(linear, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyFtrlV2D) + +/** +*@brief Updates "var" according to the Adam algorithm. +* lr_t <- text{learning\_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t) +* m_t <- beta_1 * m_{t-1} + (1 - beta_1) * g +* v_t <- max(beta2 * v{t-1}, abs(g)) +* variable <- variable - lr_t * m_t / (sqrt{v_t} + epsilon) +* +*@attention Constraints: +* *The input tensors must have the same shape.* +* +*@par Inputs: +*@li var: A mutable Tensor of the type TensorType::NumberType(). +* Should be from a Variable(). +*@li m: A mutable Tensor of the same type as "var". +* Should be from a Variable(). +*@li v: A mutable Tensor of the same type as "var". +* Should be from a Variable(). +*@li beta1_power: A scalar of the same type as "var". +*@li beta2_power: A scalar of the same type as "var". +*@li lr: learning_rate. A scalar of the same type as "var". +*@li beta1: A scalar of the same type as "var". +*@li beta2: A scalar of the same type as "var". +*@li epsilon: A scalar of the same type as "var". +*@li grad: A Tensor of the same type as "var", for the gradient. +* +*@par Attributes: +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", m", and "v" tensors will be protected +* by a lock; otherwise the behavior is undefined, but may exhibit less +* contention. +*@li use_nesterov: An optional bool. Defaults to "False". + If "True", uses the nesterov update. +* +*@par Outputs: +* var: A mutable Tensor. Has the same type as intput "var" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdam. 
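+*
+*@par Example:
+* A rough scalar walk-through of the step-size and first-moment lines
+* above, with illustrative values only (not taken from this header).
+* Assume lr = 0.001, beta1_power = 0.9, beta2_power = 0.99, beta1 = 0.9,
+* m = 0.0 and grad = 1.0:
+*   lr_t = 0.001 * sqrt(1 - 0.99) / (1 - 0.9) = 0.001 * 0.1 / 0.1 = 0.001
+*   m_t  = 0.9 * 0.0 + (1 - 0.9) * 1.0        = 0.1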
+*/ +REG_OP(ApplyAdam) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(v, TensorType::NumberType()) + .INPUT(beta1_power, TensorType::NumberType()) + .INPUT(beta2_power, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(beta1, TensorType::NumberType()) + .INPUT(beta2, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .ATTR(use_nesterov, Bool, false) + .OP_END_FACTORY_REG(ApplyAdam) + +/** +*@brief Updates "var" according to the Adam algorithm. +* lr_t <- text{learning\_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t) +* m_t <- beta_1 * m_{t-1} + (1 - beta_1) * g +* v_t <- max(beta2 * v{t-1}, abs(g)) +* variable <- variable - lr_t * m_t / (sqrt{v_t} + epsilon) +* +*@attention Constraints: +* *The input tensors must have the same shape.* +* +*@par Inputs: +*@li var: A mutable Tensor of the type TensorType::NumberType(). +* Should be from a Variable(). +*@li m: A mutable Tensor of the same type as "var". +* Should be from a Variable(). +*@li v: A mutable Tensor of the same type as "var". +* Should be from a Variable(). +*@li beta1_power: A scalar of the same type as "var". +*@li beta2_power: A scalar of the same type as "var". +*@li lr: learning_rate. A scalar of the same type as "var". +*@li beta1: A scalar of the same type as "var". +*@li beta2: A scalar of the same type as "var". +*@li epsilon: A scalar of the same type as "var". +*@li grad: A Tensor of the same type as "var", for the gradient. +* +*@par Attributes: +*@li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", m", and "v" tensors will be protected +* by a lock; otherwise the behavior is undefined, but may exhibit less +* contention. +*@li use_nesterov: An optional bool. Defaults to "False". + If "True", uses the nesterov update. +* +*@par Outputs: +*@li var: A mutable tensor. Has the same type as input "var". +*@li m: A mutable tensor. Has the same type as input "m". +*@li v: A mutable tensor. Has the same type as input "v" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdam. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdam instead. +*/ +REG_OP(ApplyAdamD) + .INPUT(var, TensorType::NumberType()) + .INPUT(m, TensorType::NumberType()) + .INPUT(v, TensorType::NumberType()) + .INPUT(beta1_power, TensorType::NumberType()) + .INPUT(beta2_power, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(beta1, TensorType::NumberType()) + .INPUT(beta2, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(m, TensorType::NumberType()) + .OUTPUT(v, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .ATTR(use_nesterov, Bool, false) + .OP_END_FACTORY_REG(ApplyAdamD) + +/** +*@brief Updates "var" according to the proximal adadelta scheme . \n + +*@par Inputs: +*Seven inputs, including: +* @li var: A mutable Tensor of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li accum_update: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li lr: A scalar of the same type as "var", for the scaling factor. 
+* @li rho: A scalar of the same type as "var", for the decay factor. +* @li epsilon: A scalar of the same type as "var", for the constant factor. +* @li grad: A Tensor of the same type as "var", for the gradient . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "accum" and "accum_update" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +*@par Outputs: +*var: A mutable Tensor. Has the same type as "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAdadelta. +*/ +REG_OP(ApplyAdadelta) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(accum_update, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(rho, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAdadelta) + +/** +*@brief Updates "var" according to the proximal adadelta scheme . \n + +*@par Inputs: +*Seven inputs, including: +* @li var: A mutable Tensor of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li accum_update: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li lr: A scalar of the same type as "var", for the scaling factor. +* @li rho: A scalar of the same type as "var", for the decay factor. +* @li epsilon: A scalar of the same type as "var", for the constant factor. +* @li grad: A Tensor of the same type as "var", for the gradient . \n + +*@par Attributes: +*use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", "accum" and "accum_update" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +*@par Outputs: +*@li var: A mutable Tensor. Has the same type as "var". +*@li accum: A mutable Tensor. Has the same type as "var". +*@li accum_update: A mutable Tensor. Has the same type as "var" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAdadelta. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdadelta instead. +*/ +REG_OP(ApplyAdadeltaD) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(accum_update, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(rho, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .OUTPUT(accum_update, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(ApplyAdadeltaD) + +/** +* @brief Updates "var" according to the ApplyMomentum algorithm. +* accum = accum * momentum + x1 * x2 +* if use_nesterov is True: +* var -= x1 * x2 * lr + accum * momentum * lr +* else: +* var -= accum * lr +* +* @par Inputs: +* Six inputs, including: +* @li var: A mutable Tensor has type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor has the same type as "var". +* Should be a Variable Tensor. +* @li lr: A scalar has the same type as "var", for the scaling factor. +* @li x1: A Tensor has type TensorType::NumberType(). 
+* @li momentum: A scalar has the same type as "var". +* @li x2: A scalar has the same type as "var". +* +* @par Attributes: +* Two attributes, including: +* @li use_nesterov: An optional bool. Defaults to "False". +* If True, the tensor passed to compute grad will be var - lr * momentum * accum, +* so in the end, the var you get is actually var - lr * momentum * accum. +* @li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", m", and "v" tensors will be protected +* by a lock; otherwise the behavior is undefined, but may exhibit less contention. +* +* @par Outputs: +* Two outputs, including: +* @li var: A mutable Tensor has the same type as "var". +* @li accum: A mutable Tensor has the same type as "var". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(FusedMulApplyMomentum) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(x1, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .INPUT(x2, TensorType::NumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .ATTR(use_nesterov, Bool, false) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(FusedMulApplyMomentum) + +/** +* @brief Updates "var" according to the ApplyMomentum algorithm. +* accum = accum * momentum + x1 * x2 +* if use_nesterov is True: +* var -= x1 * x2 * lr + accum * momentum * lr +* else: +* var -= accum * lr +* +* @par Inputs: +* Seven inputs, including: +* @li var: A mutable Tensor of type float32. +* Should be a Variable Tensor. +* @li accum: A mutable Tensor has type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li lr: A scalar has the same type as "accum", for the scaling factor. +* @li x1: A Tensor has the same type as "accum". +* @li momentum: A scalar has the same type as "accum". +* @li x2: A scalar has the same type as "accum". +* @li var_copy: A Tensor has type float16. +* +* @par Attributes: +* Two Attributes, including: +* @li use_nesterov: An optional bool. Defaults to "False". +* If True, the tensor passed to compute grad will be var - lr * momentum * accum, +* so in the end, the var you get is actually var - lr * momentum * accum. +* @li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var", m", and "v" tensors will be protected +* by a lock; otherwise the behavior is undefined, but may exhibit less contention. +* +* @par Outputs: +* Three outputs, including: +* @li var: A Tensor has the type float32. +* @li var_copy: A Tensor has the type float16. +* @li accum: A Tensor has the same type as input "accum". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(FusedMulApplyMomentumExtern) + .INPUT(var, TensorType(DT_FLOAT)) + .INPUT(accum, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(x1, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .INPUT(x2, TensorType::NumberType()) + .INPUT(var_copy, TensorType(DT_FLOAT16)) + .OUTPUT(var, TensorType(DT_FLOAT)) + .OUTPUT(var_copy, TensorType(DT_FLOAT16)) + .OUTPUT(accum, TensorType::NumberType()) + .ATTR(use_nesterov, Bool, false) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern) + +/** +*@brief Update "g" according to the LARS algorithm . \n + +*@par Inputs: +*Four inputs, including: +* @li w: A Tensor. Must be of type TensorType::DT_FLOAT. 
+* @li g: A Tensor of the same type and shape as "w". +* @li weight_decay: A Tensor of the same type as "w", Must be a scalar. +* @li learning_rate: A Tensor of the same type as "w", Must be a scalar . \n + +*@par Attributes: +*Three Attributes, including: +* @li hyperpara: An optional float. Default value is 0.001. +* @li epsilon: An optional float. Default value is 1e-5.Avoid denominator is 0. +* @li use_clip: An optional bool. Defaults to "False". +* If "True", updating learning rate . \n + +*@par Outputs: +*g_new: Tensor of the same type as "w". +*/ +REG_OP(LarsV2) + .INPUT(w, TensorType(DT_FLOAT)) + .INPUT(g, TensorType(DT_FLOAT)) + .INPUT(weight_decay, TensorType(DT_FLOAT)) + .INPUT(learning_rate, TensorType(DT_FLOAT)) + .OUTPUT(g_new, TensorType(DT_FLOAT)) + .ATTR(hyperpara, Float, 0.001) + .ATTR(epsilon, Float, 0.00001) + .ATTR(use_clip, Bool, false) + .OP_END_FACTORY_REG(LarsV2) + +/** +*@brief Update "g" according to the LARS algorithm . \n + +*@par Inputs: +*Six inputs, including: +* @li w: A Tensor. Must be of type TensorType::DT_FLOAT. +* @li g: A Tensor of the same type and shape as "w". +* @li w_square_sum: A Tensor of square_sum(w), has the same type as "w", Must be a scalar. +* @li g_square_sum: A Tensor of square(g), has the same type as "w", Must be a scalar. +* @li weight_decay: A Tensor of the same type as "w", Must be a scalar. +* @li learning_rate: A Tensor of the same type as "w", Must be a scalar . \n + +*@par Attributes: +*Three Attributes, including: +* @li hyperpara: An optional float. Default value is 0.001. +* @li epsilon: An optional float. Default value is 1e-5.Avoid denominator is 0. +* @li use_clip: An optional bool. Defaults to "False". +* If "True", updating learning rate . \n + +*@par Outputs: +*g_new: Tensor of the same type as "w". +*/ +REG_OP(LarsV2Update) + .INPUT(w, TensorType(DT_FLOAT)) + .INPUT(g, TensorType(DT_FLOAT)) + .INPUT(w_square_sum, TensorType(DT_FLOAT)) + .INPUT(g_square_sum, TensorType(DT_FLOAT)) + .INPUT(weight_decay, TensorType(DT_FLOAT)) + .INPUT(learning_rate, TensorType(DT_FLOAT)) + .OUTPUT(g_new, TensorType(DT_FLOAT)) + .ATTR(hyperpara, Float, 0.001) + .ATTR(epsilon, Float, 0.00001) + .ATTR(use_clip, Bool, false) + .OP_END_FACTORY_REG(LarsV2Update) + +/** +* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme . \n + +* @par Inputs: +* Nine inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. The value of accum must be greater than 0. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li indices: A vector of indices into the first dimension of var and accum. +* The value of indices must be unique. Otherwise, the result is unpredictable. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n + +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . 
\n + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyFtrl. +*/ +REG_OP(SparseApplyFtrl) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(linear, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(lr, TensorType({DT_FLOAT})) + .INPUT(l1, TensorType({DT_FLOAT})) + .INPUT(l2, TensorType({DT_FLOAT})) + .INPUT(lr_power, TensorType({DT_FLOAT})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyFtrl) + +/** +* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme . \n + +* @par Inputs: +* Five inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. The value of accum must be greater than 0. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li indices: A vector of indices into the first dimension of var and accum. +* The value of indices must be unique. Otherwise, the result is unpredictable . \n + +* @par Attributes: +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +* @par Outputs: +* @li var: A Tensor. Has the same type and format as input "var". +* @li accum: A Tensor. Has the same type and format as input "accum". +* @li linear: A Tensor. Has the same type and format as input "linear" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyFtrl. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyFtrl instead. +*/ +REG_OP(SparseApplyFtrlD) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(linear, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .OUTPUT(accum, TensorType({DT_FLOAT})) + .OUTPUT(linear, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(lr, Float) + .REQUIRED_ATTR(l1, Float) + .REQUIRED_ATTR(l2, Float) + .REQUIRED_ATTR(lr_power, Float) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyFtrlD) + +/** +* @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme. +* That is for rows we have grad for, "var", "accum" and "linear" are updated . \n + +* @par Inputs: +* Ten inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. 
+* @li indices: A vector of indices into the first dimension of "var" and "accum". +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li l2_shrinkage: A Tensor of the same type as "var", L2 shrinkage regulariation. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar . \n + +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyFtrlV2. +*/ +REG_OP(SparseApplyFtrlV2) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(linear, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(lr, TensorType({DT_FLOAT})) + .INPUT(l1, TensorType({DT_FLOAT})) + .INPUT(l2, TensorType({DT_FLOAT})) + .INPUT(l2_shrinkage, TensorType({DT_FLOAT})) + .INPUT(lr_power, TensorType({DT_FLOAT})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyFtrlV2) + +/** +* @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme. +* That is for rows we have grad for, "var", "accum" and "linear" are updated . \n + +* @par Inputs: +* Five inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li indices: A vector of indices into the first dimension of "var" and "accum" . \n + +* @par Attributes: +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li l2_shrinkage: A Tensor of the same type as "var", L2 shrinkage regulariation. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention . \n + +* @par Outputs: +* @li var: A Tensor. Has the same type and format as input "var". +* @li accum: A Tensor. Has the same type and format as input "accum". +* @li linear: A Tensor. Has the same type and format as input "linear" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyFtrlV2D. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyFtrlV2 instead. 
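+*
+* @par Example:
+* A minimal host-side sketch (illustrative only, not the device kernel) of the
+* TensorFlow-style Ftrl-proximal row update with L2 shrinkage, assuming the
+* usual negative "lr_power" (e.g. -0.5); the helper name "sparse_ftrl_v2_row"
+* is hypothetical:
+* @code
+*   #include <cmath>
+*
+*   // Illustrative helper, not part of this header. Updates one element of the
+*   // selected row; apply it to every element of that row.
+*   void sparse_ftrl_v2_row(float &var, float &accum, float &linear, float grad,
+*                           float lr, float l1, float l2, float l2_shrinkage,
+*                           float lr_power) {
+*     float gs = grad + 2.0f * l2_shrinkage * var;   // gradient with shrinkage
+*     float accum_new = accum + grad * grad;
+*     float sigma = (std::pow(accum_new, -lr_power) - std::pow(accum, -lr_power)) / lr;
+*     linear += gs - sigma * var;
+*     float quadratic = std::pow(accum_new, -lr_power) / lr + 2.0f * l2;
+*     var = (std::fabs(linear) > l1)
+*               ? (std::copysign(l1, linear) - linear) / quadratic
+*               : 0.0f;
+*     accum = accum_new;
+*   }
+* @endcode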
+*/ +REG_OP(SparseApplyFtrlV2D) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(linear, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .OUTPUT(accum, TensorType({DT_FLOAT})) + .OUTPUT(linear, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(lr, Float) + .REQUIRED_ATTR(l1, Float) + .REQUIRED_ATTR(l2, Float) + .REQUIRED_ATTR(l2_shrinkage, Float) + .REQUIRED_ATTR(lr_power, Float) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyFtrlV2D) + +/** +* @brief Updates "var" in specified index according to the RMSProp algorithm. +* mean_square = decay * mean_square + (1-decay) * gradient ** 2 +* Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +* ms <- rho * ms_{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +* var <- var - mom +* +* @par Inputs: +* Nine inputs, including: +* @li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +* @li ms: A mutable tensor. Must have the same type as "var". Should be from a +* Variable(). +* @li mom: A mutable tensor. Must have the same type as "var". Should be from a +* Variable(). +* @li lr: A scalar. Must have the same type as "var". +* @li rho: A scalar. Must have the same type as "var". +* @li momentum: A scalar. Must have the same type as "var". +* @li epsilon: A scalar. Must have the same type as "var". +* @li grad: A tensor, specifying the gradient. +* @li indices: A vector of indices into the first dimension of "var", "mom" and "ms". +* +* @par Attributes: +* use_locking: An optional "bool". Defaults to "False". If "True", updating of +* the "var", "ms", and "mom" tensors will be protected by a lock; otherwise the +* behavior is undefined, but may exhibit less contention. +* +* @par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +* @attention Constraints: +* @li Note that in this sparse implementation, "ms" and "mom" will not update +* in iterations during which "grad" is 0. +* @li The input tensors "var", "ms", and "mom" must have the same shape. +* +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyRMSProp. +*/ +REG_OP(SparseApplyRMSProp) + .INPUT(var, TensorType::NumberType()) + .INPUT(ms, TensorType::NumberType()) + .INPUT(mom, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(rho, TensorType::NumberType()) + .INPUT(momentum, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(indices, TensorType::IndexNumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyRMSProp) + +/** +* @brief Updates "var" in specified index according to the RMSProp algorithm. +* a const input will be considered as an attribute. +* mean_square = decay * mean_square + (1-decay) * gradient ** 2 +* Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +* ms <- rho * ms_{t-1} + (1-rho) * grad * grad +* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +* var <- var - mom +* +* @par Inputs: +* Six inputs, including: +* @li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +* @li ms: A mutable tensor. Must have the same type as "var". Should be from a +* Variable(). +* @li mom: A mutable tensor. 
Must have the same type as "var". Should be from a +* Variable(). +* @li lr: A scalar. Must have the same type as "var". +* @li grad: A tensor, specifying the gradient. +* +* @par Attributes: +* @li use_locking: An optional "bool". Defaults to "False". If "True", +* updating of the "var", "ms", and "mom" tensors will be protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* @li rho: A required scalar. Must have the same type as "var". +* @li momentum: A required scalar. Must have the same type as "var". +* @li epsilon: A required scalar. Must have the same type as "var". +* +* @par Outputs: +* @li var: A mutable tensor. Must have the same type as input "var". +* @li ms: A mutable tensor. Must have the same type as input "ms". +* @li mom: A mutable tensor. Must have the same type as input "mom". +* +* @attention Constraints: +* @li Note that in this sparse implementation, "ms" and "mom" will not update +* in iterations during which "grad" is 0. +* @li The input tensors "var", "ms" and "mom" must have the same shape. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyRMSProp instead. +*/ +REG_OP(SparseApplyRMSPropD) + .INPUT(var, TensorType::NumberType()) + .INPUT(ms, TensorType::NumberType()) + .INPUT(mom, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(indices, TensorType::IndexNumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(ms, TensorType::NumberType()) + .OUTPUT(mom, TensorType::NumberType()) + .REQUIRED_ATTR(rho, Float) + .REQUIRED_ATTR(momentum, Float) + .REQUIRED_ATTR(epsilon, Float) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyRMSPropD) + +/** +* @brief Updates "var" in specified index according to the Adadelta algorithm. +* accum <- rho * accum + (1 - rho) * grad.square() +* update <- (accum_update + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad +* var <- var - update * lr +* accum_update <- rho() * accum_update + (1 - rho()) * update.square() +* +* @par Inputs: +* Eight inputs, including: +* @li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +* @li accum: A mutable tensor. Must have the same type as "var". Should be from a +* Variable(). +* @li accum_update: A mutable tensor. Must have the same type as "var". Should be from a +* Variable(). +* @li lr: A scalar. Must have the same type as "var". +* @li rho: A scalar. Must have the same type as "var". +* @li epsilon: A scalar. Must have the same type as "var". +* @li grad: A tensor, specifying the gradient. +* @li indices: A vector of indices into the first dimension of "var", "accum" and "accum_update". +* +* @par Attributes: +* use_locking: An optional "bool". Defaults to "False". If "True", updating of +* the "var", "accum", and "accum_update" tensors will be protected by a lock; otherwise the +* behavior is undefined, but may exhibit less contention. +* +* @par Outputs: +* var: A mutable tensor. Has the same type as input "var". +* +* @attention Constraints: +* @li Note that in this sparse implementation, "accum" and "accum_update" will not update +* in iterations during which "grad" is 0. +* @li The input tensors "var", "accum", and "accum_update" must have the same shape. +* +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyAdadelta. 
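+*
+* @par Example:
+* A minimal host-side sketch (illustrative only) of the per-element Adadelta
+* update written out above; the helper name "sparse_adadelta_elem" is
+* hypothetical:
+* @code
+*   #include <cmath>
+*
+*   // Illustrative helper, not part of this header.
+*   void sparse_adadelta_elem(float &var, float &accum, float &accum_update,
+*                             float grad, float lr, float rho, float epsilon) {
+*     accum = rho * accum + (1.0f - rho) * grad * grad;
+*     float update = std::sqrt(accum_update + epsilon) /
+*                    std::sqrt(accum + epsilon) * grad;
+*     var -= update * lr;
+*     accum_update = rho * accum_update + (1.0f - rho) * update * update;
+*   }
+* @endcode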
+*/ +REG_OP(SparseApplyAdadelta) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(accum_update, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(rho, TensorType::NumberType()) + .INPUT(epsilon, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(indices, TensorType::IndexNumberType()) + .OUTPUT(var, TensorType::NumberType()) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyAdadelta) + +/** +* @brief Updates "var" in specified index according to the Adadelta algorithm. +* a const input will be considered as an attribute. +* accum <- rho * accum + (1 - rho) * grad.square() +* update <- (accum_update + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad +* var <- var - update * lr +* accum_update <- rho() * accum_update + (1 - rho()) * update.square() +* +* @par Inputs: +* Seven inputs, including: +* @li var: A mutable tensor. Must be one of the data types defined in +* TensorType::NumberType(). Should be from a Variable(). +* @li accum: A mutable tensor. Must have the same type as "var". Should be from a +* Variable(). +* @li accum_update: A mutable tensor. Must have the same type as "var". Should be from a +* Variable(). +* @li lr: A scalar. Must have the same type as "var". +* @li rho: A scalar. Must have the same type as "var". +* @li grad: A tensor, specifying the gradient. +* @li indices: A vector of indices into the first dimension of "var", "accum" and "accum_update". +* +* @par Attributes: +* @li use_locking: An optional "bool". Defaults to "False". If "True", +* updating of the "var", "accum", and "accum_update" tensors will be protected by a lock; +* otherwise the behavior is undefined, but may exhibit less contention. +* @li epsilon: A required scalar. Must have the same type as "var". +* +* @par Outputs: +* @li var: A mutable tensor. Must have the same type as input "var". +* @li accum: A mutable tensor. Must have the same type as input "accum". +* @li accum_update: A mutable tensor. Must have the same type as input "accum_update". +* +* @attention Constraints: +* @li Note that in this sparse implementation, "accum" and "accum_update" will not update +* in iterations during which "grad" is 0. +* @li The input tensors "var", "accum" and "accum_update" must have the same shape. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyAdadelta instead. +*/ +REG_OP(SparseApplyAdadeltaD) + .INPUT(var, TensorType::NumberType()) + .INPUT(accum, TensorType::NumberType()) + .INPUT(accum_update, TensorType::NumberType()) + .INPUT(lr, TensorType::NumberType()) + .INPUT(rho, TensorType::NumberType()) + .INPUT(grad, TensorType::NumberType()) + .INPUT(indices, TensorType::IndexNumberType()) + .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(accum, TensorType::NumberType()) + .OUTPUT(accum_update, TensorType::NumberType()) + .REQUIRED_ATTR(epsilon, Float) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyAdadeltaD) + + +/** +*@brief Clean memory of workspace list . \n + +*@par Attributes: +* @li automic_add_mem_size: sizes of workspaces . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(AtomicAddrClean) + .ATTR(automic_add_mem_size, ListInt, {}) + .OP_END_FACTORY_REG(AtomicAddrClean) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/no_op.h b/third_party/fwkacllib/inc/inc/ops/no_op.h new file mode 100644 index 00000000..b27b1fa0 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/no_op.h @@ -0,0 +1,41 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file no_op.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief Does nothing. Only useful as a placeholder for control edges . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator NoOp. +*/ + +REG_OP(NoOp) + .OP_END_FACTORY_REG(NoOp) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/inc/ops/nonlinear_fuc_ops.h new file mode 100644 index 00000000..a225bb5f --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/nonlinear_fuc_ops.h @@ -0,0 +1,889 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file nonlinear_fuc_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief Computes the for the gelu of "x" . \n + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32 + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Gelu +*/ +REG_OP(Gelu) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(Gelu) + +/** +*@brief Computes the gradient for the gelu of "x" . \n + +*@par Inputs: +*Three inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32 +* @li x: A Tensor of the same type as "dy". +* @li y: A Tensor of the same type as "dy" . \n + +*@par Outputs: +*z: A Tensor. Has the same type as "dy". 
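+*
+*@par Example:
+* One common erf-based formulation of GELU and its derivative (the kernel may
+* instead use a tanh approximation); illustrative host code only, the helper
+* names are hypothetical:
+* @code
+*   #include <cmath>
+*
+*   // Illustrative helpers, not part of this header.
+*   float gelu(float x) {                // x * Phi(x), Phi = standard normal CDF
+*     return 0.5f * x * (1.0f + std::erf(x / std::sqrt(2.0f)));
+*   }
+*   float gelu_grad(float dy, float x) { // dy * d/dx [x * Phi(x)]
+*     const float kPi = 3.14159265358979f;
+*     float cdf = 0.5f * (1.0f + std::erf(x / std::sqrt(2.0f)));
+*     float pdf = std::exp(-0.5f * x * x) / std::sqrt(2.0f * kPi);
+*     return dy * (cdf + x * pdf);
+*   }
+* @endcode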
+*@par Third-party framework compatibility +*Compatible with the TensorFlow operator GeluGrad +*/ +REG_OP(GeluGrad) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(GeluGrad) + +/** +*@brief Computes the for the fast_gelu of "x" . \n + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32 + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator FastGelu +*/ +REG_OP(FastGelu) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(FastGelu) + +/** +*@brief Computes the gradient for the fast_gelu of "x" . \n + +*@par Inputs: +*Three inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32 +* @li x: A Tensor of the same type as "dy" . \n + +*@par Outputs: +*z: A Tensor. Has the same type as "dy". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator FastGeluGrad +*/ +REG_OP(FastGeluGrad) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(FastGeluGrad) + + +/** +*@brief Computes the gradient for the tanh of "x" . \n + +*@par Inputs: +*Two inputs, including: +* @li y: A Tensor. Must be one of the following types: float16, float32, +* double, complex64, complex128. +* @li dy: A Tensor of the same type as "y" . \n + +*@par Outputs: +*z: A Tensor. Has the same type as "y". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator TanhGrad. +*/ +REG_OP(TanhGrad) + .INPUT(y, TensorType::UnaryDataType()) + .INPUT(dy, TensorType::UnaryDataType()) + .OUTPUT(z, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(TanhGrad) + +/** +*@brief: Computes hyperbolic tangent of "x" element-wise . \n + +*@par Inputs: +*One input: +*x: A Tensor. Must be one of the following types: float16, float32, complex64, complex128, double . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Tanh. +*/ +REG_OP(Tanh) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Tanh) + +/** +* @brief Computes rectified linear: "max(x, 0)". +* +* @par Inputs: +* x: A tensor. Must be one of the following types: float32, float64, int32, uint8, +* int16, int8, int64, uint16, float16, qint8. +* +* @par Outputs: +* y: A tensor. Has the same type as "x". +* +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator Relu. +* @li Compatible with the Caffe operator ReLULayer. +* +*/ +REG_OP(Relu) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, + DT_INT8, DT_INT32, DT_INT16, DT_INT64, + DT_UINT8, DT_UINT16, DT_QINT8})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, + DT_INT8, DT_INT32, DT_INT16, DT_INT64, + DT_UINT8, DT_UINT16, DT_QINT8})) + .OP_END_FACTORY_REG(Relu) + +/** +* @brief Computes rectified linear 6. +* activations = min(max(x, 0), 6) . \n + +* @par Inputs: +* x: A Tensor of type RealNumberType . \n + +* @par Outputs: +* y: A Tensor of type RealNumberType . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Relu6. 
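+*
+* @par Example:
+* An illustrative element-wise form of min(max(x, 0), 6):
+* @code
+*   #include <algorithm>
+*
+*   // Illustrative helper, not part of this header.
+*   inline float relu6(float x) { return std::min(std::max(x, 0.0f), 6.0f); }
+*   // relu6(-2.0f) == 0.0f, relu6(3.5f) == 3.5f, relu6(7.0f) == 6.0f
+* @endcode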
+*/ +REG_OP(Relu6) + .INPUT(x, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .OP_END_FACTORY_REG(Relu6) + +/** +* @brief Computes rectified linear 6*scale. +* activations = min(max(x, 0), 6*scale) . \n + +* @par Inputs: +* x: A Tensor of type RealNumberType . \n + +* @par Attributes: +* epsilon: A required scalar. The data type is float32 . \n + +* @par Outputs: +* y: A Tensor of type RealNumberType . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Relu6. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use Relu6 instead. +*/ +REG_OP(Relu6D) + .INPUT(x, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .ATTR(scale, Float, 1.0) + .OP_END_FACTORY_REG(Relu6D) + +/** +* @brief Computes rectified linear 6 gradients for a Relu6 operation. +* backprops = gradients * (features > 0) * (features < 6) . \n + +* @par Inputs: +* @li features: A Tensor of type RealNumberType. +* @li gradients: A Tensor of type RealNumberType . \n + +* @par Outputs: +* backprops: A Tensor of type RealNumberType . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Relu6Grad. +*/ +REG_OP(Relu6Grad) + .INPUT(gradients, TensorType::RealNumberType()) + .INPUT(features, TensorType::RealNumberType()) + .OUTPUT(backprops, TensorType::RealNumberType()) + .OP_END_FACTORY_REG(Relu6Grad) +/** +*@brief Calculate the elu_grad_v2 function. +*Applies the element-wise function: +* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha . +*@par Inputs: +*One inputs, including: +* @li grads: A tensor. Must be one of the following types: +* float16, float32. +* @li activations: A tensor. Must be one of the following types: +* float16, float32. +* +*@par Outputs: +*y: A Tensor with the same type and shape of grads's. +* +*@par Attributes: +*@li alpha: scalar parameter, default value = 1.0 +*/ +REG_OP(EluGradV2) + .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(alpha, Float, 1.0) + .OP_END_FACTORY_REG(EluGradV2) +/** +* @brief Compute sigmoid of "x" element-wise . \n + +* @par Inputs: +* A Tensor of type complex64, complex128, float16, float32 or double . \n + +* @par Outputs: +* A Tensor. Has the same type as "x" . \n + +* @see Relu() + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Sigmoid. +*/ +REG_OP(Sigmoid) + .INPUT(x, TensorType::UnaryDataType()) + .OUTPUT(y, TensorType::UnaryDataType()) + .OP_END_FACTORY_REG(Sigmoid) + +/** +* @brief Computes z = (y - y*y)*dy . \n + +* @par Inputs: +* @li y: The input is Tensor, dtype is UnaryDataType. +* @li dy: The input is Tensor, dtype is UnaryDataType . \n + +* @par Outputs: +* z: The shape of output, dtype is UnaryDataType. +*/ +REG_OP(SigmoidGrad) + .INPUT(y, TensorType(UnaryDataType)) + .INPUT(dy, TensorType(UnaryDataType)) + .OUTPUT(z, TensorType(UnaryDataType)) + .OP_END_FACTORY_REG(SigmoidGrad) + +/** +*@brief Computes the binomial normal log likelihood (BNLL) output: +*if x>0, x+log(1+exp(-x)); otherwise log(1+exp(x)) . \n + +*@par Inputs: +*x: A Tensor of type double, float16 or float32 . \n + +*@par Outputs: +*y: A tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the Caffe operator BNLL. 
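+*
+*@par Example:
+* An illustrative, numerically stable element-wise form of the output defined
+* above:
+* @code
+*   #include <cmath>
+*
+*   // Illustrative helper, not part of this header.
+*   inline float bnll(float x) {
+*     return (x > 0.0f) ? x + std::log1p(std::exp(-x))   // x + log(1 + exp(-x))
+*                       : std::log1p(std::exp(x));       // log(1 + exp(x))
+*   }
+*   // bnll(0.0f) ~= 0.6931f (log 2); for large positive x, bnll(x) ~= x.
+* @endcode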
+*/ +REG_OP(BNLL) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(BNLL) + +/** +*@brief Computes softplus: log(exp(x) + 1) . \n + +*@par Inputs: +* One input: +*x: A Tensor of type float16 or float32. Up to 8D . \n + +*@par Outputs: +*y: The activations tensor. Has the same type and format as input "x" + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Softplus. +*/ +REG_OP(Softplus) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(Softplus) + +/** +*@brief Computes softplus gradients for a softplus operation . \n + +*@par Inputs: +*Two inputs: +* @li gradients: An NC1HWC0 or ND Tensor of type float16 or float32. +* @li features: An NC1HWC0 or ND Tensor of type float16 or float32. + + +*@par Outputs: +*backprops: A Tensor. Has the same type and format as input "gradients" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SoftplusGrad. +*/ +REG_OP(SoftplusGrad) + .INPUT(gradients, TensorType::FloatingDataType()) + .INPUT(features, TensorType::FloatingDataType()) + .OUTPUT(backprops, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(SoftplusGrad) + +/** +*@brief Computes softsign: x/(abs(x) + 1) . \n + +*@par Inputs: +* One input: +*x: A Tensor of type float16 or float32. Up to 8D . \n + +*@par Outputs: +*y: The activations tensor. Has the same type and format as "x" + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Softsign. +*/ +REG_OP(Softsign) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(Softsign) + +/** +*@brief Computes scaled exponential linear: scale * alpha * (exp(x) - 1) . \n + +*@par Inputs: +* One input: +*x: A Tensor. Must be one of the following types: float16, float, double + * int32, int8. format:ND, NC1HWC0 . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x". format:ND, NC1HWC0 . \n + +*@see Region() + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Selu. +*/ +REG_OP(Selu) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE, + DT_INT8,DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE, + DT_INT8,DT_INT32})) + .OP_END_FACTORY_REG(Selu) + +/** +*@brief Computes rectified linear gradients for a ReLU operation . \n + +*@par Inputs: +* Two inputs, including: +*@li gradients: A Tensor. Must be one of the following types: float32, double, + * int32, int8, int16, int64, uint16, float16, uint32, uint64 +*@li features: A Tensor. Must be one of the following types: float32, double, + * int32, int8, int16, int64, uint16, float16, uint32, uint64 + +*@par Outputs: +*backprops: A Tensor. Must have the same type as"gradients" . \n + +*@attention Constraints: +* The corresponding Relu operator needs to be called before using this operator on the network . \n + +*@see Relu + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ReluGrad. +*/ +REG_OP(ReluGrad) + .INPUT(gradients, TensorType::RealNumberType()) + .INPUT(features, TensorType::RealNumberType()) + .OUTPUT(backprops, TensorType::RealNumberType()) + .OP_END_FACTORY_REG(ReluGrad) + +/** +*@brief Computes rectified linear gradients for a ReLU operation . \n + +*@par Inputs: +* Two inputs, including: +*@li gradients: A Tensor. 
Must be one of the following types: float32, double, int32, int8, int16, int8, int64, uint16, float16, uint32, uint64 +*@li mask: A Tensor. Must be the following types: uint8 + +*@par Outputs: +*backprops: A Tensor. Must have the same type as"gradients" . \n + +*@attention Constraints: +* The corresponding Relu operator needs to be called before using this operator on the network . \n + +*@see Relu + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ReluGradV2. +*/ +REG_OP(ReluGradV2) + .INPUT(gradients, TensorType::RealNumberType()) + .INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(backprops, TensorType::RealNumberType()) + .OP_END_FACTORY_REG(ReluGradV2) + +/** +*@brief Computes rectified linear: "max(x, 0)". +* +*@attention Constraints: +* The last dimension must be divisible by 8. +* The second output "mask" is "1" (for y >= 0) or "0" ( for y < 0). +* +*@par Inputs: +* x: A tensor. Must be one of the following types: float32, float64, int32, uint8, +* int16, int8, int64, uint16, float16, qint8. +* +*@par Outputs: +*@li y: A tensor. Has the same type as "x". +*@li mask: A tensor of type uint8. +* +*@par Third-party framework compatibility +* Incompatible with TensorFlow or Caffe. +* +*/ +REG_OP(ReluV2) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, DT_INT32, DT_INT16, DT_INT64, DT_UINT8, DT_UINT16, DT_QINT8})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, DT_INT32, DT_INT16, DT_INT64, DT_UINT8, DT_UINT16, DT_QINT8})) + .OUTPUT(mask, TensorType({DT_UINT8})) + .OP_END_FACTORY_REG(ReluV2) + +/** +*@brief Performs parametric ReLU . \n + +*@par Inputs: +* Two inputs, including: +*@li x: A multi-dimensional Tensor of type float16 or float32. +*@li weight: A Scalar or 1D Tensor of type float16 or float32, specifying the weight, the initial value of "a". The number of dimensions must be the same as the number of channels . \n + +*@par Outputs: +*y: An activated Tensor. Has the same dimensions with "x" . \n + +*@par Third-party framework compatibility +* Compatible with PyTorch and Caffe operator PReLU. +*/ +REG_OP(PRelu) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(weight, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(PRelu) + +/** +*@brief Performs the backpropagation of PRelu for training scenarios . \n + +*@par Inputs: +* Three inputs, including: +*@li grads: Input gradient. Multi-dimensional Tensors are supported. The data type can be float16 or float32. +*@li features: A multi-dimensional Tensor of type float16 or float32. +*@li weights: A Scalar or 1D Tensor of type float16 or float32, specifying the weight. The number of dimensions must be the same as the number of channels . \n + +*@par Outputs: +*@li dx: Reverse gradient of "features". Has the same dimensions and type as "features". +*@li da: Reverse gradient of "weight". Has the same dimensions and type as "features" . \n + +*@par Third-party framework compatibility +* Compatible with PyTorch operator PReluGrad. +*/ +REG_OP(PReluGrad) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(weights, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(da, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(PReluGrad) + +/** +*@brief Activation function fused from sigmoid and ReLU, with soft saturation +* on the left and no saturation on the right . 
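+*
+*@par Example:
+* A minimal element-wise sketch, assuming the usual ELU form y = x for x > 0
+* and y = alpha * (exp(x) - 1) otherwise (illustrative only):
+* @code
+*   #include <cmath>
+*
+*   // Illustrative helper, not part of this header.
+*   inline float elu(float x, float alpha = 1.0f) {
+*     return (x > 0.0f) ? x : alpha * (std::exp(x) - 1.0f);
+*   }
+* @endcode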
\n + +*@par Inputs: +*x: A float16, float32 or double, for the input data type . \n + +*@par Attributes: +*alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n + +*@par Outputs: +*y: A float16, float32 or double, for the normalized result . \n + +*@attention Constraints: +*@li The input is of type float16 or float32 . \n + +*@par Multiple batches supported or not +*Supported +*@par Third-party framework compatibility +*@li Compatible with Tensorflow's Elu operator +*@li Compatible with Caffe's ELULayer operator +* +*@since V100R001C33 +*/ +REG_OP(Elu) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .ATTR(alpha, Float, 1.0) + .OP_END_FACTORY_REG(Elu) + +/** +*@brief Computes gradients for the exponential linear (Elu) operation. +* +*@par Inputs: +*@li grads: A tensor. Must be one of the following types: float16, float32, float64. +* The backpropagated gradients to the corresponding Elu operation. +*@li activations: A tensor. Has the same type as "grads". +* The outputs of the corresponding Elu operation. +* +*@par Outputs: +* y: A tensor. Has the same type as "grads". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator EluGrad. +* +*/ +REG_OP(EluGrad) + .INPUT(grads, TensorType::FloatingDataType()) + .INPUT(activations, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(EluGrad) + +/** +*@brief Computes the output as x if x > 0 and negative_slope * x if x <= 0 . \n + +*@par Inputs: +* One input: +* x: A Tensor. Must be one of the following types: float32, float16, double. +* +*@par Attributes: +*negative_slope: A float32. Defaults to "0.0". +* +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the Caffe operator ReLU. +*/ +REG_OP(LeakyRelu) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) + .ATTR(negative_slope, Float, 0.0) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) + .OP_END_FACTORY_REG(LeakyRelu) + +/** +*@brief Computes the output as gradients if features > 0 and negative_slope * gradients if features <= 0 . \n + +*@par Inputs: +* Two inputs, including: +* @li gradients: A Tensor. Must be one of the following types: float16, float32, double. +* @li features: A Tensor. Has the same type as "gradients" . \n + +*@par Attributes: +*negative_slope: A float32. Defaults to "0.0" . \n + +*@par Outputs: +*backprops: A Tensor. Has the same type as "gradients" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator LeakyReluGrad. +*/ +REG_OP(LeakyReluGrad) + .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(negative_slope, Float, 0.0) + .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(LeakyReluGrad) + +/** +*@brief Thresholds grad each element of the input Tensor . \n + +*@par Inputs: +* @li gradients: A Tensor shape and dtype of input gradients. Support float16, int32. +* @li features: A Tensor shape and dtype of input features. Support float16, int32 . \n + +*@par Attributes: +*threshold: A float32 scale value to threshold at . \n + +*@par Outputs: +*backprops: A Tensor of shape and dtype of output backprops, should be same shape and type as inputs . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
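+*
+*@par Example:
+* An illustrative element-wise sketch, assuming the gradient is passed through
+* only where "features" exceeds "threshold"; the helper name is hypothetical:
+* @code
+*   // Illustrative helper, not part of this header.
+*   inline float threshold_grad(float gradient, float feature, float threshold) {
+*     return (feature > threshold) ? gradient : 0.0f;
+*   }
+* @endcode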
+*/ +REG_OP(ThresholdGradV2D) + .INPUT(gradients, TensorType({DT_INT32, DT_FLOAT16})) + .INPUT(features, TensorType({DT_INT32, DT_FLOAT16})) + .OUTPUT(backprops, TensorType({DT_INT32, DT_FLOAT16})) + .REQUIRED_ATTR(threshold, Float) + .OP_END_FACTORY_REG(ThresholdGradV2D) + +/** +*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value . \n + +*@par Inputs: +*x: A Tensor dtype of real number . \n + +*@par Attributes: +*@li threshold: A float32 scale value to threshold at. +*@li value: A float32 scale value to replace with . \n + +*@par Outputs: +*y: A Tensor of shape and dtype of output, should be same shape and type as input . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ThresholdV2D) + .INPUT(x, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(threshold, Float) + .REQUIRED_ATTR(value, Float) + .OP_END_FACTORY_REG(ThresholdV2D) + +/** +*@brief: Computes hyperbolic tangent of "x" element-wise . \n + +*@par Inputs: +*One input: +*x: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Mish. +*/ + +REG_OP(Mish) + .INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 })) + .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 })) + .OP_END_FACTORY_REG(Mish) + +/** + * @brief pytorch hardtanh_backward operator. + * + * @par Inputs: + * 2 inputs, including: + * @li result, minimum tensor of the linear region range, + * datatype: float16/float32, format:ND/5HD. + * @li grad, maximum tensor of the linear region range, + * datatype:float16/float32, format:ND/5HD. \n + + * @par Attributes: + * 2 attributes, including: + * @li min_val, minimum value of the linear region range, datatype:float. + * @li max_val, maximum value of the linear region range, datatype:float. \n + + * @par Outputs: + * 1 output, including: + * @li y, hardtanh_backward output tensor, datatype and format is same as + * input result. \n + + * @attention Constraints: + * This operator only supports dataType: float16/float32, format: ND/5HD. \n + + * @par Third-party framework compatibility + * Compatible with the Pytorch operator HardtanhGrad. + */ +REG_OP(HardtanhGrad) + .INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */ + .INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Second operand." */ + .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Result, has same element type as two inputs" */ + .ATTR(min_val, Float, -1.0) + .ATTR(max_val, Float, 1.0) + .OP_END_FACTORY_REG(HardtanhGrad) + +/** +* @brief Calculates the softplus loss function with attributes of beta and threshold. \n + +* @par Inputs: +* One inputs, including: +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li beta: An optional float. Defaults to "1.0" \n + +* @li threshold: An optional float. Defaults to "20.0" \n + +* @par Outputs: +* @li y: A mutable Tensor. Has the same type as "x" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Softplus. +*/ +REG_OP(SoftplusV2) + .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(beta, Float, 1.0) + .ATTR(threshold, Float, 20.0) + .OP_END_FACTORY_REG(SoftplusV2) + +/** +* @brief Calculates the reversed outputs of the function "softplus_v2". 
\n + +* @par Inputs: +* Two inputs, including: +* @li input_gradients: A mutable Tensor. Must be one of the following types: +* float16, float32. +* @li input_features: A mutable Tensor of the same type as "input_gradients" \n + +* @par Attributes: +* @li beta: An optional float. Defaults to "1.0" \n + +* @li threshold: An optional float. Defaults to "20.0" \n + +* @par Outputs: +* @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SoftplusGrad. +*/ +REG_OP(SoftplusV2Grad) + .INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(beta, Float, 1.0) + .ATTR(threshold, Float, 20.0) + .OP_END_FACTORY_REG(SoftplusV2Grad) + +/** + * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) + * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. + * + * @par inputs + * one input including: + * @li x: input A Tensor. Must be one of the following types: float32, float16 + * + * @par output + * one output including: + * @li y:A Tensor of the same type as x + * + */ +REG_OP(ThresholdedRelu) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(alpha, Float, 1.0) + .OP_END_FACTORY_REG(ThresholdedRelu) + +/** +* @brief Calculate the hard shrinkage function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Hardshrink. \n +*/ +REG_OP(HardShrink) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .OP_END_FACTORY_REG(HardShrink) + +/** +* @brief Calculate the hard sigmoid function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32, int32. \n + +* @par Attributes: +* @li alpha: An optional float. Defaults to 0.16666666. \n +* @li beta: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Hardsigmoid. \n +*/ +REG_OP(HardSigmoid) + .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(alpha, Float, 0.16666666) + .ATTR(beta, Float, 0.5) + .OP_END_FACTORY_REG(HardSigmoid) + +/** +* @brief Calculate the soft shrinkage function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Softshrink. 
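+*
+* @par Example:
+* An illustrative element-wise sketch of the usual soft-shrinkage form
+* (y = x - lambd for x > lambd, y = x + lambd for x < -lambd, 0 otherwise):
+* @code
+*   // Illustrative helper, not part of this header.
+*   inline float soft_shrink(float x, float lambd = 0.5f) {
+*     if (x > lambd)  return x - lambd;
+*     if (x < -lambd) return x + lambd;
+*     return 0.0f;
+*   }
+* @endcode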
\n +*/ +REG_OP(SoftShrink) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .OP_END_FACTORY_REG(SoftShrink) + +/** +* @brief Calculate the reversed outputs of the function "soft_shrink". \n + +* @par Inputs: +* Two inputs, including: +* @li input_grad: A tensor. Must be one of the following types: +* float16, float32. \n +* @li input_x: A tensor of the same dtype as "input_grad". \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor of the same dtype and shape as "input_graxd". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SoftShrinkGrad. \n +*/ +REG_OP(SoftShrinkGrad) + .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .OP_END_FACTORY_REG(SoftShrinkGrad) + +/** +*@brief Calculate -ln(1+e^(-x)). \n + +*@par Inputs: +*One inputs, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32. \n + +*@par Outputs: +*One outputs, including: +* @li y: A tensor with the same type and shape of x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator LogSigmoid. \n +*/ +REG_OP(LogSigmoid) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */ + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */ + .OP_END_FACTORY_REG(LogSigmoid) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/inc/ops/npu_loss_scale_ops.h new file mode 100644 index 00000000..f36d2935 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/npu_loss_scale_ops.h @@ -0,0 +1,122 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file npu_loss_scale_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Computes NPU alloc float status operator function . \n + +*@par Outputs: +*data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(NPUAllocFloatStatusOperator) + .OUTPUT(data, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(NPUAllocFloatStatusOperator) + +/** +*@brief Computes NPU clear float status operator function . \n + +*@par Inputs: +*addr: A Tensor of data memory address. Must be float32 . \n + +*@par Outputs: +*data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(NPUClearFloatStatusOperator) + .INPUT(addr, TensorType{DT_FLOAT}) + .OUTPUT(data, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(NPUClearFloatStatusOperator) + +/** +*@brief Computes NPU get float status operator function . \n + +*@par Inputs: +*addr: A Tensor of data memory address. Must be float32 . \n + +*@par Outputs: +*data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(NPUGetFloatStatusOperator) + .INPUT(addr, TensorType{DT_FLOAT}) + .OUTPUT(data, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(NPUGetFloatStatusOperator) + +/** +*@brief Produces a variable with 0 in memory . \n + +*@par Outputs: +*y: A Tensor of type int32, output eight numbers with a value of zero. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(NPUAllocFloatStatus) + .OUTPUT(data, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(NPUAllocFloatStatus) + +/** +*@brief Set the value of address 0x40000 to 0 in each core . \n + +*@par Inputs: +*addr: A tensor of type float32 . \n + +*@par Outputs: +*data: A Tensor of type float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(NPUClearFloatStatus) + .INPUT(addr, TensorType{DT_FLOAT}) + .OUTPUT(data, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(NPUClearFloatStatus) + +/** +*@brief Get the value of address 0x40000 . \n + +*@par Inputs: +*addr: A tensor of type float32 . \n + +*@par Outputs: +*data: A Tensor of type float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(NPUGetFloatStatus) + .INPUT(addr, TensorType{DT_FLOAT}) + .OUTPUT(data, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(NPUGetFloatStatus) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/outfeed_ops.h b/third_party/fwkacllib/inc/inc/ops/outfeed_ops.h new file mode 100644 index 00000000..53b9d701 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/outfeed_ops.h @@ -0,0 +1,27 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file outfeed_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_ + +#include "data_flow_ops.h" + +#endif // OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_ + diff --git a/third_party/fwkacllib/inc/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/inc/ops/pad_ops.h new file mode 100644 index 00000000..8d71c5cd --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/pad_ops.h @@ -0,0 +1,445 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file pad_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ + +#include "graph/operator_reg.h" +namespace ge { + +/** +*@brief Creates a tensor filled with a scalar value. +* This operation creates a tensor of shape "dims" and fills it with "value". +* +*@par Inputs: +*@li dims: A 1D tensor of types int32 or int64. Represents the shape of the output tensor . \n + +*@li value: A 0D scalar. Specifies the value to fill the returned tensor. +* Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. +* +*@par Outputs: +* y: A tensor. Has the same type as "value". +* +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator Fill. +*@li Compatible with the Caffe operator Filler. +* +*/ +REG_OP(Fill) + .INPUT(dims, TensorType::IndexNumberType()) + .INPUT(value, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Fill) + +/** +*@brief Creates a tensor filled with a scalar value. +* This operation creates a tensor of shape "dims" and fills it with "value". +* +*@par Inputs: +* value: A 0D scalar for the value to fill the returned tensor. Must be one of +* the following types: +* float16, float32, uint8, int8, int16, int32, int64, quint8, qint8, qint32 +* +*@par Attributes: +* dims: A tensor. Must be one of the following types:"int32" +* 1-D. Represents the shape of the output tensor. +* +*@par Outputs: +* y: A tensor. Has the same type as "value". +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Fill instead. +*/ +REG_OP(FillD) + .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, + DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, + DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, + DT_UINT64, DT_BOOL, DT_DOUBLE})) + .REQUIRED_ATTR(dims, ListInt) + .OP_END_FACTORY_REG(FillD) + +/** +*@brief Broadcasts an array for a compatible shape. +* Broadcasting is the process of making arrays to have compatible shapes +* for arithmetic operations. Two shapes are compatible if for each +* dimension pair they are either equal or one of them is one. When trying +* to broadcast a Tensor to a shape, it starts with the trailing dimensions, +* and works its way forward. +* +*@par Inputs: +*@li x: A tensor. +*@li shape: A tensor of type int32. +* A 1D tensor of type int32, for the shape of the desired output. +* +*@par Outputs: +* y: A tensor. Has the same type as "x". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BroadcastTo. +* +*/ +REG_OP(BroadcastTo) + .INPUT(x, TensorType::BasicType()) + .INPUT(shape, TensorType({DT_INT32})) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(BroadcastTo) + +/** +*@brief Broadcasts an array for a compatible shape. 
+* Broadcasting is the process of making arrays to have compatible shapes +* for arithmetic operations. Two shapes are compatible if for each +* dimension pair they are either equal or one of them is one. When trying +* to broadcast a Tensor to a shape, it starts with the trailing dimensions, +* and works its way forward. +* +*@par Inputs: +* x: A tensor. A tensor to broadcast. +* +*@par Attributes: +* shape: A tensor of type int32. +* A 1D tensor of type int32, for the shape of the desired output. +* +*@par Outputs: +* y: A tensor. Has the same type as "x". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BroadcastTo. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use BroadcastTo instead. +*/ +REG_OP(BroadcastToD) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(shape, ListInt) + .OP_END_FACTORY_REG(BroadcastToD) + +/** +*@brief Pads a tensor . \n + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64. +* @li paddings: A Tensor of type int32 or int64 . \n + +*@par Outputs: +*y: A Tensor of the same type as "x" . \n + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator Pad. +*/ +REG_OP(Pad) + .INPUT(x, TensorType::BasicType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Pad) + +/** +*@brief Pads a tensor . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n + +*@par Attributes: +*paddings: An optional "vector>". Defaults to "{}". +* For each dimension D of input, paddings[D, 0] indicates how many +* values to add before the contents of tensor in that dimension, +* and paddings[D, 1] indicates how many values to add after the +* contents of tensor in that dimension . \n + +*@par Outputs: +*y: A Tensor of the same type as "x" . \n + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator Pad. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. +*/ +REG_OP(PadD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(paddings, ListListInt) + .OP_END_FACTORY_REG(PadD) + +/** +*@brief Pads a tensor . \n + +*@par Inputs: +*Three inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64. +* @li constant_values: A Tensor. Must have the same type as input. +* @li paddings: A Tensor of type int32 or int64 . \n + +*@par Outputs: +*y: A Tensor of the same type as "x" . \n + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator Pad. +*/ +REG_OP(PadV2) + .INPUT(x, TensorType::BasicType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .INPUT(constant_values, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(PadV2) + +/** +*@brief Pads a tensor . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n +*constant_values: A Tensor. Must have the same type as input. + +*@par Attributes: +*paddings: An optional "vector>". Defaults to "{}". 
+* For each dimension D of input, paddings[D, 0] indicates how many +* values to add before the contents of tensor in that dimension, +* and paddings[D, 1] indicates how many values to add after the +* contents of tensor in that dimension . \n + +*@par Outputs: +*y: A Tensor of the same type as "x" . \n + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator PadV2. +*/ +REG_OP(PadV2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(constant_values, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(paddings, ListListInt) + .OP_END_FACTORY_REG(PadV2D) + +/** +*@brief Pads a tensor. + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64. +* @li paddings: A Tensor of type int32 or int64. +* @li constant_values: A optional Tensor of int32 or int64 + +*@par Attributes: +* @li mode: An optional string, Defaults to "constant", indicates paddings mode, +* support "constant", "reflect", "edge" +* @li paddings_contiguous: An optional bool value, Defaults to true. +* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] +* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator Pad. +*/ +REG_OP(PadV3) + .INPUT(x, TensorType::BasicType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(constant_values, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(mode, String, "constant") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3) + +/** +*@brief Pads a tensor. + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32. + +*@par Attributes: +* @li paddings: An required "vector>". +* For each dimension D of input, paddings[D, 0] indicates how many +* values to add before the contents of tensor in that dimension, +* and paddings[D, 1] indicates how many values to add after the +* contents of tensor in that dimension. +* @li constant_values: An optional int value for pad. +* @li mode: An optional string, Defaults to "constant", indicates paddings mode, +* support "constant", "reflect", "edge" +* @li paddings_contiguous: An optional bool value, Defaults to true. +* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] +* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator Pad. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead. +*/ +REG_OP(PadV3D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) + .REQUIRED_ATTR(paddings, ListListInt) + .ATTR(constant_values, Int, 0) + .ATTR(mode, String, "constant") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3D) + +/** +*@brief Create a diagonal tensor + +*@par Inputs: +*Two inputs, including: +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32, int32 . 
\n
+
+* @li assist: A mutable Tensor with rank k, where k is at most 1.
+* Has the same type as "x" . \n
+
+*@par Outputs:
+*y: A mutable Tensor. Has the same type as "x" . \n
+
+*@see Diag()
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Diag.
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use Diag instead.
+*/
+REG_OP(DiagD)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(assist, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OP_END_FACTORY_REG(DiagD)
+
+/**
+*@brief Create a diagonal tensor
+
+*@par Inputs:
+*One input, include:
+* x: A mutable Tensor with rank k, where k is at most 1. Must be one of the
+* following types:
+* float16, float32, double, int32, int64, complex64, complex128 . \n
+
+*@par Outputs:
+*y: A mutable Tensor. Has the same type as "x" . \n
+
+*@see DiagD()
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Diag.
+*/
+REG_OP(Diag)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32,
+                          DT_INT64, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32,
+                           DT_INT64, DT_COMPLEX64, DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(Diag)
+
+/**
+*@brief Ascend Padding, pad the last dimension of input
+
+*@par Inputs:
+*One input, include:
+*x: Tensor which last dimension must be 1. For example: [624000, 1] . \n
+
+*@par Outputs:
+*y: Padding the last dimension of x to padDimSize, [624000, padDimSize] . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Diag.
+*/
+REG_OP(AscendPadding)
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .ATTR(pad_dim_size, Int, 8)
+    .OP_END_FACTORY_REG(AscendPadding)
+
+
+/**
+*@brief EmbeddingRankId, computes, for each index, the server (rank) that holds it and its position within that server . \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please do not use. \n
+
+*@par Inputs:
+*Two inputs, include:
+*addr_table: Tensor which last dimension must be 3. For example: [8, 3].
+*index: Tensor. For example: [640000].
+*@par Outputs:
+*rank_id: A Tensor of shape [size, 3], where "size" is the first dimension of "index".
+          Its last dimension must be 3. For example: [640000, 3]
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Diag.
+*/
+REG_OP(EmbeddingRankId)
+    .INPUT(addr_table, TensorType({DT_UINT64}))
+    .INPUT(index, TensorType({DT_INT64,DT_INT32,DT_UINT64}))
+    .OUTPUT(rank_id, TensorType({DT_UINT64}))
+    .ATTR(row_memory, Int, 320)
+    .ATTR(mode, String, "mod")
+    .OP_END_FACTORY_REG(EmbeddingRankId)
+
+/**
+* @brief Fill a tensor of the specified shape with the given value.
+
+* @par Inputs:
+* One input, including:
+* @li dims: A Tensor, specifying the shape of the tensor to fill.
+
+* @par Attributes:
+* @li value: An optional float value. Defaults to 0.0.
+
+* @par Outputs:
+* @li y: A Tensor. Has the shape specified by "dims" and is filled with the value specified by attr "value".
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator ConstantOfShape.
+*/
+REG_OP(FillV2)
+    .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
+    .ATTR(value, Float, 0)
+    .OP_END_FACTORY_REG(FillV2)
+
+/**
+* @brief Fill a tensor of the specified shape with the given value.
+
+* @par Attributes:
+* @li value: An optional float value. Defaults to 0.0.
+ +* @li dims: An required listInt to specify the shape that the value to fill. + +* @par Outputs: +* @li y: A Tensor. Has the shape specify by attr shape, and full of the value specify by attr value. + +* @par Third-party framework compatibility +* Compatible with the ONNX operator ConstantOfShape. +*/ +REG_OP(FillV2D) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64})) + .ATTR(value, Float, 0) + .REQUIRED_ATTR(dims, ListInt) + .OP_END_FACTORY_REG(FillV2D) +} // namespace ge +#endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/inc/ops/parsing_ops.h new file mode 100644 index 00000000..9a5cf504 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/parsing_ops.h @@ -0,0 +1,56 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file parsing_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { + +/** +*@brief Converts each string in the input Tensor to the specified numeric type . \n + +*@par Inputs: +*Inputs include: +*x: A Tensor. Must be one of the following types: string . \n + +*@par Attributes: +*out_type: The numeric type to interpret each string in string_tensor as . \n + +*@par Outputs: +*y: A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The implementation for StringToNumber on Ascend uses AICPU, with bad performance. \n + +*@par Third-party framework compatibility +*@li compatible with tensorflow StringToNumber operator. +*/ +REG_OP(StringToNumber) + .INPUT(x, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .ATTR(out_type, Type, DT_FLOAT) + .OP_END_FACTORY_REG(StringToNumber) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/inc/ops/quantize_ops.h new file mode 100644 index 00000000..806e28df --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/quantize_ops.h @@ -0,0 +1,224 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
+ * \file quantize_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_
+#include "graph/operator_reg.h"
+
+namespace ge {
+
+/**
+* @brief Dequantizes the input tensor into a float tensor.
+* [min_range, max_range] are float32 tensors that specify the range
+* for "y".
+* The "mode" attribute controls exactly which calculations are used to convert
+* the float values to their quantized equivalents.
+* @par Inputs:
+* @li x: A Tensor. Must be one of the following types: int8, uint8,
+* int32.
+* @li min_range: A Tensor of type float32.
+* Specifies the minimum scalar value possibly produced for the input.
+* @li max_range: A Tensor of type float32.
+* Specifies the maximum scalar value possibly produced for the input . \n
+
+* @par Attributes:
+* mode: An optional string from: "MIN_COMBINED", "MIN_FIRST", and "SCALED".
+* Defaults to "MIN_COMBINED" . \n
+
+* @par Outputs:
+* y: A Tensor of type float32 . \n
+
+* @attention Constraints:
+* @li "min_range" and "max_range" have the same shapes.
+* @li "x" and "y" have the same shapes . \n
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator Dequantize.
+*/
+REG_OP(Dequantize)
+    .INPUT(x, TensorType(DT_QINT8, DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16))
+    .INPUT(min_range, TensorType{DT_FLOAT})
+    .INPUT(max_range, TensorType{DT_FLOAT})
+    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .ATTR(mode, String, "MIN_COMBINED")
+    .OP_END_FACTORY_REG(Dequantize)
+
+/**
+*@brief Quantizes the input . \n
+
+*@par Inputs:
+*x: An NC1HWC0 tensor of type float16 or float32, specifying the input . \n
+
+*@par Attributes:
+*@li scale: A required float32, specifying the scaling ratio.
+*@li offset: A required float16, specifying the offset.
+*@li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
+*@li round_mode: An optional string, specifying the float16 to int8 cast type.
+* The value range is [Round, Floor, Ceiling, Truncate]. Defaults to "Round" . \n
+
+*@par Outputs:
+*y: The quantized output tensor of type int8 and with format NC1HWC0 . \n
+
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe.
+*/
+REG_OP(AscendQuant)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32}))
+    .OUTPUT(y, TensorType({DT_INT8}))
+    .REQUIRED_ATTR(scale, Float)
+    .REQUIRED_ATTR(offset, Float)
+    .ATTR(sqrt_mode, Bool, false)
+    .ATTR(round_mode, String, "Round")
+    .OP_END_FACTORY_REG(AscendQuant)
+
+/**
+*@brief Dequantizes the input . \n
+
+*@par Inputs:
+*@li x: An NC1HWC0 tensor of type int32, specifying the input.
+*@li deq_scale: An NC1HWC0 tensor of type float16 or uint64, specifying the scaling ratio . \n
+
+*@par Attributes:
+*@li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
+*@li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
+*@li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n
+
+*@par Outputs:
+*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0 . \n
+
+*@par Third-party framework compatibility
+* It is a custom operator. It has no corresponding operator in Caffe.
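+
+*@par Example
+* Illustrative sketch only; the exact arithmetic below is an assumption and is
+* not stated in this header. With "sqrt_mode" and "relu_flag" left at "False",
+* each int32 element of "x" is assumed to be scaled by the corresponding
+* "deq_scale" factor, so an element x = 100 with a scale of 0.25 would
+* dequantize to y = 25.0.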
+*/ +REG_OP(AscendDequant) + .INPUT(x, TensorType({DT_INT32})) + .INPUT(deq_scale, TensorType({DT_FLOAT16, DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(sqrt_mode, Bool, false) + .ATTR(relu_flag, Bool, false) + .ATTR(dtype, Int, DT_FLOAT) + .OP_END_FACTORY_REG(AscendDequant) + +/** +*@brief Anti quantizes the input . \n + +*@par Inputs: +*x: An NC1HWC0 tensor of type int8, specifying the input . \n + +*@par Attributes: +*@li scale: A required float32 scale. +*@li offset: A required float32 offset. +*@li dtype: A optional int32, specifying the output data type. Defaults to "DT_FLOAT". +*@li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False" . \n + +*@par Outputs: +*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0 . \n + +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(AscendAntiQuant) + .INPUT(x, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(scale, Float) + .REQUIRED_ATTR(offset, Float) + .ATTR(dtype, Int, DT_FLOAT) + .ATTR(sqrt_mode, Bool, false) + .OP_END_FACTORY_REG(AscendAntiQuant) + +/** +*@brief Dequantizes the input of int16 . \n + +*@par Inputs: +*@li x0: An NC1HWC0 tensor of type int32, specifying the input. +*@li deq_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. +*@li x1: An NC1HWC0 tensor of type int16, specifying the input . \n + +*@par Attributes: +*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n + +*@par Outputs: +*y: The dequantized output tensor of type int16 and with format NC1HWC0 . \n + +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(AscendDequantS16) + .INPUT(x0, TensorType({DT_INT32})) + .INPUT(deq_scale, TensorType({DT_UINT64})) + .OPTIONAL_INPUT(x1, TensorType({DT_INT16})) + .OUTPUT(y, TensorType({DT_INT16})) + .ATTR(relu_flag, Bool, false) + .OP_END_FACTORY_REG(AscendDequantS16) + +/** +*@brief Requantizes the input . \n + +*@par Inputs: +*@li x: An NC1HWC0 tensor of type int32, specifying the input. +*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio . \n + +*@par Attributes: +*relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n + +*@par Outputs: +*y: The dequantized output tensor of type int8 and with format NC1HWC0 . \n + +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(AscendRequant) + .INPUT(x, TensorType({DT_INT32})) + .INPUT(req_scale, TensorType({DT_UINT64})) + .OUTPUT(y, TensorType({DT_INT8})) + .ATTR(relu_flag, Bool, false) + .OP_END_FACTORY_REG(AscendRequant) + +/** +*@brief Requantizes the input of int16 . \n + +*@par Inputs: +*@li x: An NC1HWC0 tensor of type int16, specifying the input. +*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio. +*@li x1: An NC1HWC0 tensor of type int16 . \n + +*@par Attributes: +*@li dual_output: A optional bool, specifying whether to perform dual ouput, either "True" or "False". Defaults to "False". +*@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n + +*@par Outputs: +*@li y: The dequantized output tensor of type int8 and with format NC1HWC0. 
+*@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n + +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(AscendRequantS16) + .INPUT(x, TensorType({DT_INT16})) + .INPUT(req_scale, TensorType({DT_UINT64})) + .OPTIONAL_INPUT(x1, TensorType({DT_INT16})) + .OUTPUT(y, TensorType({DT_INT8})) + .OUTPUT(y1, TensorType({DT_INT16})) + .ATTR(dual_output, Bool, false) + .ATTR(relu_flag, Bool, false) + .OP_END_FACTORY_REG(AscendRequantS16) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/inc/ops/ragged_array_ops.h new file mode 100644 index 00000000..20484623 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/ragged_array_ops.h @@ -0,0 +1,65 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file ragged_array_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_ + +#include "graph/operator.h" +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Gather ragged slices from `params` axis `0` according to `indices` . \n + +*@par Inputs: +*@li params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the +*params` RaggedTensor input. It's a dynamic input. +*@li params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change +*at the python level from dense_values to flat_values, so dense_values is the +*deprecated name. +*@li indices: Indices in the outermost dimension of `params` of the values that should be +*gathered. +*@li OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain +*this number of `row_splits` tensors. This value should equal +*`indices.shape.ndims + params.ragged_rank - 1` . \n + +*@par Outputs: +*y:A Returns The `nested_row_splits` tensors that define the row-partitioning for the +*returned RaggedTensor.The `flat_values` for the returned RaggedTensor . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow RaggedGather operator. 
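+
+*@par Example
+* Illustrative example, assumed from the TensorFlow RaggedGather semantics
+* referenced above: a ragged "params" of [[1, 2], [3], [4, 5, 6]] is encoded as
+* params_nested_splits = [0, 2, 3, 6] and params_dense_values = [1, 2, 3, 4, 5, 6];
+* gathering indices = [2, 0] would yield output_nested_splits = [0, 3, 5] and
+* output_dense_values = [4, 5, 6, 1, 2].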
+*/ + +REG_OP(RaggedGather) + .DYNAMIC_INPUT(params_nested_splits, TensorType({DT_INT32, DT_INT64})) + .INPUT(params_dense_values, TensorType({DT_INT32, DT_INT64})) + .INPUT(indices, TensorType({DT_INT32, DT_INT64})) + .DYNAMIC_OUTPUT(output_nested_splits, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(output_dense_values, TensorType({DT_INT32, DT_INT64})) + .REQUIRED_ATTR(Tsplits, Type) + .ATTR(PARAMS_RAGGED_RANK, Int, 1) + .ATTR(OUTPUT_RAGGED_RANK, Int, 0) + .OP_END_FACTORY_REG(RaggedGather) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/inc/ops/ragged_conversion_ops.h new file mode 100644 index 00000000..020e3da4 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/ragged_conversion_ops.h @@ -0,0 +1,98 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file ragged_conversion_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_ +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Converts a RaggedTensor into a SparseTensor with the same values . \n + +*@par Inputs: +*Two inputs, including: +*@li rt_nested_splits: A list of at least 1 Tensor objects with the same type +in: int32, int64. The row_splits for the RaggedTensor. It's a dynamic input. +*@li rt_dense_values: A Tensor. The flat_values for the RaggedTensor +Must be one of the following types: bool, int8, int16, uint16, int32, +int64, double, float, float16 . \n + +*@par Attributes: +*@li RAGGED_RANK: the dynamic of input rt_nested_splits with type int. +*@li Tsplits: A required attribute, the type is int64 . \n + +*@par Outputs: +*@li sparse_indices: A Tensor of type int64. +*@li sparse_values: A Tensor. Has the same type as rt_dense_values. +*@li sparse_dense_shape: A Tensor of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator RaggedTensorToSparse. +*/ +REG_OP(RaggedTensorToSparse) + .DYNAMIC_INPUT(rt_nested_splits, TensorType({DT_INT32, DT_INT64})) + .INPUT(rt_dense_values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) + .OUTPUT(sparse_indices, TensorType({DT_INT64})) + .OUTPUT(sparse_values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) + .OUTPUT(sparse_dense_shape, TensorType({DT_INT64})) + .ATTR(RAGGED_RANK, Int, 1) + .ATTR(Tsplits, Type, DT_INT64) + .OP_END_FACTORY_REG(RaggedTensorToSparse) + +/** +*@brief Create a dense tensor from a ragged tensor, possibly altering its shape . \n + +*@par Inputs: +*Six inputs, including: +*@li shape:A `Tensor`. Must be one of the following types: `int64`, `int32`. +*@li values:A 1D tensor representing the values of the ragged tensor. +*@li default_value:A `Tensor`. 
Must have the same type as `values`. +*@li row_partition_tensors:A list of at least 1 `Tensor` objects with the same +type in: `int64`, `int32` . It's a dynamic input.\n + +*@par Attributes: +*@li num_row_partition_tensors:Numbers of row partition tensors. +*@li row_partition_types: A list of `strings`. +The types of the row partition tensors. At present, these can be: +* "ROW_SPLITS": the row_splits tensor from the ragged tensor. +* "VALUE_ROWIDS": the value_rowids tensor from the ragged tensor. +* "FIRST_DIM_SIZE": if value_rowids is used for the first dimension, then it +is preceeded by "FIRST_DIM_SIZE" . \n + +*@par Outputs: +*@li result: A `Tensor`. Has the same type as `values`. +*/ +REG_OP(RaggedTensorToTensor) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, + DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) + .INPUT(default_value, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, + DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) + .DYNAMIC_INPUT(row_partition_tensors, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(result, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, + DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(num_row_partition_tensors, Int) + .REQUIRED_ATTR(row_partition_types, ListString) + .OP_END_FACTORY_REG(RaggedTensorToTensor) + + +} // namespace ge +#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/inc/ops/ragged_math_ops.h new file mode 100644 index 00000000..258b0ca1 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/ragged_math_ops.h @@ -0,0 +1,60 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file ragged_math_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_ + +#include "graph/operator.h" +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Returns a `RaggedTensor` containing the specified sequences of numbers . \n + +*@par Inputs: +*@li starts: The starts of each range. +*@li limits: The limits of each range. +*@li deltas: The deltas of each range . \n + +*@par Outputs: +*y:A Returns The `row_splits` for the returned `RaggedTensor`.The `flat_values` for the returned `RaggedTensor` . \n + +*@attention Constraints: +*The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. +*The vector inputs must all have the same size. Scalar inputs are broadcast +*to match the size of the vector inputs . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow RaggedRange operator. 
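+
+*@par Example
+* Illustrative example, assumed from the TensorFlow RaggedRange semantics
+* referenced above: starts = [2, 5, 8], limits = [3, 5, 12] and deltas = [1, 1, 1]
+* describe the ragged result [[2], [], [8, 9, 10, 11]], returned as
+* rt_nested_splits = [0, 1, 1, 5] and rt_dense_values = [2, 8, 9, 10, 11].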
+*/
+
+REG_OP(RaggedRange)
+    .INPUT(starts, TensorType({DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64}))
+    .INPUT(limits, TensorType({DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64}))
+    .INPUT(deltas, TensorType({DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64}))
+    .OUTPUT(rt_nested_splits, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(rt_dense_values, TensorType({DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64}))
+    .REQUIRED_ATTR(Tsplits, Type)
+    .OP_END_FACTORY_REG(RaggedRange)
+
+} // namespace ge
+
+#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/inc/ops/random_ops.h b/third_party/fwkacllib/inc/inc/ops/random_ops.h
new file mode 100644
index 00000000..e2b00ce3
--- /dev/null
+++ b/third_party/fwkacllib/inc/inc/ops/random_ops.h
@@ -0,0 +1,554 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file random_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_
+
+#include
+
+#include "graph/operator_reg.h"
+
+namespace ge {
+
+/**
+*@brief Draws samples from a multinomial distribution . \n
+
+*@par Inputs:
+*Inputs include:
+* @li logits: A Tensor. Must be one of the following types: float16, float, double. 2-D Tensor with shape [batch_size, num_classes].
+* @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice . \n
+
+*@par Attributes:
+*@li dtype: An optional type from: int32, int64. Defaults to int64.
+*@li seed: An optional int. Defaults to 0.
+*@li seed2: An optional int. Defaults to 0 . \n
+
+*@par Outputs:
+*y: A Tensor of type dtype . \n
+
+*@attention Constraints:
+*The implementation for Multinomial on Ascend uses AICPU, with bad performance.
+
+*@par Third-party framework compatibility
+*@li compatible with tensorflow Multinomial operator.
+*/
+REG_OP(Multinomial)
+    .INPUT(logits, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(num_samples, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT32, DT_INT64}))
+    .ATTR(dtype, Type, DT_INT64)
+    .ATTR(seed, Int, 0)
+    .ATTR(seed2, Int, 0)
+    .OP_END_FACTORY_REG(Multinomial)
+
+/**
+*@brief Outputs random values from a normal distribution . \n
+
+*@par Inputs:
+*Inputs include:
+* @li shape: A Tensor. Must be one of the following types: int32, int64. The shape of the output tensor. Batches are indexed by the 0th dimension.
+* @li means: A Tensor. Must be one of the following types: half, bfloat16, float32, float64.
+* @li stdevs: A Tensor. Must have the same type as means.
+* @li min: A Tensor. Must have the same type as means. The minimum cutoff. May be -infinity.
+* @li max: A Tensor. Must have the same type as means . \n
+
+*@par Attributes:
+*@li seed: An optional int. Defaults to 0.
+*@li seed2: An optional int. Defaults to 0 . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as means .
\n + +*@attention Constraints: +*The implementation for ParameterizedTruncatedNormal on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow ParameterizedTruncatedNormal operator. +*/ +REG_OP(ParameterizedTruncatedNormal) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(means, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(stdevs, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(min, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(max, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(ParameterizedTruncatedNormal) + +/** +*@brief Computes the derivative of a Gamma random sample w.r.t. alpha . \n + +*@par Inputs: +*Inputs include: +* @li alpha: A Tensor. Must be one of the following types: float32, float64. +* @li sample: A Tensor. Must have the same type as alpha . \n + +*@par Outputs: +*y: A Tensor. Has the same type as alpha . \n + +*@attention Constraints: +*The implementation for RandomGammaGrad on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomGammaGrad operator. +*/ +REG_OP(RandomGammaGrad) + .INPUT(alpha, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(sample, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(RandomGammaGrad) + +/** +*@brief Outputs random values from the Gamma distribution(s) described by alpha . \n + +*@par Inputs: +*Inputs include: +* @li shape: A Tensor. Must be one of the following types: int32, int64. 1-D integer tensor. +* @li alpha: A Tensor. Must be one of the following types: half, float32, float64 . \n + +*@par Attributes: +*@li seed: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as alpha . \n + +*@attention Constraints: +*The implementation for RandomGamma on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomGamma operator. +*/ +REG_OP(RandomGamma) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(RandomGamma) + +/** +*@brief Outputs random values from the Poisson distribution(s) described by rate . \n + +*@par Inputs: +*Inputs include: +* @li shape: A Tensor. Must be one of the following types: int32, int64. 1-D integer tensor. +* @li rate: A Tensor. Must be one of the following types: half, float32, float64, int32, int64 . \n + +*@par Attributes: +*@li dtype: An optional type from: half, float32, float64, int32, int64. Defaults to int64. +*@li seed: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n + +*@par Outputs: +*y: A Tensor of type dtype . \n + +*@attention Constraints: +*The implementation for RandomPoisson on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomPoisson operator. 
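+
+*@par Example
+* Illustrative note, assumed from the TensorFlow RandomPoisson semantics
+* referenced above: shape = [10, 2] combined with a "rate" tensor of shape [3]
+* would produce a [10, 2, 3] output, one independent Poisson sample per
+* (shape position, rate entry) pair.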
+*/ +REG_OP(RandomPoisson) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(rate, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_INT32, DT_INT64})) + .ATTR(dtype, Type, DT_INT64) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(RandomPoisson) + +/** +*@brief Randomly shuffles a tensor along its first dimension . \n + +*@par Inputs: +*Inputs include: +*x: A Tensor. The tensor to be shuffled . \n + +*@par Attributes: +*@li seed: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as x . \n + +*@attention Constraints: +*The implementation for RandomShuffle on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomShuffle operator. +*/ +REG_OP(RandomShuffle) + .INPUT(x, TensorType({DT_INT64, DT_INT32, DT_UINT16, DT_INT16, + DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128, DT_BOOL, DT_STRING, DT_RESOURCE})) + .OUTPUT(y, TensorType({DT_INT64, DT_INT32, DT_UINT16, DT_INT16, + DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128, DT_BOOL, DT_STRING, DT_RESOURCE})) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(RandomShuffle) + +/** +*@brief Outputs random values from a normal distribution . \n + +*@par Inputs: +*Inputs include: +*shape: A Tensor. Must be one of the following types: int32, int64. The shape of the output tensor . \n + +*@par Attributes: +*@li dtype: A type from: half, float16, float32, float64. The type of the output. +*@li seed: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n + +*@par Outputs: +*y: A Tensor of type dtype . \n + +*@attention Constraints: +*The implementation for RandomStandardNormal on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomStandardNormal operator. +*/ +REG_OP(RandomStandardNormal) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(dtype, Type) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(RandomStandardNormal) + +/** +*@brief Outputs random integers from a uniform distribution . \n + +*@par Inputs: +*Inputs include: +* @li shape: A Tensor. Must be one of the following types: int32, int64. The shape of the output tensor. +* @li min: A Tensor. Must be one of the following types: int32, int64. 0-D. +* @li max: A Tensor. Must have the same type as minval. 0-D . \n + +*@par Attributes: +*@li seed: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n + +*@par Outputs: +*y: A Tensor. Has the same type as min . \n + +*@attention Constraints: +*The implementation for RandomUniformInt on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomUniformInt operator. +*/ +REG_OP(RandomUniformInt) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(min, TensorType({DT_INT32, DT_INT64})) + .INPUT(max, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(RandomUniformInt) + +/** +*@brief Outputs random values from a uniform distribution . \n + +*@par Inputs: +*Inputs include: +*shape: A Tensor. 
Must be one of the following types: int32, int64. The shape of the output tensor . \n + +*@par Attributes: +*@li dtype: A type from: half, float16, float32, float64. The type of the output. +*@li seed: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n + +*@par Outputs: +*y: A Tensor of type dtype . \n + +*@attention Constraints: +*The implementation for RandomUniform on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomUniform operator. +*/ +REG_OP(RandomUniform) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(dtype, Type) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(RandomUniform) + +/** +*@brief Outputs random values from a truncated normal distribution . \n + +*@par Inputs: +*Inputs include: +*shape: A Tensor. Must be one of the following types: int32, int64 . \n + +*@par Attributes: +*@li seed: An optional int. Defaults to 0. +*@li seed2: An optional int. Defaults to 0 . \n + +*@par Outputs: +*size: A Tensor of types: float16, float32, double . \n + +*@attention Constraints: +*The implementation for TruncatedNormal on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow TruncatedNormal operator. +*/ +REG_OP(TruncatedNormal) + .INPUT(shape, TensorType({ DT_INT32, DT_INT64 })) + .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(TruncatedNormal) + +/** +*@brief Generate random bit mask for dropout . \n + +*@par Inputs: +include: +*@li shape:The shape of the output tensor. +*@li prob:0-D. Number of bit 1 . \n + +*@par Attributes: +*@li seed:If either seed or seed2 are set to be non-zero, the random number +*generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2:A second seed to avoid seed collision . \n + +*@par Outputs: +*y:Output (1-D) random number using uint data format . \n + +*@attention Constraints: +*The output is aligned with 128 bits + +*@see DropOutGenMask() +*/ +REG_OP(DropOutGenMask) + .INPUT(shape, TensorType({ DT_INT32, DT_INT64 })) + .INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT })) + .OUTPUT(y, TensorType({ DT_UINT8 })) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .OP_END_FACTORY_REG(DropOutGenMask) + +/** +*@brief Generates values in an interval . \n + +*@par Inputs: +* Four ND inputs, including: +*@li assist: A 1D Tensor of type float32. +*@li start: A 1D Tensor of type float32, for the first entry in the range. +*@li stop: A 1D Tensor of type float32, for the last entry in the range. +*@li num: A 1D Tensor of type int32 or int64, for the common difference of the entries . \n + +*@par Outputs: +*output_op: A 1D Tensor of type float32 . \n + +*@attention Constraints: +* "input_assist" is a sequence of "input_num" evenly-spaced values beginning at 0 with an common difference of 1 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator lin_space. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use LinSpace instead. +*/ +REG_OP(LinSpaceD) + .INPUT(assist, TensorType({DT_FLOAT})) + .INPUT(start, TensorType({DT_FLOAT})) + .INPUT(stop, TensorType({DT_FLOAT})) + .INPUT(num, TensorType::IndexNumberType()) + .OUTPUT(output, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(LinSpaceD) + +/** +*@brief Generates values in an interval . 
\n
+
+*@par Inputs:
+* Four ND inputs, including:
+*@li start: A 1D Tensor of type float32, for the first entry in the range.
+*@li stop: A 1D Tensor of type float32, for the last entry in the range.
+*@li num: A 1D Tensor of type int32 or int64, for the number of evenly-spaced entries to generate . \n
+
+*@par Outputs:
+*output: A 1D Tensor of type float32 . \n
+
+*@attention Constraints:
+* "input_assist" is a sequence of "input_num" evenly-spaced values beginning at 0 with a common difference of 1 . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator lin_space.
+*/
+REG_OP(LinSpace)
+    .INPUT(start, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(stop, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(num, TensorType::IndexNumberType())
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(LinSpace)
+
+
+
+/**
+*@brief The dropout operator randomly sets (according to the given dropout probability)
+*the outputs of some units to zero, while the others remain unchanged. \n
+
+*@par Inputs:
+*One input, including:
+*@li x: The input tensor variable. The data type is float32. \n
+
+*@par Attributes:
+*@li dropout_ratio: Float between 0 and 1. Fraction of the input units to drop. Defaults to "0.5".
+*@li scale_train: Bool, defaults to true.
+*@li alpha: An optional float32. A scaling factor. Defaults to "1.0".
+*@li beta: An optional float32. An exponent. Defaults to "0.0". \n
+
+*@par Outputs:
+*y: A Tensor holding the dropout result, with the same shape and data type as "x". \n
+*/
+REG_OP(Dropout)
+    .INPUT(x, TensorType{DT_FLOAT})
+    .OUTPUT(y, TensorType{DT_FLOAT})
+    .ATTR(dropout_ratio, Float, 0.5)
+    .ATTR(scale_train, Bool, true)
+    .ATTR(alpha, Float, 1.0)
+    .ATTR(beta, Float, 0.0)
+    .OP_END_FACTORY_REG(Dropout)
+
+/**
+*@brief Shuffle the indices of non-zero elements . \n
+
+*@par Inputs:
+include:
+*x: A tensor <= 5-D . \n
+
+*@par Attributes:
+*@li count: the number of indices to output; if 0, all non-zero element indices are output.
+*@li seed: If either seed or seed2 is set to be non-zero, the random number
+*generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed2: A second seed to avoid seed collision . \n
+
+*@par Outputs:
+*@li y: 2-D tensor of non-zero element indices.
+*@li mask: 1-D tensor indicating whether the corresponding index is valid . \n
+
+*@see RandomChoiceWithMask()
+*/
+REG_OP(RandomChoiceWithMask)
+    .INPUT(x, TensorType({DT_BOOL}))
+    .OUTPUT(y, TensorType({DT_INT32}))
+    .OUTPUT(mask, TensorType({DT_BOOL}))
+    .ATTR(count, Int, 0)
+    .ATTR(seed, Int, 0)
+    .ATTR(seed2, Int, 0)
+    .OP_END_FACTORY_REG(RandomChoiceWithMask)
+
+/**
+*@brief Permutes data in the channel dimension of the input
+
+*@par Inputs:
+*Inputs including:
+* @li x: A required Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n
+
+*@par Attributes:
+*@li group: An optional int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n
+
+*@par Outputs:
+*y: A Tensor. Has same type and shape as "x". Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n
+
+*@attention Constraints:
+*@li "group" must be greater than 0 and must evenly divide the channel dimension size.
+*@li The format of input "x" must be NCHW.
+*@par Third-party framework compatibility
+* Compatible with the Caffe operator ShuffleChannel.
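+
+*@par Example
+* Illustrative example, assumed from the Caffe ShuffleChannel semantics
+* referenced above: with 6 input channels [c0, c1, c2, c3, c4, c5] and
+* group = 2, the channels are treated as a 2 x 3 grid and transposed, so the
+* output channel order is [c0, c3, c1, c4, c2, c5].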
+*/ +REG_OP(ShuffleChannel) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT16, + DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT16, + DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) + .ATTR(group, Int, 1) + .OP_END_FACTORY_REG(ShuffleChannel) + +/** + * @briefGenerate a tensor of samples from a multinomial + * distribution according to the probabilities of each of + * the possible outcomes. + * + * @par inputs + * one input including: + * @li x:Input tensor with shape [batch_size, class_size], + * where class_size is the number of all possible outcomes. + * Each value along the axis zero represents the unnormalized + * log-probability of each corresponding outcome in a batch. + * + * @par output + * one output including: + * @li y:Output tensor with shape [batch_size, sample_size], + * where sample_size is the number of times to sample. + * Each value along the axis zero represents the outcome of + * the corresponding sample in a batch. + * + */ +REG_OP(MultinomialFuss) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .ATTR(dtype, Int, 6) + .ATTR(sample_size, Int, 1) + .ATTR(seed, Float, 0) + .OP_END_FACTORY_REG(MultinomialFuss) + +/** +* @brief During training, randomly zeroes some of the elements of the input tensor +* with probability +* +* @par Inputs: +* @li x: A ND Tensor. Must be one of the following data types: Float, Float16 +* @li seed: A ND Tensor. Must be one of the following data types: Float +* +* @par Attributes: +* @li p: probability of an element to be zeroed +* +* @par Outputs: +* @li y: A tensor with the same shape and type as "x". +* @li mask: A tensor with the same shape and type as "x". +* @li new_seed: A tensor with the same shape and type as "seed". +*/ + +REG_OP(DropoutV2) + .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) + .INPUT(seed, TensorType({ DT_FLOAT })) + .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) + .OUTPUT(mask, TensorType({ DT_FLOAT })) + .OUTPUT(seed, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(p, Float) + .OP_END_FACTORY_REG(DropoutV2) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/inc/ops/reduce_ops.h new file mode 100644 index 00000000..5b97d226 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/reduce_ops.h @@ -0,0 +1,1173 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file reduce_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief Performs reduced batch normalization . \n + +*@par Inputs: +*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n + +*@par Outputs: +*@li sum: A 1D Tensor of type float32 for SUM reduced "x". 
+*@li square_sum: A 1D Tensor of type float32 for SUMSQ reduced "x" . \n + +*@attention Constraints: +* This operator is a BatchNorm fusion operator for updating the moving +* averages for training. +* This operator is used in conjunction with BNTrainingReduce. +*/ +REG_OP(BNTrainingReduce) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(sum, TensorType({DT_FLOAT})) + .OUTPUT(square_sum, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BNTrainingReduce) + +/** +*@brief Performs reduced batch normalization . \n + +*@par Inputs: +*x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n + +*@par Outputs: +*@li sum: A 3D Tensor of type float32 for SUM reduced "x". +*@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n + +*@attention Constraints: +* This operator is a BatchNorm fusion operator for updating the moving +* averages for training. +* This operator is used in conjunction with BN3DTrainingReduce. +*/ +REG_OP(BN3DTrainingReduce) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(sum, TensorType({DT_FLOAT})) + .OUTPUT(square_sum, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BN3DTrainingReduce) + +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Seven inputs, including: +*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, for +* the gradient. +*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. +*@li diff_scale: A 5D Tensor of type float32, with format NC1HWC0, +* for the mean of "x". +*@li diff_offset: A 5D Tensor of type float32, with format NC1HWC0, +* for the variance of "x". +*@li scale: A 5D Tensor of type float32, with format NC1HWC0. +*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0, +* for the mean of "x". +*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0, +* for the variance of "x" . \n + +*@par Attributes: +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x" . \n + +*@par Outputs: +*y: A Tensor of type float16 or float32, with format NC1HWC0, for the offset +* of "x" . \n + +*@attention Constraints: +* The preceding layer of this operator must be BNTrainingUpdateGrad . \n + +*@see BNTrainingUpdateGrad +*/ +REG_OP(BNTrainingReduceGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(diff_scale, TensorType({DT_FLOAT})) + .INPUT(diff_offset, TensorType({DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(batch_mean, TensorType({DT_FLOAT})) + .INPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .OP_END_FACTORY_REG(BNTrainingReduceGrad) + +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Seven inputs, including: +*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for +* the gradient. +*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. +*@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0, +* for the mean of "x". +*@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0, +* for the variance of "x". +*@li scale: A 6D Tensor of type float32, with format NDC1HWC0. +*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, +* for the mean of "x". +*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, +* for the variance of "x" . \n + +*@par Attributes: +*epsilon: An optional float32. Defaults to "0.0001". 
A small float number +* added to the variance of "x" . \n + +*@par Outputs: +*y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset +* of "x" . \n + +*@attention Constraints: +* The preceding layer of this operator must be BN3DTrainingReduceGrad . \n + +*@see BN3DTrainingReduceGrad +*/ +REG_OP(BN3DTrainingReduceGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(diff_scale, TensorType({DT_FLOAT})) + .INPUT(diff_offset, TensorType({DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(batch_mean, TensorType({DT_FLOAT})) + .INPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .OP_END_FACTORY_REG(BN3DTrainingReduceGrad) + +/** +*@brief Performs reduced batch normalization . \n + +*@par Inputs: +* Seven inputs, including: (NC1HWC0 supported) +*@li x: A 5D Tensor of type float16 or float32. +*@li sum: A 1D Tensor of type float32 for the output of operator +* BNTrainingReduce. +*@li square_sum: A 1D Tensor of type float32 for the output of operator +* BNTrainingReduce. +*@li scale: A 1D Tensor of type float32, for the scaling factor. +*@li offset: A 1D Tensor of type float32, for the scaling offset. +*@li mean: A 1D Tensor of type float32, for the updated mean. +*@li variance: A 1D Tensor of type float32, for the updated variance . \n + +*@par Attributes: +*@li epsilon: A required float32, specifying the small value added to variance +* to avoid dividing by zero. +*@li factor: A required float32, specifying the weight for updating the mean +* and variance . \n + +*@par Outputs: +* Five outputs, including: (NC1HWC0 supported) +*@li y: A 5D Tensor of type float16 or float32, for normalized "x". +*@li mean: A 5D Tensor of type float32, for the updated mean. +*@li variance: A 5D Tensor of type float32, for the updated variance. +*@li batch_mean: A 1D Tensor of type float32, for the mean of "x". +*@li batch_variance: A 1D Tensor of type float32, for the variance of "x" . \n + +*@attention Constraints: +*@li This operator is a BatchNorm fusion operator for updating the moving +averages for training. +*This operator is used in conjunction with BNTrainingUpdate. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square +* root instruction. +*/ +REG_OP(BNTrainingUpdate) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(sum, TensorType({DT_FLOAT})) + .INPUT(square_sum, TensorType({DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT})) + .INPUT(variance, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(factor, Float) + .REQUIRED_ATTR(epsilon, Float) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(mean, TensorType({DT_FLOAT})) + .OUTPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BNTrainingUpdate) + +/** +*@brief Performs reduced batch normalization . \n + +*@par Inputs: +* Seven inputs, including: (NDC1HWC0 supported) +*@li x: A 6D Tensor of type float16 or float32. +*@li sum: A 6D Tensor of type float32 for the output of operator +* BN3DTrainingUpdate. +*@li square_sum: A 6D Tensor of type float32 for the output of operator +* BN3DTrainingUpdate. +*@li scale: A 6D Tensor of type float32, for the scaling factor. +*@li offset: A 6D Tensor of type float32, for the scaling offset. 
+*@li mean: A 6D Tensor of type float32, for the updated mean. +*@li variance: A 6D Tensor of type float32, for the updated variance . \n + +*@par Attributes: +*@li epsilon: A required float32, specifying the small value added to variance +* to avoid dividing by zero. +*@li factor: A required float32, specifying the weight for updating the mean +* and variance . \n + +*@par Outputs: +* Five outputs, including: (NDC1HWC0 supported) +*@li y: A 6D Tensor of type float16 or float32, for normalized "x". +*@li mean: A 6D Tensor of type float32, for the updated mean. +*@li variance: A 6D Tensor of type float32, for the updated variance. +*@li batch_mean: A 6D Tensor of type float32, for the mean of "x". +*@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n + +*@attention Constraints: +*@li This operator is a BatchNorm fusion operator for updating the moving +averages for training. +*This operator is used in conjunction with BN3DTrainingUpdate. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square +* root instruction. +*/ +REG_OP(BN3DTrainingUpdate) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(sum, TensorType({DT_FLOAT})) + .INPUT(square_sum, TensorType({DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT})) + .INPUT(variance, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(factor, Float) + .REQUIRED_ATTR(epsilon, Float) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(mean, TensorType({DT_FLOAT})) + .OUTPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BN3DTrainingUpdate) + +/** +*@brief Performs batch normalization for inference . \n + +*@par Inputs: +* Five inputs, including: (NC1HWC0 supported) +*@li x: A 5D Tensor of type float16 or float32. +*@li scale: A 5D Tensor of type float32, for the scaling factor. +*@li offset: A 5D Tensor of type float32, for the scaling offset. +*@li mean: A 5D Tensor of type float32, for the mean. +*@li variance: A 5D Tensor of type float32, for the variance . \n + +*@par Attributes: +*epsilon: An optional float32, specifying the small value added to variance to +* avoid dividing by zero. Defaults to "0.0001" . \n + +*@par Outputs: +*y: A 5D Tensor of type float16 or float32 for the normalized "x" . \n + +*@attention Constraints: +*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root +* instruction. +*/ +REG_OP(BNInfer) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT})) + .INPUT(variance, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(epsilon, Float) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(BNInfer) + +/** +*@brief Performs reduced batch normalization. For some scene which don't contain +assignmoving average . \n + +*@par Inputs: +*Five inputs, including: (NC1HWC0 supported) +*@li x: A 5D Tensor of type float16 or float32. +*@li sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. +*@li square_sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. +*@li scale: A 5D Tensor of type float32, for the scaling factor. +*@li offset: A 5D Tensor of type float32, for the scaling offset . \n + +*@par Attributes: +*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . 
\n + +*@par Outputs: +*Three outputs, including: (NC1HWC0 supported) +*@li y: A 5D Tensor of type float16 or float32, for normalized "x". +*@li batch_mean: A 5D Tensor of type float32, for the mean of "x". +*@li batch_variance: A 5D Tensor of type float32, for the variance of "x" . \n + +*@attention Constraints: +*This operator is used in conjunction with BNTrainingReduce. +For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +*/ +REG_OP(BNTrainingUpdateV2) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(sum, TensorType({DT_FLOAT})) + .INPUT(square_sum, TensorType({DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(epsilon, Float) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BNTrainingUpdateV2) + +/** +*@brief Performs reduced batch normalization v3. For some scene which don't contain +assign moving average . \n + +*@par Inputs: +* Five inputs, including: (NC1HWC0 supported) +*@li x: A 5D Tensor of type float16 or float32. +*@li sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. +*@li square_sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. +*@li scale: A 5D Tensor of type float32, for the scaling factor. +*@li offset: A 5D Tensor of type float32, for the scaling offset . \n + +*@par Attributes: +*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n + +*@par Outputs: +*@li y: A 5D Tensor of type float16 or float32, for normalized "x". +*@li batch_mean: A 5D Tensor of type float32, for the mean of "x". +*@li batch_variance: A 5D Tensor of type float32, for the variance of "x". +*@li reserve_1: A 5D Tensor of type float32, for the mean of batch "x". Has the same type as batch_mean. +*@li reserve_2: A 5D Tensor of type float32, for the variance of batch "x". Has the same type as batch_mean . \n + +*@attention Constraints: +*@li This operator is used in conjunction with BNTrainingReduce. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +*/ +REG_OP(BNTrainingUpdateV3) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(sum, TensorType({DT_FLOAT})) + .INPUT(square_sum, TensorType({DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(epsilon, Float) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(reserve_1, TensorType({DT_FLOAT})) + .OUTPUT(reserve_2, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BNTrainingUpdateV3) + +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Four inputs, including: +*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, +* for the gradient. +*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. +*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0, +* for the mean of "x". +*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0, +* for the variance of "x" . \n + +*@par Attributes: +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x" . \n + +*@par Outputs: +*@li diff_scale: A Tensor of type float32, with format NC1HWC0, +* for the offset of "scale". 
+*@li diff_offset: A Tensor of type float32, with format NC1HWC0, +* for the offset of "offset" . \n + +*/ +REG_OP(BNTrainingUpdateGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(batch_mean, TensorType({DT_FLOAT})) + .INPUT(batch_variance, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .OUTPUT(diff_scale, TensorType({DT_FLOAT})) + .OUTPUT(diff_offset, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BNTrainingUpdateGrad) + +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Four inputs, including: +*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, +* for the gradient. +*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. +*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, +* for the mean of "x". +*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, +* for the variance of "x" . \n + +*@par Attributes: +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x" . \n + +*@par Outputs: +*@li diff_scale: A Tensor of type float32, with format NDC1HWC0, +* for the offset of "scale". +*@li diff_offset: A Tensor of type float32, with format NDC1HWC0, +* for the offset of "offset" . \n + +*/ +REG_OP(BN3DTrainingUpdateGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(batch_mean, TensorType({DT_FLOAT})) + .INPUT(batch_variance, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .OUTPUT(diff_scale, TensorType({DT_FLOAT})) + .OUTPUT(diff_offset, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BN3DTrainingUpdateGrad) + +/** +*@brief Performs the backpropagation of BatchNorm for inference . \n + +*@par Inputs: +* Three inputs, including: +*@li grads: A 5D Tensor of type loat16 or float32, with format NC1HWC0, for the gradient. +*@li scale: A 5D Tensor of type float32, with format NC1HWC0. +*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0. It is an output of BatchNorm . \n + +*@par Attributes: +*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x" . \n + +*@par Outputs: +*x_backprop: A Tensor of type float16 or float32, with format NC1HWC0, for the offset of "x" . \n + +*@attention Constraints: +* The preceding layer of this operator must be operator BatchNorm. +*/ +REG_OP(BNInferGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .OP_END_FACTORY_REG(BNInferGrad) + +/** +*@brief Computes the sum of elements across dimensions of a tensor . \n + +*@par Inputs: +* Two inputs, including: +*@li x: A Tensor. Must be one of the following types: +* float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, +* complex128, float16, uint32, uint64, complex64, complex128. +*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n + +*@par Attributes: +*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n + +*@par Outputs: +*y: The reduced tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Sum. 
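The reduction registrations that follow (ReduceSum, ReduceAll, ReduceProd, ReduceMean and their deprecated *D variants) all share the axes/keep_dims convention described above. As a point of reference, here is a minimal host-side sketch of that convention; the function and variable names are illustrative only and say nothing about the device kernels.

#include <cstdio>
#include <vector>

// Reference reduce-sum over axis 1 of a [d0, d1, d2] tensor, mirroring the
// "axes"/"keep_dims" semantics documented for ReduceSum and friends.
// (Host-side sketch only; names are illustrative, not part of the API.)
std::vector<float> ReduceSumAxis1(const std::vector<float>& x,
                                  int d0, int d1, int d2, bool keep_dims,
                                  std::vector<int>* out_shape) {
  std::vector<float> y(static_cast<size_t>(d0) * d2, 0.0f);
  for (int i = 0; i < d0; ++i)
    for (int j = 0; j < d1; ++j)
      for (int k = 0; k < d2; ++k)
        y[static_cast<size_t>(i) * d2 + k] +=
            x[(static_cast<size_t>(i) * d1 + j) * d2 + k];
  // keep_dims=true retains the reduced axis with length 1; otherwise the rank drops by one.
  *out_shape = keep_dims ? std::vector<int>{d0, 1, d2} : std::vector<int>{d0, d2};
  return y;
}

int main() {
  std::vector<float> x(2 * 3 * 4, 1.0f);   // all ones, shape [2, 3, 4]
  std::vector<int> shape;
  std::vector<float> y = ReduceSumAxis1(x, 2, 3, 4, /*keep_dims=*/false, &shape);
  std::printf("y[0]=%g, output rank=%zu\n", y[0], shape.size());  // y[0]=3, rank 2
  return 0;
}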
+*/ +REG_OP(ReduceSum) + .INPUT(x, TensorType::NumberType()) + .INPUT(axes, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::NumberType()) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceSum) + +/** +*@brief Computes the sum of elements across dimensions of a tensor . \n + +*@par Inputs: +* One input: +*x: A Tensor. Up to 8D. Must be one of the following types: float16, float32. \n + +*@par Attributes: +*@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. +*@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n + +*@par Outputs: +*y: The reduced tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Sum. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceSum instead. +*/ +REG_OP(ReduceSumD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(axes, ListInt) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceSumD) + +/** +*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n + +*@par Inputs: +*One input: +*x: The boolean tensor to reduce . \n + +*@par Attributes: +*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. +*@li axis: The dimensions to reduce. If None, reduces all dimensions. +*Must be in the range [- rank (input_sensor), rank (input_sensor)) . \n + +*@par Outputs: +*y: The reduced tensor . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ReduceAll. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAll instead. +*/ +REG_OP(ReduceAllD) + .INPUT(x, TensorType({DT_BOOL})) + .OUTPUT(y, TensorType({DT_BOOL})) + .REQUIRED_ATTR(axes, ListInt) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceAllD) + +/** +*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n + +*@par Inputs: +*Two inputs, including: +*@li x: The boolean tensor to reduce. +*@li axis: A mutable Tensor. The dimensions to reduce. If None, reduces all dimensions. Must be in the range [- rank (input_sensor), rank (input_sensor)) . \n + +*@par Attributes: +*keep_dims: A bool. If true, retains reduced dimensions with length 1 . \n + +*@par Outputs: +*y: The reduced tensor . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ReduceAll. +*/ +REG_OP(ReduceAll) + .INPUT(x, TensorType({DT_BOOL})) + .INPUT(axes, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType({DT_BOOL})) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceAll) + +/** +*@brief Reduce a tensor on a certain axis based on product. . \n + +*@par Inputs: +*Two inputs, including: +*@li x: A mutable Tensor. Must be the type of NumberType. +*@li axis: A mutable Tensor. The dimensions to reduce . \n + +*@par Attributes: +*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ReduceProd. 
+*/ +REG_OP(ReduceProd) + .INPUT(x,TensorType::NumberType()) + .INPUT(axes, TensorType::IndexNumberType()) + .OUTPUT(y,TensorType::NumberType()) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceProd) + +/** +*@brief Computes the product of elements across dimensions of a tensor . \n + +*@par Inputs: +* One input: +*x: A Tensor. Must be one of the following types: float16, float, int8, uint8 . \n + +*@par Attributes: +*@li axes: A required int8, int16, int32, or int64. Specifies the dimensions to reduce. No default value. +*@li keep_dims: An optional bool. If "True", retains reduced dimensions with length 1. Defaults to "False" . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@attention Constraints: +* "keep_dims" is in the range [-rank(input_tensor), rank(input_tensor)] . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ReduceProd. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceProd instead. +*/ +REG_OP(ReduceProdD) + .INPUT(x,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16})) + .OUTPUT(y,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16})) + .REQUIRED_ATTR(axes, ListInt) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceProdD) + +/** +*@brief Reduces "x" along the dimensions according to "axis" . \n + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8. +* @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. +* - If None (the default), reduces all dimensions. +* - Must be in the range [-rank(x), rank(x)) . \n + +*@par Attributes: +*keep_dims: A bool or NoneType. +* - If true, retains reduced dimensions with length 1. +* - If false, the rank of the tensor is reduced by 1 for each entry in axis. +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator ReduceMean. +*/ +REG_OP(ReduceMean) + .INPUT(x, TensorType::NumberType()) + .INPUT(axes, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::NumberType()) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceMean) + +/** +*@brief Reduces "x" along the dimensions according to "axis" . \n + +*@par Inputs: +*One input: +* @li x: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Attributes: +*@li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. +* If None (the default), reduces all dimensions. +* Must be in the range [-rank(x), rank(x)). +*@li keep_dims: A bool or NoneType. +* - If true, retains reduced dimensions with length 1. +* - If false, the rank of the tensor is reduced by 1 for each entry in axis. +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator ReduceMean. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead. +*/ +REG_OP(ReduceMeanD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(axes, ListInt) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceMeanD) + +/** +*@brief Returns the maximum of elements across dimensions of a Tensor . \n + +*@par Inputs: +* Two inputs, including: +*@li x: A multi-dimensional Tensor of type float16, float32, or int16. 
+*@li axes: A Scalar of type int32, specifying the axes information of the index with the maximum value . \n + +*@par Attributes: +*keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false" . \n + +*@par Outputs: +*y: A multi-dimensional Tensor, specifying the maximum value of the corresponding axis in the tensor. Has the same type as "x". (If "keep_dims" is set to "false", the output dimensions are reduced by "dimension" compared with that of "x". Otherwise, the output has one fewer dimension than "x".) + +*@attention Constraints: +* The value range of "axes" is [-dims, dims - 1]. "dims" indicates the dimension length of "x" . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Max. +*/ +REG_OP(ReduceMax) + .INPUT(x, TensorType::NumberType()) + .INPUT(axes, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::NumberType()) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceMax) + +/** +*@brief Returns the maximum of elements across dimensions of a Tensor . \n + +*@par Inputs: +*x: A multi-dimensional Tensor of type float16, float32, or int16 . \n + +*@par Attributes: +* Two attributes, including: +*@li axes: A required listint, specifying the axes information of the index with the maximum value. +*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false" . \n + +*@par Outputs: +*y: A multi-dimensional Tensor, specifying the maximum value of the corresponding axis in the tensor. Has the same type as "x". (If "keep_dims" is set to "false", the output dimensions are reduced by "dimension" compared with that of "x". Otherwise, the output has one fewer dimension than "x".) + +*@attention Constraints: +* The value range of "axis" is [-dims, dims - 1]. "dims" indicates the dimension length of "x" . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Max. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMax instead. +*/ +REG_OP(ReduceMaxD) + .INPUT(x, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, + DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, + DT_FLOAT16, DT_INT32})) + .REQUIRED_ATTR(axes, ListInt) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceMaxD) + +/** +*@brief Computes the minimum of elements across dimensions of a tensor . \n + +*@par Inputs: +*@li input_tensor: A Tensor. Must be one of the following types: float16, float32, int8, uint8. +*@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None". + +*@par Attributes: +*keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False". + +*@par Outputs: +*output_tensor: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n + +*@attention Constraints: +* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator reduce_min. +*/ +REG_OP(ReduceMin) + .INPUT(x, TensorType::NumberType()) + .INPUT(axes, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::NumberType()) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceMin) + +/** +*@brief Computes the minimum of elements across dimensions of a tensor . \n + +*@par Inputs: +*input_min: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . 
\n + +*@par Attributes: +*@li axes: An optional int32, list, tuple, or NoneType value. Specifies the dimensions to reduce. Defaults to "None". +*@li keep_dims: An optional bool or NoneType value. If "True", reduced dimensions will be retained. Defaults to "None" (equivalent to "False"). + +*@par Outputs: +*output_min: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n + +*@attention Constraints: +* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator reduce_min. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. +*/ +REG_OP(ReduceMinD) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) + .REQUIRED_ATTR(axes, ListInt) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceMinD) +/** +*@brief Computes the "logical or" of elements across dimensions of a tensor. +* Reduces "x" along the dimensions given in "axes". +* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each +* entry in "axes". If "keep_dims" is true, the reduced dimensions +* are retained with length 1. +* +* If "axes" is None, all dimensions are reduced, and a +* tensor with a single element is returned. +* +*@attention Constraints: +* Only support bool +* +*@par Inputs: +*@li x : The boolean tensor to reduce. +*@li axes: The dimensions to reduce. If "None" (default), reduces all +* dimensions. Must be in the range "[-rank(x), rank(x))". +* +*@par Attributes: +* keep_dims: If true, retains reduced dimensions with length 1. +* +*@par Outputs: +* y: The reduced tensor +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator reduce_any. +* +*/ +REG_OP(ReduceAny) + .INPUT(x, TensorType({DT_BOOL})) + .INPUT(axes, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType({DT_BOOL})) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceAny) +/** +*@brief Computes the "logical or" of elements across dimensions of a tensor. +* Reduces "x" along the dimensions given in "axes". +* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each +* entry in "axes". If "keep_dims" is true, the reduced dimensions +* are retained with length 1. +* +* If "axis" is None, all dimensions are reduced, and a +* tensor with a single element is returned. +* +*@attention Constraints: +* Only support bool +* +*@par Inputs: +* x: The boolean tensor to reduce. +* +*@par Attributes: +*@li axes: The dimensions to reduce. Must be in the range "[-rank(x), rank(x))". +*@li keep_dims: If true, retains reduced dimensions with length 1. +* +*@par Outputs: +* y: The reduced tensor +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator reduce_any. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAny instead. +*/ +REG_OP(ReduceAnyD) + .INPUT(x, TensorType({DT_BOOL})) + .OUTPUT(y, TensorType({DT_BOOL})) + .REQUIRED_ATTR(axes, ListInt) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ReduceAnyD) + +/** +*@brief Compute reduction on dimensions specified by "axis". +*Four reduction operations are provided: +*SUM Computes the sum of elements across specified dimensions of a tensor. +*ASUM Computes the sum of absolute values of elements across specified dimensions of a tensor. 
+*SUMSQ Computes the sum of squares of elements across specified dimensions of a tensor. +*SUMSQ Computes the mean values of elements across specified dimensions of a tensor . \n + +*@par Inputs: +*x: A Tensor of type float16 or float32 + +*@par Attributes: +*@li operation: An optional int32 from 1(SUM), 2(ASUM), 3(SUMSQ), and 4(MEAN), +*specifying the reduction algorithm. Defaults to "1". +*@li axis: An optional int32, specifying the first axis to reduce. Defaults to "0". +*The value range is [-N, N-1], where N is the input tensor rank. +*@li coeff: An optional float32, specifying the scale coefficient. Defaults to "1.0" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@attention Constraints: The Reduction operator supports type float16 only on the device chip. +*@par Third-party framework compatibility +* Compatible with the Caffe operator Reduction. +*/ +REG_OP(Reduction) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(operation, Int, 1) + .ATTR(axis, Int, 0) + .ATTR(coeff, Float, 1.0) + .OP_END_FACTORY_REG(Reduction); + +/** +*@brief Computes the euclidean norm of elements across dimensions of a tensor . \n + +*@par Inputs: +*@li input_tensor: A Tensor. Must be one of the following types: float16, float32, int32. +*@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None" . \n + +*@par Attributes: +*keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False" . \n + +*@par Outputs: +*output_tensor: A Tensor. Must be one of the following types: float16, float32, int32 . \n + +*@attention Constraints: +* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator EuclideanNorm. +*/ +REG_OP(EuclideanNorm) + .INPUT(x, TensorType::NumberType()) + .INPUT(axes, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::NumberType()) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(EuclideanNorm) + +/** +*@brief Computes the euclidean norm of elements across dimensions of a tensor . \n + +*@par Inputs: +*input_min: A Tensor. Must be one of the following types: float16, float32, int32 . \n + +*@par Attributes: +*@li axes: An optional int32, list, tuple, or NoneType value. Specifies the dimensions to reduce. Defaults to "None". +*@li keep_dims: An optional bool or NoneType value. If "True", reduced dimensions will be retained. Defaults to "None" (equivalent to "False") . \n + +*@par Outputs: +*output_min: A Tensor. Must be one of the following types: float16, float32, int32 . \n + +*@attention Constraints: +* If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator EuclideanNorm. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use EuclideanNorm instead. +*/ +REG_OP(EuclideanNormD) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) + .ATTR(axes, ListInt, {}) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(EuclideanNormD) + + + +/** +*@brief Performs instance normalization for inference . \n + +*@par Inputs: +* Five inputs, including: (NC1HWC0 supported) +*@li x: A Tensor of type float16 or float32. 
+*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. +*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. +*@li mean: A [N, C1, 1, 1, C0] ensor of type float32, for the mean. +*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance . \n + +*@par Attributes: +*epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. +Defaults to "0.00001" . \n + +*@par Outputs: +*y: A Tensor of type float16 or float32 for the normalized "x". +*batch_mean: A Tensor of type float32 for the result mean. +*batch_ variance: A Tensor of type float32 for the result variance . \n + +*@attention Constraints: +*For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. +*/ +REG_OP(INInferV2) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(beta, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.00001) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(INInferV2) + +/** +*@brief Performs reduced instance normalization . \n + +*@par Inputs: +*x: A Tensor of type float16 or float32, with format NC1HWC0 . \n + +*@par Outputs: +*@li sum: A Tensor of type float32 for SUM reduced "x". +*@li square_sum: A Tensor of type float32 for SUMSQ reduced "x" . \n + +*@attention Constraints: +* This operator is a InstanceNorm fusion operator for updating the moving averages for training. +* This operator is used in conjunction with INTrainingUpdateV2. +*/ +REG_OP(INTrainingReduceV2) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(sum, TensorType({DT_FLOAT})) + .OUTPUT(square_sum, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(INTrainingReduceV2) + + +/** +*@brief Performs update instance normalization . \n + +*@par Inputs: +* Seven inputs, including: (NC1HWC0supported) +*@li x: A Tensor of type float16 or float32. +*@li sum: A T [N, C1, 1, 1, C0] ensor of type float32 for the output of operator INTrainingReduceV2. +*@li square_sum: A [N, C1, 1, 1, C0] Tensor of type float32 for the output of operator INTrainingReduceV2. +*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. +*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. +*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated mean. +*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated variance . \n + +*@par Attributes: +*@li momentum: A required float32, specifying the momentum to update mean and var. +*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n + +*@par Outputs: +* Three outputs, including: (NC1HWC0 supported) +*@li y: A Tensor of type float16 or float32, for normalized "x". +*@li batch_mean: A Tensor of type float32, for the updated mean. +*@li batch_variance: A Tensor of type float32, for the updated variance . \n + +*@attention Constraints: +*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. +* This operator is used in conjunction with INTrainingReduceV2. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. 
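The sum and square_sum inputs above are produced by INTrainingReduceV2, and the normalized output follows the usual single-pass statistics. The scalar sketch below shows that arithmetic under the conventional formulation; it is illustrative only, and the registration that follows remains the authoritative interface.

#include <cmath>
#include <cstdio>

// Conventional per-(instance, channel) math implied by the sum/square_sum
// inputs (a sketch of the arithmetic, not the device kernel):
//   mean = sum / count
//   var  = square_sum / count - mean^2
//   y    = gamma * (x - mean) / sqrt(var + epsilon) + beta
struct InStats { float mean; float variance; };

InStats ReconstructStats(float sum, float square_sum, float count) {
  InStats s;
  s.mean = sum / count;
  s.variance = square_sum / count - s.mean * s.mean;
  return s;
}

float Normalize(float x, const InStats& s, float gamma, float beta, float eps) {
  return gamma * (x - s.mean) / std::sqrt(s.variance + eps) + beta;
}

int main() {
  // Toy example: four spatial elements of one channel of one instance.
  const float x[4] = {1.f, 2.f, 3.f, 4.f};
  float sum = 0.f, square_sum = 0.f;
  for (float v : x) { sum += v; square_sum += v * v; }  // what the reduce step produces
  InStats s = ReconstructStats(sum, square_sum, 4.f);
  std::printf("mean=%g var=%g y0=%g\n", s.mean, s.variance,
              Normalize(x[0], s, /*gamma=*/1.f, /*beta=*/0.f, /*eps=*/1e-5f));
  return 0;
}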
+*/ +REG_OP(INTrainingUpdateV2) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(sum, TensorType({DT_FLOAT})) + .INPUT(square_sum, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(beta, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .ATTR(momentum, Float, 0.1) + .ATTR(epsilon, Float, 0.00001) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(INTrainingUpdateV2) + + +/** +*@brief Performs reduced group normalization . \n + +*@par Inputs: +*x: A Tensor of type float16 or float32, with format NCHW NHWC . \n + +*@par Outputs: +*@li sum: A Tensor of type float32 for SUM reduced "x". +*@li square_sum: A Tensor of type float32 for SUMSQ reduced "x". + + +*@par Attributes: +*@li num_groups: Int, specifying the num of groups. required, same to GNTrainingUpdate . \n + +*@attention Constraints: +* This operator is a GroupNorm fusion operator for updating the moving averages for training. +* This operator is used in conjunction with GNTrainingUpdate. +*/ +REG_OP(GNTrainingReduce) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(sum, TensorType({DT_FLOAT})) + .OUTPUT(square_sum, TensorType({DT_FLOAT})) + .ATTR(num_groups, Int, 2) + .OP_END_FACTORY_REG(GNTrainingReduce) + + +/** +*@brief Performs update group normalization . \n + +*@par Inputs: +* Eight inputs, including: (NCHW NHWC supported) +*@li x: A Tensor of type float16 or float32. +*@li sum: A 5D Tensor of type float32, +shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC +for the output of operator GNTrainingReduce. +*@li square_sum: A 5D Tensor of type float32, +shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC +for the output of operator GNTrainingReduce. +*@li scale: A 5D Tensor of type float32, +shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC +is for the scaling gamma. +*@li offset: A 5D Tensor of type float32, +shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC +for the scaling beta. +*@li mean: A 5D Tensor of type float32, +shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC +for the updated mean. +*@li variance: A 5D Tensor of type float32, +shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC +for the updated variance. + + +*@par Attributes: +*@li epsilon: A float32, specifying the small value added to variance to avoid dividing by zero. +*@li num_groups: Int, specifying the num of groups. required, same to GNTrainingReduce + +*@par Outputs: +* Three outputs, including: (NC1HWC0 supported) +*@li y: A Tensor of type float16 or float32, for normalized "x". +*@li batch_mean: A Tensor of type float32, for the updated mean. +*@li batch_variance: A Tensor of type float32, for the updated variance . \n + +*@attention Constraints: +*@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. +* This operator is used in conjunction with GNTrainingUpdate. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. 
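As described above, num_groups partitions the C channels into contiguous groups and statistics are accumulated per (batch, group). Below is a small sketch of that per-group reduction for a single sample; the NCHW layout, loop order and helper names are assumed purely for illustration and are not taken from the kernels.

#include <cstdio>
#include <vector>

// Per-group accumulation of sum / square_sum for one NCHW sample, i.e. the
// shape of work GNTrainingReduce describes (illustrative sketch only).
void GroupReduce(const std::vector<float>& x, int C, int HW, int num_groups,
                 std::vector<float>* sum, std::vector<float>* square_sum) {
  const int cpg = C / num_groups;  // channels per group; C must divide evenly
  sum->assign(num_groups, 0.f);
  square_sum->assign(num_groups, 0.f);
  for (int c = 0; c < C; ++c)
    for (int i = 0; i < HW; ++i) {
      const float v = x[static_cast<size_t>(c) * HW + i];
      (*sum)[c / cpg] += v;
      (*square_sum)[c / cpg] += v * v;
    }
}

int main() {
  const int C = 4, HW = 2, G = 2;          // e.g. num_groups = 2
  std::vector<float> x = {1, 1, 2, 2, 3, 3, 4, 4};
  std::vector<float> sum, square_sum;
  GroupReduce(x, C, HW, G, &sum, &square_sum);
  std::printf("group0 sum=%g sq=%g | group1 sum=%g sq=%g\n",
              sum[0], square_sum[0], sum[1], square_sum[1]);
  return 0;
}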
+*/ +REG_OP(GNTrainingUpdate) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(sum, TensorType({DT_FLOAT})) + .INPUT(square_sum, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .ATTR(num_groups, Int, 2) + .ATTR(epsilon, Float, 0.0001) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(GNTrainingUpdate) + +/** +* @brief Calculates the standard deviation and average value of Tensors. + +* @par Inputs: +* @li x: A Tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* Three Attributes, including: +* @li dim: An optional listint, Defaults to "None". \n + +* @li unbiased: An optional bool. Defaults to "True". +* If "True", Use Bessel Correction. +* If "False", Do not use Bessel Correction. \n + +* @li keepdim: An optional bool. Defaults to "False". +* If "True", Keep the original tensor dimension. +* If "False", Do not keep the original tensor dimension. \n + +* @par Outputs: +* Two Outputs, including: +* @li y1: A Tensor. Has the same type as "x". +* @li y2: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator ReduceStd. +*/ +REG_OP(ReduceStd) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(dim, ListInt, {}) + .ATTR(unbiased, Bool, true) + .ATTR(keepdim, Bool, false) + .OP_END_FACTORY_REG(ReduceStd) +} //namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/inc/ops/resource_variable_ops.h new file mode 100644 index 00000000..74ac83f8 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/resource_variable_ops.h @@ -0,0 +1,114 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file resource_variable_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RESOURCE_VARIABLE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RESOURCE_VARIABLE_OPS_H_ + +#include "graph/operator.h" +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Creates a handle to a Variable resource. \n + +*@par Outputs: +*y:A Tensor of type resource. \n + +*@par Attributes: +* @li container: optional, string. +* @li shared_name: optional, string. +* @li dtype: required, type. +* @li shape: optional, ListInt. \n + +*@see VarHandleOp. +*/ + +REG_OP(VarHandleOp) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .REQUIRED_ATTR(dtype, Type) + .ATTR(shape, ListInt, ge::UNKNOWN_SHAPE) + .OUTPUT(y, TensorType({DT_RESOURCE})) + .OP_END_FACTORY_REG(VarHandleOp) + +/** +*@brief Assigns a new value to a variable. 
\n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value to set the new tensor to use. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignVariableOp. +*/ + +REG_OP(AssignVariableOp) + .INPUT(resource, TensorType({DT_RESOURCE})) + .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(AssignVariableOp) + +/** +*@brief Adds a value to the current value of a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value by which the variable will be incremented. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignAddVariableOp. +*/ + +REG_OP(AssignAddVariableOp) + .INPUT(resource, TensorType({DT_RESOURCE})) + .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(AssignAddVariableOp) + +/** +*@brief Subtracts a value to the current value of a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value by which the variable will be incremented. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignSubVariableOp. +*/ + +REG_OP(AssignSubVariableOp) + .INPUT(resource, TensorType({DT_RESOURCE})) + .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(AssignSubVariableOp) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_RESOURCE_VARIABLE_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/ops/rnn.h b/third_party/fwkacllib/inc/inc/ops/rnn.h new file mode 100644 index 00000000..12bb0ee8 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/rnn.h @@ -0,0 +1,965 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file rnn.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief: Basic LSTM Cell forward calculation. +*@par Inputs: +*five inputs: +*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li h:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n + +*@par Attributes: +*@li keep_prob:An integer identifying the keep prob in the op. Default to 1. +*@li forget_bias:An integer identifying the forget bias in the op. 
Default to 1. +*@li state_is_tuple:An bool identifying if the hidden state and cell state is tuple. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported . \n + +*@par Outputs: +*seven outputs: +*@li mask:A 1D Tensor. Must be one of the following types: uint8. +*@li ct:A 4D Tensor. Must be one of the following types: float16, float32. +*@li ht:A 4D Tensor. Must be one of the following types: float16. +*@li it:A 4D Tensor. Must be one of the following types: float16, float32. +*@li jt:A 4D Tensor. Must be one of the following types: float16, float32. +*@li ft:A 4D Tensor. Must be one of the following types: float16, float32. +*@li ot:A 4D Tensor. Must be one of the following types: float16, float32. +*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. +*/ +REG_OP(BasicLSTMCell) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(h, TensorType({DT_FLOAT16})) + .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w, TensorType({DT_FLOAT16})) + .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(ct, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(ht, TensorType({DT_FLOAT16})) + .OUTPUT(it, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(jt, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(ft, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(ot, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(keep_prob, Float, 1.0) + .ATTR(forget_bias, Float, 1.0) + .ATTR(state_is_tuple, Bool, true) + .ATTR(activation, String, "tanh") + .OP_END_FACTORY_REG(BasicLSTMCell) + +/** +*@brief: Dynamic LSTM forward calculation . \n + +*@par Inputs: +*@li x:A 4D Tensor. Must be the type float32. The format must be FRACTAL_NZ. +*@li w:A 4D Tensor. Must be the type float32. The format must be FRACTAL_Z. +*@li b:A 1D Tensor. Must be the type float32. The format must be ND . \n + +*@par Outputs: +*output_h:A Tensor of output. Must be the type float32. The format must be FRACTAL_Z. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicLSTM) + .INPUT(x, TensorType({DT_FLOAT32})) + .INPUT(w, TensorType({DT_FLOAT32})) + .INPUT(b, TensorType({DT_FLOAT32})) + .OUTPUT(output_h, TensorType({DT_FLOAT32})) + .OP_END_FACTORY_REG(DynamicLSTM) + +/** +*@brief: DynamicRNNGrad calculation. +*@par Inputs: +*ten inputs: \n +*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y:A 1D Tensor. Must be one of the following types: int32. The format must be FRACTAL_NZ. +*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. 
+*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li seq_length:A 1D Tensor. Must be one of the following types: int32. +*@li mask:A 1D Tensor. Must be one of the following types: int8. +*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li use_peephole:An bool identifying if use peephole in the op. Default to false. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to false. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. +*@li forget_bias:An float identifying the forget bias in the op. Default to 0. +*@li is_training:An bool identifying is training in the op. Default to true. + +*@par Outputs: +*eight outputs: \n +*@li dw:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li db:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. 
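The i/j/f/o and tanhct tensors listed above are the intermediate gate activations of the standard LSTM cell, which DynamicRNN produces and DynamicRNNGrad consumes. A scalar sketch of that textbook recurrence is given below, with forget_bias folded into the forget gate; it is only a reference for the gate naming, while the fused operators registered here are the actual implementation.

#include <cmath>
#include <cstdio>

// Textbook LSTM cell step matching the gate naming used by BasicLSTMCell /
// DynamicRNN (i: input gate, j: candidate, f: forget gate, o: output gate).
// Scalar form for illustration; the real ops operate on fused gate tensors.
struct LstmState { float c; float h; };

static float Sigmoid(float v) { return 1.f / (1.f + std::exp(-v)); }

LstmState LstmStep(float zi, float zj, float zf, float zo,
                   LstmState prev, float forget_bias) {
  const float i = Sigmoid(zi);
  const float j = std::tanh(zj);
  const float f = Sigmoid(zf + forget_bias);  // "forget_bias" attribute
  const float o = Sigmoid(zo);
  LstmState next;
  next.c = f * prev.c + i * j;     // output_c / ct
  next.h = o * std::tanh(next.c);  // output_h / ht, with tanhct = tanh(next.c)
  return next;
}

int main() {
  LstmState s{0.f, 0.f};
  s = LstmStep(0.5f, 0.2f, -0.1f, 0.3f, s, /*forget_bias=*/0.f);
  std::printf("c=%g h=%g\n", s.c, s.h);
  return 0;
}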
+*/ +REG_OP(DynamicRNNGrad) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dw, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(db, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dc_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_OUTPUT(dwci, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_OUTPUT(dwcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_OUTPUT(dwco, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(cell_type, String, "LSTM") + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 0) + .ATTR(use_peephole, Bool, false) + .ATTR(keep_prob, Float, -1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(forget_bias, Float, 0.0) + .OP_END_FACTORY_REG(DynamicRNNGrad) + +/** +*@brief: DynamicRNN calculation. +*@par Inputs: +*ten inputs: +*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND. +*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n + +*@par Attributes: +*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li use_peephole:An bool identifying if use peephole in the op. Default to false. 
+*@li keep_prob:A float identifying the keep prob in the op. Defaults to 1.
+*@li cell_clip:A float identifying the cell clip in the op. Defaults to -1.
+*@li num_proj:An integer identifying the num projection in the op. Defaults to 0.
+*@li time_major:A bool identifying the time major in the op. Defaults to true.
+*@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported.
+*@li forget_bias:A float identifying the forget bias in the op. Defaults to 0.
+*@li is_training:A bool identifying whether the op is in training mode. Defaults to true . \n
+
+*@par Outputs:
+*eight outputs:
+*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li tanhc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@par Third-party framework compatibility:
+* Compatible with the TF operator LSTM.
+*/
+REG_OP(DynamicRNN)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(cell_type, String, "LSTM")
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(use_peephole, Bool, false)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(forget_bias, Float, 0.0)
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(DynamicRNN)
+
+/**
+*@brief: DynamicLSTMV2 calculation.
+*@par Inputs:
+*eleven inputs:
+*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li w_xc_x_static:An optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li h0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li c0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND .
+
+*@par Attributes:
+*@li num_output:An integer identifying the number of output features. Defaults to 0.
+*@li expose_hidden:A bool identifying whether to expose the hidden and cell states. Defaults to false.
+*@li need_output_last:A bool identifying whether only the last time step is output. Defaults to false.
+*@li forget_bias:A float identifying the forget bias in the op. Defaults to 0.
+
+*@par Outputs:
+*five outputs:
+*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@par Third-party framework compatibility:
+* Compatible with the Caffe operator LSTM.
+*@par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicLSTMV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(cont, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(w_xc_x_static, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(h0, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(c0, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(last_output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(last_output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(num_output, Int, 0)
+    .ATTR(expose_hidden, Bool, false)
+    .ATTR(need_output_last, Bool, false)
+    .ATTR(forget_bias, Float, 0.0)
+    .OP_END_FACTORY_REG(DynamicLSTMV2)
+
+/**
+*@brief: LSTMInputGrad calculation.
+*@par Inputs:
+*ten inputs: \n
+*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li dy:A 4D Tensor.
Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + + +*@par Outputs: +*eight outputs: \n +*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*/ +REG_OP(LSTMInputGrad) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dc_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dgate, TensorType({DT_FLOAT16})) + .OP_END_FACTORY_REG(LSTMInputGrad) + + +/** +*@brief: Basic LSTM Cell backward calculation.Calculate the gradient of input and hidden state. +*@par Inputs: +*three inputs: +*@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li dropout_mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n + +*@par Attributes: +*keep_prob:An integer identifying the keep prob in the op. Default to 1 . \n + +*@par Outputs: +*two outputs: +*@li dxt:A 4D Tensor. Must be one of the following types: float16, float32. +*@li dht:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(BasicLSTMCellInputGrad) + .INPUT(dgate, TensorType({DT_FLOAT16})) + .INPUT(w, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(dropout_mask, TensorType({DT_UINT8})) + .OUTPUT(dxt, TensorType({DT_FLOAT16, DT_FLOAT32})) + .OUTPUT(dht, TensorType({DT_FLOAT16, DT_FLOAT32})) + .ATTR(keep_prob, Float, 1.0) + .OP_END_FACTORY_REG(BasicLSTMCellInputGrad) + +/** +*@brief: Basic LSTM Cell backward calculation.Calculate the gradient of weight and bias. +*@par Inputs: +*three inputs: +*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li h:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li dgate:A 4D Tensor. 
Must be one of the following types: uint8. The format must be FRACTAL_NZ . \n + +*@par Outputs: +*two outputs: +*@li dw:A 4D Tensor. Must be one of the following types: float16. +*@li db:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(BasicLSTMCellWeightGrad) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(h, TensorType({DT_FLOAT16})) + .INPUT(dgate, TensorType({DT_FLOAT16})) + .OUTPUT(dw, TensorType({DT_FLOAT16})) + .OUTPUT(db, TensorType({DT_FLOAT16, DT_FLOAT32})) + .OP_END_FACTORY_REG(BasicLSTMCellWeightGrad) + +/** +*@brief: Basic LSTM Cell backward calculation.Calculate the gradient of gates and cell state. +*@par Inputs: +*eight inputs: +*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dht:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li it:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li jt:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li ft:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li ot:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ . \n + +*@par Attributes: +*@li forget_bias:An integer identifying the forget bias in the op. Default to 1. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported . \n + +*@par Outputs: +*two outputs: +*@li dgate:A 4D Tensor. Must be one of the following types: float16. +*@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(BasicLSTMCellCStateGrad) + .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dht, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dct, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(it, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(jt, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(ft, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(ot, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dgate, TensorType({DT_FLOAT16})) + .OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(forget_bias, Float, 1.0) + .ATTR(activation, String, "tanh") + .OP_END_FACTORY_REG(BasicLSTMCellCStateGrad) + +/** +*@brief: RNN operator. +*@par Inputs: +*eight inputs: +*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND. +*@li x_static:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w_xh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li w_sh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li w_hh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. 
+*@li w_ho:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n + +*@par Attributes: +*@li expose_hidden:An bool identifying if expose the hidden state of last time step. Default to false. +*@li num_output:An integer identifying the number of output features. Default to 0 . \n + +*@par Outputs: +*two outputs: +*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(RNN) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(cont, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(x_static, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(h_0, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w_xh, TensorType({DT_FLOAT16})) + .INPUT(bias_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(w_sh, TensorType({DT_FLOAT16})) + .INPUT(w_hh, TensorType({DT_FLOAT16})) + .INPUT(w_ho, TensorType({DT_FLOAT16})) + .INPUT(bias_o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(h_t, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(num_output, Int, 0) + .ATTR(expose_hidden, Bool, false) + .OP_END_FACTORY_REG(RNN) + +/** +*@brief: BasicRNNCell operator. +*@par Inputs: +*eight inputs: +*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND. +*@li w_xh_x_static:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w_xh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li w_hh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li w_ho:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n + +*@par Attributes: +*@li expose_hidden:An bool identifying if expose the hidden state of last time step. Default to false. +*@li num_output:An integer identifying the number of output features. Default to 0 . \n + +*@par Outputs: +*two outputs: +*@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
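+
+*@par Example:
+*A rough, illustrative sketch only (the exact kernel behaviour may differ); it assumes
+*w_xh_x_static is the pre-computed static contribution and follows the conventional
+*Caffe-style recurrence using the input names above:
+*  h_t = tanh(x_t * w_xh + w_xh_x_static + h_(t-1) * w_hh + bias_h)
+*  o_t = tanh(h_t * w_ho + bias_o)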
+*/ +REG_OP(BasicRNNCell) + .INPUT(x, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(cont, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(w_xh_x_static, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(h_0, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w_xh, TensorType({DT_FLOAT16})) + .INPUT(bias_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(w_hh, TensorType({DT_FLOAT16})) + .INPUT(w_ho, TensorType({DT_FLOAT16})) + .INPUT(bias_o, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(o_t, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(h_t, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(expose_hidden, Bool, false) + .ATTR(num_output, Int, 0) + .OP_END_FACTORY_REG(BasicRNNCell) + +/** +*@brief DynamicGRU calculation. +*@par Inputs: +*seven inputs: +*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li b:Must be one of the following types: float16, float32. The format must be ND. +*@li cw:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li cb:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. +*@li is_training:An bool identifying is training in the op. Default to true. + +*@par Outputs: +*five outputs: +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li n:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(DynamicGRU) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(w, TensorType({DT_FLOAT16})) + .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(cw, TensorType({DT_FLOAT16})) + .INPUT(cb, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(n, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicGRU) + +/** +*@brief DynamicGRUV2 calculation. +*@par Inputs: +*seven inputs: +*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li bias_input:Must be one of the following types: float16, float32. The format must be ND. +*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. +*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. +*@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. +*@li is_training:An bool identifying is training in the op. Default to true. + +*@par Outputs: +*six outputs: +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
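+
+*@par Example:
+*For orientation only (a common GRU formulation, not a guarantee of the exact kernel):
+*with gate_order "zrh" the weights and biases are laid out in update (z), reset (r),
+*new (n) order, and with reset_after = true one step computes
+*  z_t = sigmoid(x_t * W_z + h_(t-1) * R_z + b_z)
+*  r_t = sigmoid(x_t * W_r + h_(t-1) * R_r + b_r)
+*  n_t = tanh(x_t * W_n + b_in + r_t .* (h_(t-1) * R_n + b_hn))
+*  h_t = (1 - z_t) .* n_t + z_t .* h_(t-1)
+*where the W_* blocks come from weight_input, the R_* blocks from weight_hidden,
+*and the biases from bias_input / bias_hidden.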
+*/ +REG_OP(DynamicGRUV2) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(weight_input, TensorType({DT_FLOAT16})) + .INPUT(weight_hidden, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(gate_order, String, "zrh") + .ATTR(reset_after, Bool, true) + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicGRUV2) + + +/** +*@brief DynamicGRUV2Hidden calculation. +*@par Inputs: +*five inputs: +*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. +*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". +Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". +Only tanh is currently supported. +*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. +*@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. +*@li is_training:An bool identifying is training in the op. Default to true. + +*@par Outputs: +*six outputs: +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/ +REG_OP(DynamicGRUV2Hidden) + .INPUT(x_weight_input, TensorType({DT_FLOAT32})) + .INPUT(weight_hidden, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(gate_order, String, "zrh") + .ATTR(reset_after, Bool, true) + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicGRUV2Hidden) + + +/** +*@brief: DynamicGRUV2Grad calculation. +*@par Inputs: +*fourteen inputs: \n +*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li weight_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li seq_length:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. +*@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. + +*@par Outputs: +*six outputs: \n +*@li dw_input:A 4D Tensor. Must be one of the following types: float16, float32. 
The format must be FRACTAL_NZ. +*@li dw_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li db_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicGRUV2Grad) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(dw_input, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dw_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(db_input, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(db_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 0) + .ATTR(keep_prob, Float, -1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(gate_order, String, "zrh") + .ATTR(reset_after, Bool, true) + .OP_END_FACTORY_REG(DynamicGRUV2Grad) + +/** +*@brief: GRUV2HiddenGrad calculation. +*@par Inputs: +*nine inputs: \n +*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li t_state:An Int identifying the current t state. Default to [0, 4]. +*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. + +*@par Outputs: +*three outputs: \n +*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dgate_h:A 4D Tensor. 
Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(GRUV2HiddenGradCell)
+    .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(update, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(t_state, Int, 0)
+    .ATTR(gate_order, String, "zrh")
+    .OP_END_FACTORY_REG(GRUV2HiddenGradCell)
+
+/**
+* @brief Calculates the reversed outputs (gradient) of the function "embedding". \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li grad: A mutable Tensor of word gradients. Must be one of the following types:
+* float32.
+* @li indices: A mutable word index Tensor of the int32 type.\n
+
+* @par Attributes:
+* @li num_weights: An int attr specifying the number of words in the dictionary. \n
+
+* @li padding_idx: An int attr specifying the word index whose gradient is filled with zeros. Defaults to "-1". \n
+
+* @li scale_grad_by_freq: An optional bool. Defaults to "False".
+* If "True", "grad_weight" will be scaled by the word frequency.
+* If "False", "grad_weight" will not be scaled by the word frequency. \n
+
+* @par Outputs:
+* @li y: A mutable output Tensor of new word gradients. Has the same type as "grad". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator EmbeddingDenseGrad.
+*/
+REG_OP(EmbeddingDenseGrad)
+    .INPUT(grad, TensorType({ DT_FLOAT32 })) /* "First operand." */
+    .INPUT(indices, TensorType({ DT_INT32 })) /* "Second operand." */
+    .OUTPUT(y, TensorType({ DT_FLOAT32 })) /* "Result, has same element type as two inputs" */
+    .REQUIRED_ATTR(num_weights, Int)
+    .ATTR(padding_idx, Int, -1)
+    .ATTR(scale_grad_by_freq, Bool, false)
+    .OP_END_FACTORY_REG(EmbeddingDenseGrad)
+
+/**
+*@brief CommonLSTM calculation.
+*@par Inputs:
+*eight inputs: \n
+*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND.
+*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+
+*@par Attributes:
+*@li activation_alpha:Optional scaling values used by some activation functions. Empty is currently supported.
+*@li activation_beta:Optional scaling values used by some activation functions. Empty is currently supported.
+*@li activations:The list of activation functions. Empty is currently supported.
+*@li clip:A float identifying the cell clip in the op. Defaults to -1.
+*@li direction:Specify if the RNN is forward, reverse, or bidirectional. Must be one of forward(default), reverse, or bidirectional.
+*@li hidden_size:Number of neurons in the hidden layer. Reserved.
+*@li input_forget:Couple the input and forget gates if 1. Reserved.
+
+*@par Outputs:
+*three outputs: \n
+*@li y:First dimension is time step, second dimension is direction, and the rest form a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*/
+
+REG_OP(CommonLSTM)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(initial_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(p, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(activation_alpha, ListFloat, {})
+    .ATTR(activation_beta, ListFloat, {})
+    .ATTR(activations, ListString, {})
+    .ATTR(clip, Float, -1.0)
+    .ATTR(direction, String, "forward")
+    .REQUIRED_ATTR(hidden_size, Int)
+    .ATTR(input_forget, Int, 0)
+    .OP_END_FACTORY_REG(CommonLSTM)
+
+/**
+* @brief Common GRU calculation.
+
+* @par Inputs:
+* Six inputs, including:
+* @li x: The input sequences packed (and potentially padded) into one 3D Tensor(float16). The format must be FRACTAL_NZ
+* @li w: The weight tensor for the gates is a 3D Tensor(float16). The format must be FRACTAL_Z
+* @li r: The recurrence weight tensor is a 3D Tensor(float16). The format must be FRACTAL_Z
+* @li b: The bias tensor for the gates. The format must be ND
+* @li sequence_lens: Optional tensor specifying lengths of the sequences(int32). The format must be ND
+* @li initial_h: Optional initial value of the hidden state(float16,float32). The format must be FRACTAL_NZ
+
+* @par Attributes:
+* @li activation_alpha: Optional scaling values used by some activation functions. \n
+
+* @li activation_beta: Optional scaling values used by some activation functions. \n
+
+* @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n
+
+* @li clip: Cell clip threshold. \n
+
+* @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n
+
+* @li hidden_size: Number of neurons in the hidden layer. \n
+
+* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n
+
+* @par Outputs:
+* @li y: A Tensor that concatenates all the intermediate output values of the hidden state(float16,float32). The format must be FRACTAL_NZ
+
+* @li y_h: The last output value of the hidden state(float16,float32).
The format must be FRACTAL_NZ +*/ +REG_OP(CommonGRU) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) + .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(activation_alpha, ListFloat, {}) + .ATTR(activation_beta , ListFloat, {}) + .ATTR(activations , ListString, {}) + .ATTR(clip, Float, -1.0) + .ATTR(direction, String, "forward") + .REQUIRED_ATTR(hidden_size, Int) + .ATTR(linear_before_reset , Int, 0) + .OP_END_FACTORY_REG(CommonGRU) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/inc/ops/rpn_ops.h new file mode 100644 index 00000000..089af326 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/rpn_ops.h @@ -0,0 +1,61 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file rpn_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RPN_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RPN_OPS_H_ + +#include "graph/operator_reg.h" +namespace ge { +/** +*@brief Iteratively removes lower scoring boxes which have an IoU greater than +* iou_threshold with higher scoring box according to their +* intersection-over-union (IoU) . \n + +*@par Input: +* @li box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and +* corresponding confidence scores . \n + +* @par Attributes: +* @li iou_threshold: An optional float. The threshold for deciding whether boxes +* overlap too much with respect to IOU . \n + +* @par Outputs: +* @li selected_boxes: 2-D tensor with shape of [N,5], representing filtered +* boxes including proposal boxes and corresponding confidence scores. +* @li selected_idx: 1-D tensor with shape of [N], representing the index of +* input proposal boxes. +* @li selected_mask: 1-D tensor with shape of [N], the symbol judging whether +* the output proposal boxes is valid . 
\n + +* @attention Constraints: +* The 2nd-dim of input box_scores must be equal to 8.\n +* Only supports 2864 input boxes at one time.\n + +*/ +REG_OP(NMSWithMask) + .INPUT(box_scores, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(selected_boxes, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(selected_idx, TensorType({DT_INT32})) + .OUTPUT(selected_mask, TensorType({DT_UINT8})) + .ATTR(iou_threshold, Float, 0.5) + .OP_END_FACTORY_REG(NMSWithMask) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_RPN_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/save_ops.h b/third_party/fwkacllib/inc/inc/ops/save_ops.h new file mode 100644 index 00000000..5ce6c2e0 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/save_ops.h @@ -0,0 +1,42 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file save_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SAVE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SAVE_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Mark which tensors need to be saved to the ckpt file. +*@par Inputs: +*tensors: A list of input tensor.It's a dynamic input. +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(Save) + .DYNAMIC_INPUT(tensors, TensorType:ALL()) + .OP_END_FACTORY_REG(Save) + +} // namespace ge + + +#endif // OPS_BUILT_IN_OP_PROTO_INC_SAVE_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/inc/ops/sdca_ops.h new file mode 100644 index 00000000..34c6a268 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/sdca_ops.h @@ -0,0 +1,92 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file sdca_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SDCA_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SDCA_OPS_H_ + +#include "graph/operator.h" +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for +*linear models with L1 + L2 regularization. As global optimization objective is +*strongly-convex, the optimizer optimizes the dual objective at each step. The +*optimizer applies each update one example at a time. Examples are sampled +*uniformly, and the optimizer is learning rate free and enjoys linear convergence +*rate . 
\n
+
+*@par Inputs:
+*@li sparse_example_indices: a list of vectors which contain example indices. It's a dynamic input.
+*@li sparse_feature_indices: a list of vectors which contain feature indices. It's a dynamic input.
+*@li sparse_feature_values: a list of vectors which contain the feature values associated with each feature group. It's a dynamic input.
+*@li dense_features: a list of matrices which contain the dense feature values. It's a dynamic input.
+*@li example_weights: a vector which contains the weight associated with each example.
+*@li example_labels: a vector which contains the label/target associated with each example.
+*@li sparse_indices: a list of vectors where each value is the indices which have
+*corresponding weights in sparse_weights. This field may be omitted for the dense approach. It's a dynamic input.
+*@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group.
+*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group. It's a dynamic input.
+*@li example_state_data: a list of vectors containing the example state data.
+*@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses.
+*@li l1: Symmetric l1 regularization strength.
+*@li l2: Symmetric l2 regularization strength.
+*@li num_loss_partitions: Number of partitions of the global loss function.
+*@li num_inner_iterations: Number of iterations per mini-batch . \n
+
+*@par Outputs:
+*y: Returns a list of vectors containing the updated example state
+*data, a list of vectors where each value is the delta
+*weights associated with a sparse feature group, and a list of vectors where the values are the delta
+*weights associated with a dense feature group . \n
+
+*@par Third-party framework compatibility
+* Compatible with tensorflow SdcaOptimizerV2 operator.
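+
+*@par Example:
+*As a rough illustration of the objective (not a statement of the exact kernel), the
+*regularized primal problem being solved has the form
+*  (1/n) * sum_i loss(w; x_i, y_i) + l1 * ||w||_1 + (l2 / 2) * ||w||_2^2
+*where "loss" is selected by loss_type (logistic, squared or hinge) and l1/l2 are the
+*attributes above; SDCA optimizes the corresponding dual objective one example at a time.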
+*/ + +REG_OP(SdcaOptimizerV2) + .DYNAMIC_INPUT(sparse_example_indices, TensorType({DT_INT64})) + .DYNAMIC_INPUT(sparse_feature_indices, TensorType({DT_INT64})) + .DYNAMIC_INPUT(sparse_feature_values, TensorType({DT_FLOAT})) + .DYNAMIC_INPUT(dense_features, TensorType({DT_FLOAT})) + .INPUT(example_weights, TensorType({DT_FLOAT})) + .INPUT(example_labels, TensorType({DT_FLOAT})) + .DYNAMIC_INPUT(sparse_indices, TensorType({DT_INT64})) + .DYNAMIC_INPUT(sparse_weights, TensorType({DT_FLOAT})) + .DYNAMIC_INPUT(dense_weights, TensorType({DT_FLOAT})) + .INPUT(example_state_data, TensorType({DT_FLOAT})) + .OUTPUT(out_example_state_data, TensorType({DT_FLOAT})) + .DYNAMIC_OUTPUT(out_delta_sparse_weights, TensorType({DT_FLOAT})) + .DYNAMIC_OUTPUT(out_delta_dense_weights, TensorType({DT_FLOAT})) + .ATTR(adaptive, Bool, false) + .ATTR(num_sparse_features, Int, 0) + .ATTR(num_sparse_features_with_values, Int, 0) + .ATTR(num_dense_features, Int, 0) + .ATTR(num_loss_partitions, Int, 1) + .ATTR(num_inner_iterations, Int, 1) + .ATTR(loss_type, String, "logistic_loss") + .ATTR(l1, Float, 0.5) + .ATTR(l2, Float, 0.5) + .OP_END_FACTORY_REG(SdcaOptimizerV2) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_SDCA_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/inc/ops/selection_ops.h new file mode 100644 index 00000000..33980d43 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/selection_ops.h @@ -0,0 +1,2174 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file selection_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief Creates a sequence of numbers . \n + +*@par Inputs: +*Three inputs, including: +* @li start: A 0D Tensor (scalar). Acts as first entry in the range if "limit" +* is not "None"; otherwise, acts as range limit and first entry defaults to "0". +* The supported types are: float32, int32, double, int64. +* @li limit: A 0D Tensor (scalar). Upper limit of sequence, exclusive. If "None", +* defaults to the value of "start" while the first entry of the range +* defaults to "0". The supported types are: float32, int32, double, int64. +* @li delta: A 0D Tensor (scalar). Number that increments "start". +* Defaults to "1". The supported types are: float32, int32, double, int64 . \n + +*@par Outputs: +*y: A 1D Tensor . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Range. +*/ +REG_OP(Range) + .INPUT(start, TensorType({DT_FLOAT,DT_INT32,DT_DOUBLE,DT_INT64})) + .INPUT(limit, TensorType({DT_FLOAT,DT_INT32,DT_DOUBLE,DT_INT64})) + .INPUT(delta, TensorType({DT_FLOAT,DT_INT32,DT_DOUBLE,DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_INT32,DT_DOUBLE,DT_INT64})) + .OP_END_FACTORY_REG(Range) + +/** +*@brief: Creates a sequence of numbers . 
\n + +*@par Inputs: +*Four inputs, including: +* @li x: A 1D Tensor of type float32 or int32. The assistant data. +* @li start: A 0D Tensor (scalar) of type float32 or int32. Acts as first entry in the range if "limit" +* is not "None"; otherwise, acts as range limit and first entry defaults to "0". +* @li limit: A 0D Tensor (scalar) of type float32 or int32. +* Upper limit of sequence, exclusive. If "None", +* defaults to the value of "start" while the first entry of the range +* defaults to "0". +* @li delta: A 0D Tensor (scalar) of type float32 or int32. +* Number that increments "start". Defaults to "1" . \n + +*@par Outputs: +*y: A 1D Tensor . \n + +*@par Quantization supported or not +*Not supported + +*@par Quantized inference supported or not +*Not supported + +*@par Multiple batches supported or not +*Supported + +*@see Range() +*@since V100R001C33 +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Range instead. +*/ +REG_OP(RangeD) + .INPUT(x, TensorType({DT_FLOAT,DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_INT32})) + .REQUIRED_ATTR(start, Float) + .REQUIRED_ATTR(limit, Float) + .REQUIRED_ATTR(delta, Float) + .OP_END_FACTORY_REG(RangeD) + +/** +*@brief Constructs a tensor by tiling a given tensor . \n + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. +* Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, +uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +* @li multiples: A 1D Tensor of type int32 or int64. +* The length must be the same as the number of dimensions in "input" + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@see TileD() + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Tile. +*/ +REG_OP(Tile) + .INPUT(x, TensorType::BasicType()) + .INPUT(multiples, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Tile) + +/** +*@brief Constructs a tensor by tiling a given tensor . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float32, float16, int32 . \n + +*@par Attributes: +*multiples: A required Tensor of type int32 or int64. +* Number of replication times . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@see Tile() + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Tile. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use Tile instead. +*/ +REG_OP(TileD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .REQUIRED_ATTR(multiples, ListInt) + .OP_END_FACTORY_REG(TileD) + +/** +* @brief Gather slices from "x" into a tensor with shape specified by +* "indices". "indices" is an K-dimensional integer tensor, best thought of as a +* (K-1)-dimensional tensor of "indices" into "params", where each element +* defines a slice of "params": +* output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]] +* "indices" defines slices into the first N dimensions of +* "params", where +* N = indices.shape[-1] +* indices = [[0, 0], [1, 1]] +* x = [['a', 'b'], ['c', 'd']] +* output = ['a', 'd'] + +* @par Inputs: +* @li x: A Tensor of type BasicType. +* @li indices: A Tensor of type IndexNumberType . \n + +* @par Outputs: +* y: A Tensor of type BasicType. 
+* @see GatherNd() + +* @attention Constraints: +* @li "x" is one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, +* complex128, uint32, uint64 . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator GatherNd. +*/ +REG_OP(GatherNd) + .INPUT(x, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(GatherNd) + +/** +*@brief Gather slices from "x" according to "indices" by corresponding axis . \n + +*@par Inputs: +*Three inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, +* uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16, +* uint16, complex128, float16, uint32, uint64, complex64, complex128. +* @li indices: A Tensor of type int32 or int64. +* @li axis: A Tensor of type as int32 or int64, +* Must be in the range [-rank(input_tensor), rank(input_tensor)) . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@attention Constraints: +*Value in indices must be in range [0, x.shape[axis]) + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator GatherV2 . \n + +*/ +REG_OP(GatherV2) + .INPUT(x, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(axis, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(GatherV2) + +/** +*@brief Gather slices from "x" according to "indices" by corresponding axis . \n + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float16, int32, uint32, int8, uint8, +* int16, uint16, int64, uint64. +* @li indices: A Tensor of type int32 or int64 . \n + +*@par Attributes: +*axis: A int32 specifying the axis to gather from . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@attention Constraints: + + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator GatherV2. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use GatherV2 instead. +*/ +REG_OP(GatherV2D) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT32, DT_INT8, DT_UINT8, + DT_INT16, DT_UINT16, DT_INT64, DT_UINT64})) + .INPUT(indices, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT32, DT_INT8, DT_UINT8, + DT_INT16, DT_UINT16, DT_INT64, DT_UINT64})) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(GatherV2D) + +/** +*@brief Extracts a strided slice of a tensor. Roughly speaking, this op + extracts a slice of size (end-begin)/stride from the given input tensor. + Starting at the location specified by begin the slice continues by + adding stride to the index until all dimensions are not less than end. + +*@par Inputs: +*Four inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128. +* @li begin: A Tensor of type int32 or int64, for the index of the first value to select . \n + +* @li end: A Tensor of type int32 or int64, for the index of the last value to select . \n + +* @li strides: A Tensor of type int32 or int64, for the increment . \n + +*@par Attributes: +* @li begin_mask: A Tensor of type int32. 
+ A bitmask where a bit "i" being "1" means to ignore the begin + value and instead use the largest interval possible. +* @li end_mask: A Tensor of type int32. + Analogous to "begin_mask". +* @li ellipsis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th position + is actually an ellipsis. +* @li new_axis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th + specification creates a new shape 1 dimension. +* @li shrink_axis_mask: A Tensor of type int32. + A bitmask where bit "i" implies that the "i"th + specification should shrink the dimensionality . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@attention Constraints: + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSlice. +*/ +REG_OP(StridedSlice) + .INPUT(x, TensorType::BasicType()) + .INPUT(begin, TensorType::IndexNumberType()) + .INPUT(end, TensorType::IndexNumberType()) + .INPUT(strides, TensorType::IndexNumberType()) + .ATTR(begin_mask, Int, 0) + .ATTR(end_mask, Int, 0) + .ATTR(ellipsis_mask, Int, 0) + .ATTR(new_axis_mask, Int, 0) + .ATTR(shrink_axis_mask, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(StridedSlice) + +/** +*@brief Extracts a strided slice of a tensor. Roughly speaking, this op + extracts a slice of size "(end-begin)/stride" from the given input tensor. + Starting at the location specified by "begin" the slice continues by + adding "stride" to the index until all dimensions are not less than "end" . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128 . \n + +*@par Attributes: +* @li begin: A Tensor of type int32 or int64. + The index of the first value to select. +* @li end: A Tensor of type int32 or int64. + The index of the last value to select. +* @li strides: A Tensor of type int32 or int64, for the increment. +* @li begin_mask: A Tensor of type int32. + A bitmask where a bit "i" being "1" means to ignore the begin + value and instead use the largest interval possible. +* @li end_mask: Analogous to "begin_mask". A Tensor of type as int32. +* @li ellipsis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th position + is actually an ellipsis. +* @li new_axis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th + specification creates a new shape 1 dimension. +* @li shrink_axis_mask: A Tensor of type int32. + A bitmask where bit "i" implies that the "i"th + specification should shrink the dimensionality . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@attention Constraints: + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSlice. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead. 
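+
+* @par Example:
+* An illustrative case (values are hypothetical): for "x" of shape [3, 4] with
+* begin = [1, 0], end = [3, 3], strides = [1, 1] and all masks 0, rows 1..2 and
+* columns 0..2 are selected, so "y" has shape [2, 3]. Setting shrink_axis_mask = 1
+* additionally treats begin[0] as a single index and removes axis 0, giving "y" of
+* shape [3].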
+*/ +REG_OP(StridedSliceD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, + DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, + DT_BOOL})) + .REQUIRED_ATTR(begin, ListInt) + .REQUIRED_ATTR(end, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(begin_mask, Int, 0) + .ATTR(end_mask, Int, 0) + .ATTR(ellipsis_mask, Int, 0) + .ATTR(new_axis_mask, Int, 0) + .ATTR(shrink_axis_mask, Int, 0) + .OP_END_FACTORY_REG(StridedSliceD) + +/** +*@brief Since StridedSlice cuts out pieces of its "input" which is size "dy", + its gradient will have the same shape (which is passed here as "shape"). + The gradient will be zero in any element that the slice does not select . \n + +*@par Inputs: +*dy: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128 . \n + +*@par Attributes: +* @li shape: A Tensor of type int32 or int64. +* @li begin: A Tensor of type int32 or int64. + The index of the first value to select. +* @li end: A Tensor of type int32 or int64. + The index of the last value to select. +* @li strides: A Tensor of type int32 or int64, for the increment. +* @li begin_mask: A Tensor of type int32. + A bitmask where a bit "i" being "1" means to ignore the begin + value and instead use the largest interval possible. +* @li end_mask: A Tensor of type int32. + Analogous to "begin_mask". +* @li ellipsis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th position + is actually an ellipsis. +* @li new_axis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th + specification creates a new shape 1 dimension. +* @li shrink_axis_mask: A Tensor of type int32. + A bitmask where bit "i" implies that the "i"th + specification should shrink the dimensionality . \n + +*@par Outputs: +*output: A Tensor. Has the same type as "dy" . \n + +*@attention Constraints: + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSliceGradD. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSliceGrad instead. +*/ +REG_OP(StridedSliceGradD) + .INPUT(dy, TensorType::BasicType()) + .OUTPUT(output, TensorType::BasicType()) + .REQUIRED_ATTR(shape, ListInt) + .REQUIRED_ATTR(begin, ListInt) + .REQUIRED_ATTR(end, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(begin_mask, Int, 0) + .ATTR(end_mask, Int, 0) + .ATTR(ellipsis_mask, Int, 0) + .ATTR(new_axis_mask, Int, 0) + .ATTR(shrink_axis_mask, Int, 0) + .OP_END_FACTORY_REG(StridedSliceGradD) + +/** +*@brief Since StridedSlice cuts out pieces of its "input" which is size "dy", + its gradient will have the same shape (which is passed here as "shape"). + The gradient will be zero in any element that the slice does not select . \n + +*@par Inputs: +*Five inputs, including: +* @li shape: A Tensor of type int32 or int64. +* @li begin: A Tensor of type int32 or int64. + The index of the first value to select. +* @li end: A Tensor of type int32 or int64. + The index of the last value to select. +* @li strides: A Tensor of type int32 or int64, for the increment. +* @li dy: A Tensor. Must be one of the following types: +* float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128 . 
\n + +*@par Attributes: +* @li begin_mask: A Tensor of type int32. + A bitmask where a bit "i" being "1" means to ignore the begin + value and instead use the largest interval possible. +* @li end_mask: A Tensor of type int32. + Analogous to "begin_mask". +* @li ellipsis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th position + is actually an ellipsis. +* @li new_axis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th + specification creates a new shape 1 dimension. +* @li shrink_axis_mask: A Tensor of type int32. + A bitmask where bit "i" implies that the "i"th + specification should shrink the dimensionality . \n + +*@par Outputs: +*output: A Tensor has the same type as "dy" . \n + +*@attention Constraints: + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSliceGrad. +*/ +REG_OP(StridedSliceGrad) + .INPUT(shape, TensorType::IndexNumberType()) + .INPUT(begin, TensorType::IndexNumberType()) + .INPUT(end, TensorType::IndexNumberType()) + .INPUT(strides, TensorType::IndexNumberType()) + .INPUT(dy, TensorType::BasicType()) + .OUTPUT(output, TensorType::BasicType()) + .ATTR(begin_mask, Int, 0) + .ATTR(end_mask, Int, 0) + .ATTR(ellipsis_mask, Int, 0) + .ATTR(new_axis_mask, Int, 0) + .ATTR(shrink_axis_mask, Int, 0) + .OP_END_FACTORY_REG(StridedSliceGrad) + +/** +*@brief Computes the sum along segments of a tensor . \n + +*@par Inputs: +*Three inputs, including: +* @li x: A Tensor of type NumberType. +* @li segment_ids: A Tensor of type IndexNumberType, whose shape is a prefix +* of "x.shape". +* @li num_segments: A Tensor of type IndexNumberType . \n + +*@par Outputs: +*y: A Tensor of type NumberType . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator UnsortedSegmentSum. +*/ +REG_OP(UnsortedSegmentSum) + .INPUT(x, TensorType::NumberType()) + .INPUT(segment_ids, TensorType::IndexNumberType()) + .INPUT(num_segments, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::NumberType()) + .OP_END_FACTORY_REG(UnsortedSegmentSum) + +/** +*@brief Computes the sum along segments of a tensor . \n + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor of type float16, float32, int32, int8, uint8. +* @li segment_ids: A Tensor of type int32, whose shape is a prefix +* of "x.shape" . \n + +*@par Attributes: +*num_segments: An int32, specifying the number of distinct segment IDs . \n + +*@par Outputs: +*y: A Tensor with same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator UnsortedSegmentSum. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentSum instead. +*/ +REG_OP(UnsortedSegmentSumD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(segment_ids, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8})) + .REQUIRED_ATTR(num_segments, Int) + .OP_END_FACTORY_REG(UnsortedSegmentSumD) + +/** +*@brief Reverses specific dimensions of a tensor . \n + +*@par Inputs: +* Two inputs, including: +*@li x: An ND Tensor (up to 8D). +*Must be one of the following types: int8, uint8, int16, uint16, int32, int64, bool, float16, float32, double, complex64, complex128, string. +*@li axis: A 1D Tensor. +*Must be one of the following types: int32, int64 + +*@par Outputs: +*y: A Tensor. Has the same type and format as "x" + +*@attention Constraints: +"axis" must be within the rank of "x" . 
\n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator ReverseV2.
+*/
+REG_OP(ReverseV2)
+    .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
+                          DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
+                          DT_COMPLEX64, DT_COMPLEX128, DT_STRING}))
+    .INPUT(axis, TensorType({DT_INT32,DT_INT64}))
+    .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
+                           DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
+                           DT_COMPLEX64, DT_COMPLEX128, DT_STRING}))
+    .OP_END_FACTORY_REG(ReverseV2)
+
+/**
+*@brief Reverses specific dimensions of a tensor . \n
+
+*@par Inputs:
+* One input:
+*@li x: An ND Tensor (up to 8D).
+* Must be one of the following types: int8, uint8, int16, uint16, int32,
+* int64, bool, float16, float, double, complex64, complex128, string . \n
+
+*@par Attributes:
+*axis: The indices of the dimensions to reverse. Supported type: listInt . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type and format as "x".
+
+*@attention Constraints:
+"axis" must be within the rank of "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator ReverseV2.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please use ReverseV2 instead.
+*/
+REG_OP(ReverseV2D)
+    .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
+                          DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
+                          DT_COMPLEX64, DT_COMPLEX128, DT_STRING}))
+    .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
+                           DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
+                           DT_COMPLEX64, DT_COMPLEX128, DT_STRING}))
+    .REQUIRED_ATTR(axis, ListInt)
+    .OP_END_FACTORY_REG(ReverseV2D)
+
+/**
+*@brief: Selects elements from "x1" or "x2", depending on "condition" . \n
+
+*@par Inputs:
+* Three inputs, including:
+* @li condition: A Tensor of type bool.
+* @li x1: A Tensor. Must be one of the following types: float16, float32,
+ * int32, int8, uint8, int16, uint16, double, complex64, int64, complex128,
+ * qint8, quint8, qint16, quint16, qint32, quint32, uint32, uint64.
+ * format:ND
+* @li x2: A Tensor of the same type as "x1". format:ND
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x1". format:ND
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Select.
+*/
+REG_OP(Select)
+    .INPUT(condition, TensorType({DT_BOOL}))
+    .INPUT(x1,TensorType::BasicType())
+    .INPUT(x2,TensorType::BasicType())
+    .OUTPUT(y,TensorType::BasicType())
+    .OP_END_FACTORY_REG(Select)
+
+/**
+*@brief: Selects elements from "then" or "else", depending on "condition" . \n
+
+*@par Inputs:
+* Three inputs, including:
+* @li condition: A Tensor of type bool.
+* @li then: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8.
+* @li else: A Tensor of the same type as "then" . \n
+
+*@par Outputs:
+*result: A Tensor. Has the same type as "then" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator SelectV2.
+*/
+REG_OP(SelectV2)
+    .INPUT(condition, TensorType({DT_BOOL}))
+    .INPUT(then,TensorType::BasicType())
+    .INPUT(else,TensorType::BasicType())
+    .OUTPUT(result,TensorType::BasicType())
+    .OP_END_FACTORY_REG(SelectV2)
+
+
+/**
+*@brief: Computes the maximum along segments of a tensor.
+*Computes a tensor such that output[i] = max(data[j]) where max is over j such that segment_ids[j] == i.
+*If the max is empty for a given segment ID i, output[i] = 0.
+
+*@par Inputs:
+*Two inputs, including:
+* @li x: A Tensor of type float16, float32, int32, int8, uint8.
+* @li segment_ids: A tensor whose size equals the size of the first dimension of "x".
+* Values must be sorted, must be positive integers, and need not cover all
+* values in the full range of valid values.
+
+*@par Outputs:
+*y: A Tensor with the same type as "x" . \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator SegmentMax.
+*/
+REG_OP(SegmentMax)
+    .INPUT(x, TensorType::RealNumberType())
+    .INPUT(segment_ids, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::RealNumberType())
+    .OP_END_FACTORY_REG(SegmentMax)
+
+/**
+*@brief: Computes the maximum along segments of a tensor.
+*Computes a tensor such that output[i] = max(data[j]) where max is over j
+ * such that segment_ids[j] == i.
+*If the max is empty for a given segment ID i, output[i] = 0.
+
+*@par Inputs:
+*One input, including:
+* @li x: A Tensor of type float16, float, int32. format:ND
+
+*@par Attributes:
+* @li segment_ids: A list whose size equals the size of the first dimension of "x".
+* Values must be sorted, must be positive integers, and need not cover all
+* values in the full range of valid values.
+
+*@par Outputs:
+*y: A Tensor with the same type as "x". format:ND
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator SegmentMax.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please use SegmentMax instead.
+*/
+REG_OP(SegmentMaxD)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .REQUIRED_ATTR(segment_ids, ListInt)
+    .OP_END_FACTORY_REG(SegmentMaxD)
+
+/**
+*@brief Returns a one-hot tensor. The locations represented by index in "x" take value "on_value",
+* while all other locations take value "off_value" . \n
+
+*@par Inputs:
+*Four inputs, including:
+* @li x: A Tensor of indices. Must be one of the following types: int32, uint8, int64.
+* @li depth: A scalar of type int32. The depth of the one hot dimension.
+* @li on_value: A scalar. The value to fill in output when indices[j] = i.
+* Must be one of the following types: float16, float32, int32, int8, uint8.
+* @li off_value: A scalar. The value to fill in output when indices[j] != i.
+* Has the same type as "on_value" . \n
+
+*@par Attributes:
+*axis: An int. The axis to fill. Defaults to "-1" . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "on_value" . \n
+
+*@par Third-party framework compatibility:
+* Compatible with the TensorFlow operator OneHot.
+*/
+REG_OP(OneHot)
+    .INPUT(x, TensorType({DT_UINT8, DT_INT32, DT_INT64}))
+    .INPUT(depth, TensorType({DT_INT32}))
+    .INPUT(on_value, TensorType::BasicType())
+    .INPUT(off_value, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .ATTR(axis, Int, -1)
+    .OP_END_FACTORY_REG(OneHot)
+
+/**
+*@brief Returns a one-hot tensor. The locations represented by index in "x" take value "on_value",
+* while all other locations take value "off_value" . \n
+
+*@par Inputs:
+*Three inputs, including:
+*@li x: A Tensor of indices. Must be one of the following types: int32, uint8, int64.
+*@li on_value: A scalar. The value to fill in output when indices[j] = i.
+* Must be one of the following types: float16, float32, int32, int8, uint8.
+*@li off_value: A scalar. The value to fill in output when indices[j] != i.
+* Has the same type as "on_value" . \n
+
+*@par Attributes:
+*@li depth: A scalar of type int32. The depth of the one hot dimension.
+*@li axis: An int. The axis to fill.
Defaults to "-1" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "on_value" . \n + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator OneHot. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use OneHot instead. +*/ +REG_OP(OneHotD) + .INPUT(x, TensorType({DT_UINT8, DT_INT32})) + .INPUT(on_value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT8, + DT_INT8})) + .INPUT(off_value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT8, + DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT8, DT_INT8})) + .REQUIRED_ATTR(depth, Int) + .ATTR(axis, Int, -1) + .OP_END_FACTORY_REG(OneHotD) + +/** +*@brief Extracts a slice from a tensor. +* This operation extracts a slice of size "size" from a tensor "x" +* starting at the location specified by "begin" . \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +*@li offsets: A Tensor of type int32 or int64. The starting location for the slice. +*@li size: A Tensor of type int32 or int64. The tensor shape . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". The slice extracted from the tensor . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Slice. +*/ +REG_OP(Slice) + .INPUT(x, TensorType::BasicType()) + .INPUT(offsets, TensorType::IndexNumberType()) + .INPUT(size, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Slice) + +/** +*@brief Extracts a slice from a tensor. +* This operation extracts a slice of size "size" from a tensor "x" +* starting at the location specified by "begin" . \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n + +*@par Attributes: +*@li offsets: The starting location for the slice. +*@li size: The tensor shape . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". The slice extracted from the tensor. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. +*/ +REG_OP(SliceD) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(offsets, ListInt) + .REQUIRED_ATTR(size, ListInt) + .OP_END_FACTORY_REG(SliceD) + +/** +*@brief Extracts a slice from a tensor. +* This operation extracts a slice of size "size" from a tensor "x" +* starting at the location specified by "begin" . \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n + +*@par Inputs: +*@li offsets: The starting location for the slice. + +*@par Attributes: +*@li size: The tensor shape . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". The slice extracted from the tensor. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. +*/ +REG_OP(SliceDV2) + .INPUT(x, TensorType::BasicType()) + .INPUT(offsets, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(size, ListInt) + .OP_END_FACTORY_REG(SliceDV2) + +/** +* @brief Finds values and indices of the "k" largest elements for the last +* dimension . 
\n
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A 1D or higher tensor of type float16, with the last dimension at
+* least "k".
+* Specifies the data to sort.
+* @li assist_seq: A 1D tensor of type float16,
+* with a size of 2N, where "N" is the size of the last dimension.
+* The first N numbers are indices, and the next N numbers are the deviation of casting
+* int32 to float16. \n
+
+* @par Attributes:
+* @li k: A required int that is at least 0, specifying the number of top elements
+* to look for along the last dimension (along each row for matrices).
+* @li sorted: An optional bool. Defaults to true.
+* If true, the resulting "k" elements will be sorted by the values in descending
+* order.
+* @li dim: An optional int. Defaults to -1. For reserved use.
+* @li largest: An optional bool. Defaults to true. For reserved use. \n
+
+* @par Outputs:
+* @li values: A Tensor, specifying the sorted data. Has the same type as "input".
+* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n
+
+* @attention Constraints:
+* @li k <= 5120
+* @li Size of the last dimension <= 1458176
+* @li sorted = true
+* @li Sorting of indices is unstable on the Ascend310 platform.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead.
+*/
+REG_OP(TopKD)
+    .INPUT(x, TensorType::RealNumberType())
+    .INPUT(assist_seq, TensorType({DT_FLOAT16}))
+    .OUTPUT(values, TensorType::RealNumberType())
+    .OUTPUT(indices, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(k, Int)
+    .ATTR(sorted, Bool, true)
+    .ATTR(dim, Int, -1)
+    .ATTR(largest, Bool, true)
+    .OP_END_FACTORY_REG(TopKD)
+
+/**
+* @brief Finds values and indices of the "k" largest elements for the last
+* dimension . \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A 1D or higher tensor of type BasicType, with the last dimension
+* at least "k".
+* @li k: A 0D Tensor of type int32.
+* Number of top elements to look for along the last dimension (along each row
+* for matrices) . \n
+
+* @par Attributes:
+* @li sorted: An optional bool. Defaults to true.
+* If true, the resulting "k" elements will be sorted by the values in descending
+* order.
+* @li dim: An optional int. Defaults to -1. For reserved use.
+* @li largest: An optional bool. Defaults to true. For reserved use. \n
+
+* @par Outputs:
+* @li values: A Tensor, specifying the sorted data. Has the same type as
+* "input".
+* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n
+
+* @see TopK()
+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator TopKV2.
+*/
+REG_OP(TopKV2)
+    .INPUT(x, TensorType::RealNumberType())
+    .INPUT(k, TensorType({DT_INT32}))
+    .OUTPUT(values, TensorType::RealNumberType())
+    .OUTPUT(indices, TensorType({DT_INT32}))
+    .ATTR(sorted, Bool, true)
+    .ATTR(dim, Int, -1)
+    .ATTR(largest, Bool, true)
+    .OP_END_FACTORY_REG(TopKV2)
+
+/**
+* @brief Finds values and indices of the "k" largest elements for the last
+* dimension . \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A 1D or higher tensor of type BasicType, with the last dimension
+* at least "k".
+* @li k: A 0D Tensor of type int32.
+* Number of top elements to look for along the last dimension (along each row
+* for matrices) . \n
+
+* @par Attributes:
+* @li sorted: An optional bool. Defaults to true.
+* If true, the resulting "k" elements will be sorted by the values in descending
+* order.
+* @li T: Indicator of indices type . \n
+
+* @par Outputs:
+* @li values: A Tensor, specifying the sorted data.
Has the same type as +* "input". +* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n + +* @see TopK() +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator TopKV2. +*/ +REG_OP(TopK) + .INPUT(x, TensorType::RealNumberType()) + .INPUT(k, TensorType({DT_INT32})) + .OUTPUT(values, TensorType::RealNumberType()) + .OUTPUT(indices, TensorType({DT_INT32})) + .ATTR(sorted, Bool, true) + .OP_END_FACTORY_REG(TopK) +/** +*@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices" . \n + +*@par Inputs: +*Inputs including: +* @li indices: A required index tensor. Must be one of the following types: float32, float16, int32, int8, uint8. +* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8. +* @li shape: A required list of int32, specifying the output shape. +*@par Outputs: +*y:A output Tensor with same datatype as "updates" . \n + +*@attention Constraints: +*@li "y" has the same shape as "shape". +*@li "y" has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNd. +*/ +REG_OP(ScatterNd) + .INPUT(indices, TensorType::BasicType()) + .INPUT(x, TensorType::BasicType()) + .INPUT(shape, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(ScatterNd) +/** +*@brief Creates a new tensor by applying sparse "updates" to individual values + * or slices within a tensor (initially zero for numeric, empty for string) of + * the given "shape" according to "indices" . \n + +*@par Inputs: +*Inputs including: +* @li indices: A required index tensor. Must be one of the following types: + * float, float16, int32, int16. format:ND. +* @li x: A required slice tensor. Must be one of the following types: + * float, float16, int32, int16. format:ND. +*@par Attributes: +* @li shape: A required list of int32, specifying the output shape. +*@par Outputs: +*y: A Tensor. Has the same type as "x". format:ND . \n + +*@attention Constraints: +*@li "y" has the same shape as "shape". +*@li "y" has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNd. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ScatterNd instead. +*/ +REG_OP(ScatterNdD) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) + .REQUIRED_ATTR(shape, ListInt) + .OP_END_FACTORY_REG(ScatterNdD) + +/** +* @brief Says whether the targets are in the top "k" predictions . \n + +* @par Inputs: +* Three inputs, including: +* @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor. +* @li x2: A 1D Tensor of type int32. A batch_size tensor of class ids . \n + +* @par Attributes: +* @li k: A required IndexNumberType, specifying the number of top elements to +* look at for computing precision . \n + +* @par Outputs: +* y: A Tensor of type bool . \n + +* @attention Constraints: +* @li x2 must be non-negative tensor. + +* @see InTopK() + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator InTopK. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use InTopK instead. 
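+
+*@par Example:
+*A hand-worked illustration, not generated output: for
+*x1 = [[0.1, 0.7, 0.2], [0.5, 0.4, 0.1]], x2 = [0, 0] and k = 2, the top-2
+*classes per row are {1, 2} and {0, 1}, so y = [false, true].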
+*/ +REG_OP(InTopKD) + .INPUT(x1, TensorType({DT_FLOAT})) + .INPUT(x2, TensorType({IndexNumberType})) + .OUTPUT(y, TensorType({DT_BOOL})) + .REQUIRED_ATTR(k, Int) + .OP_END_FACTORY_REG(InTopKD) + +/** +* @brief Says whether the targets are in the top "k" predictions . \n + +* @par Inputs: +* Two inputs, including: +* @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor. +* @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. +* @li k: A 1D Tensor of the same type as "x2". +* Specifies the number of top elements to look at for computing precision . \n + +* @par Outputs: +* y: A Tensor of type bool . \n + +* @attention Constraints: +* @li x2 must be non-negative tensor. + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator InTopKV2. +*/ +REG_OP(InTopK) + .INPUT(x1, TensorType({DT_FLOAT})) + .INPUT(x2, TensorType(IndexNumberType)) + .INPUT(k, TensorType({IndexNumberType})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(InTopK) + +/** +* @brief Assigns "value" to the sliced l-value reference of "var". +* The values of "value" are assigned to the positions in the variable. "var" +* that are selected by the slice parameters. The slice parameters "begin, "end", +* "strides", etc. work exactly as in "StridedSlice" . \n + +* @par Inputs: +* Five inputs, including: +* @li var: A mutable ND Tensor of type BasicType. +* @li begin: A mutable ND Tensor of type IndexNumberType. +* Specifies the index of the first value to select. +* @li end: A mutable ND Tensor of type IndexNumberType. +* Specifies the index of the last value to select. +* @li strides: A mutable ND Tensor of type IndexNumberType. +* Specifies the stride to select. +* @li input_value: A mutable ND Tensor of type BasicType . \n + +* @par Attributes: +* @li begin_mask: An optional int. Defaults to "0". +* @li end_mask: An optional int. Defaults to "0". +* @li ellipsis_mask: An optional int. Defaults to "0". +* @li new_axis_mask: An optional int. Defaults to "0". +* @li shrink_axis_mask: An optional int. Defaults to "0" . \n + +* @par Outputs: +* var: A mutable Tensor. Has the same type as "var" . \n + +* @attention Constraints: +* This operator currently does not support broadcasting. Therefore, the shape +* of "value" must be exactly the shape produced by the slice of "var" . \n + +* @see StridedSlice() + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator StridedSlice. +*/ +REG_OP(StridedSliceAssign) + .INPUT(var, TensorType(BasicType)) + .INPUT(begin, TensorType(IndexNumberType)) + .INPUT(end, TensorType(IndexNumberType)) + .INPUT(strides, TensorType(IndexNumberType)) + .INPUT(input_value, TensorType(BasicType)) + .OUTPUT(var, TensorType(BasicType)) + .ATTR(begin_mask, Int, 0) + .ATTR(end_mask, Int, 0) + .ATTR(ellipsis_mask, Int, 0) + .ATTR(new_axis_mask, Int, 0) + .ATTR(shrink_axis_mask, Int, 0) + .OP_END_FACTORY_REG(StridedSliceAssign) + +/** +* @brief Assigns "value" to the sliced l-value reference of "var". +* The values of "value" are assigned to the positions in the variable. "var" +* that are selected by the slice parameters. The slice parameters "begin, "end", +* "strides", etc. work exactly as in "StridedSlice" . \n + +* @par Inputs: +* Two inputs, including: +* @li var: A mutable ND Tensor of the following types:int32, int16, float16, float32. +* @li input_value: A mutable ND "Tensor" of the following types:int32, int16, float16, float32 . 
\n + +* @par Attributes: +* @li begin: A required list of ints. +* Specifies the index of the first value to select. +* @li end: A required list of ints. +* Specifies the index of the last value to select. +* @li strides: A required list of ints. Specifies the stride to select. +* @li begin_mask: An optional int. Defaults to "0". +* @li end_mask: An optional int. Defaults to "0". +* @li ellipsis_mask: An optional int. Defaults to "0". +* @li new_axis_mask: An optional int. Defaults to "0". +* @li shrink_axis_mask: An optional int. Defaults to "0" . \n + +* @par Outputs: +* var: A mutable Tensor. Has the same type as input "var" . \n + +* @attention Constraints: +* This operator currently does not support broadcasting. Therefore, the shape of +* "value" shape must be exactly the shape produced by the slice of "var" . \n + +* @see StridedSlice() +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSliceAssign instead. +*/ +REG_OP(StridedSliceAssignD) + .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) + .INPUT(input_value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) + .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) + .REQUIRED_ATTR(begin, ListInt) + .REQUIRED_ATTR(end, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(begin_mask, Int, 0) + .ATTR(end_mask, Int, 0) + .ATTR(ellipsis_mask, Int, 0) + .ATTR(new_axis_mask, Int, 0) + .ATTR(shrink_axis_mask, Int, 0) + .OP_END_FACTORY_REG(StridedSliceAssignD) + +/** +*@brief Gather slices from "params" according to "indices"."indices" must be + an integer tensor of any dimension(usually 0-D or 1-D). + Produces an output tensor with shape "indices.shape + params.shape[1:]" . \n + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* int64, qint8, quint8, qint32, qint16, quint16, uint16, +* float16, uint32, uint64, complex64, complex128. +* @li indices: A Tensor of type int32 or int64 . \n + +*@par Attributes: +*validate_indices: A bool specifying whether to verify the argument of "indice" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@attention Constraints: +* "indices" is in the range [0, x.shape[0]) . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Gather . \n + +*/ +REG_OP(Gather) + .INPUT(x, TensorType::BasicType()) + .INPUT(indices, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(validate_indices, Bool, true) + .OP_END_FACTORY_REG(Gather) + +/** +*@brief Computes the cumulative product of the tensor "x" along "axis" . \n + +*@par Inputs: +* Two inputs, including: +*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 +*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". +* +*@par Attributes: +*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input +* is identical to the first element of the output. If "True", performs exclusive cumprod. +*@li reverse: A bool. Defaults to "False". +* +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cumprod. 
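+
+*@par Example:
+* A hand-worked illustration, not generated output: for x = [1, 2, 3, 4] and
+* axis = 0, y = [1, 2, 6, 24]; with exclusive = "True", y = [1, 1, 2, 6];
+* with reverse = "True" (and exclusive = "False"), y = [24, 24, 12, 4].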
+*/ +REG_OP(Cumprod) + .INPUT(x, TensorType::NumberType()) + .INPUT(axis, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::NumberType()) + .ATTR(exclusive, Bool, false) + .ATTR(reverse, Bool, false) + .OP_END_FACTORY_REG(Cumprod) + +/** +*@brief Computes the cumulative product of the tensor "x" along "axis" . \n + +*@par Inputs: +* One input: +*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 +* +*@par Attributes: +*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". +*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input +* is identical to the first element of the output. If "True", performs exclusive cumprod. +*@li reverse: A bool. Defaults to "False". +* +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cumprod. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Cumprod instead. +*/ +REG_OP(CumprodD) + .INPUT(x, TensorType::NumberType()) + .OUTPUT(y, TensorType::NumberType()) + .REQUIRED_ATTR(axis, Int) + .ATTR(exclusive, Bool, false) + .ATTR(reverse, Bool, false) + .OP_END_FACTORY_REG(CumprodD) + +/** +*@brief Computes the cumulative sum of the tensor "x" along "axis" . \n + +*@par Inputs: +* Two inputs, including: +*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. +*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". +* +*@par Attributes: +*@li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is +* identical to the first element of the output. If "True", performs exclusive cumsum. +*@li reverse: A bool. Defaults to "False". +* +*@par Outputs: +*@li y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cumsum. +*/ +REG_OP(Cumsum) + .INPUT(x, TensorType::NumberType()) + .INPUT(axis, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::NumberType()) + .ATTR(exclusive, Bool, false) + .ATTR(reverse, Bool, false) + .OP_END_FACTORY_REG(Cumsum) + +/** +*@brief Computes the cumulative sum of the tensor "x" along "axis". +* +*@par Inputs: +* One input: +*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. +* +*@par Attributes: +*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". +*@li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is +* identical to the first element of the output. If "True", performs exclusive cumsum. +*@li reverse: A bool. Defaults to "False". +* +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cumsum. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Cumsum instead. 
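+
+*@par Example:
+* A hand-worked illustration, not generated output: for x = [1, 2, 3, 4] and
+* axis = 0, y = [1, 3, 6, 10]; with exclusive = "True", y = [0, 1, 3, 6];
+* with reverse = "True" (and exclusive = "False"), y = [10, 9, 7, 4].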
+*/
+REG_OP(CumsumD)
+    .INPUT(x, TensorType::NumberType())
+    .OUTPUT(y, TensorType::NumberType())
+    .REQUIRED_ATTR(axis, Int)
+    .ATTR(exclusive, Bool, false)
+    .ATTR(reverse, Bool, false)
+    .OP_END_FACTORY_REG(CumsumD)
+
+/**
+*@brief Updates specified rows with values in "v".
+*Computes x[i, :] = v; returns x.
+*@par Inputs:
+*Three inputs, including:
+* @li x: A Tensor.
+* TensorType::NumberType().
+* @li indices: A vector of type int32.
+* Indices into the left-most dimension of "x".
+* @li v: A Tensor of the same type as "x".
+* Same dimension sizes as "x" except the first dimension,
+* which must be the same as the size of "indices" . \n
+
+*@par Outputs:
+*y: A Tensor of the same type as "x".
+* An alias of "x". The content of "y" is undefined if there are duplicates in indices.
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator InplaceUpdate.
+*/
+REG_OP(InplaceUpdate)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(v, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(InplaceUpdate)
+
+/**
+*@brief Updates specified rows with values in "v".
+*Computes x[i, :] = v; returns x.
+*@par Inputs:
+*Two inputs, including:
+* @li x: A Tensor of type int32, float16, float32.
+* @li v: A Tensor of the same type as "x".
+* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n
+
+*@par Attributes:
+*indices: A required list of ints. Indices into the left-most dimension of "x" . \n
+
+*@par Outputs:
+*y: A Tensor of the same type as "x".
+* An alias of "x". The content of "y" is undefined if there are duplicates in indices . \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator InplaceUpdate.
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceUpdate instead.
+*/
+REG_OP(InplaceUpdateD)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .INPUT(v, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .REQUIRED_ATTR(indices, ListInt)
+    .OP_END_FACTORY_REG(InplaceUpdateD)
+
+/**
+*@brief Adds "v" to specified rows of "x".
+*Computes y = x; y[i, :] += v.
+*@par Inputs:
+*Three inputs, including:
+* @li x: A Tensor.
+* TensorType::NumberType().
+* @li indices: A vector of type int32.
+* Indices into the left-most dimension of "x".
+* @li v: A Tensor of the same type as "x".
+* Same dimension sizes as "x" except the first dimension,
+* which must be the same as the size of "indices" . \n
+
+*@par Outputs:
+*y: A Tensor of the same type as "x".
+* An alias of "x". The content of "y" is undefined if there are duplicates in indices.
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator InplaceAdd.
+*/
+REG_OP(InplaceAdd)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(v, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(InplaceAdd)
+
+/**
+*@brief Adds "v" to specified rows of "x".
+*Computes y = x; y[i, :] += v.
+*@par Inputs:
+*Two inputs, including:
+* @li x: A Tensor of type int32, float16, float32.
+* @li v: A Tensor of the same type as "x".
+* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n
+
+*@par Attributes:
+*indices: A required list of ints. Indices into the left-most dimension of "x" . \n
+
+*@par Outputs:
+*y: A Tensor of the same type as "x".
+* An alias of "x". The content of "y" is undefined if there are duplicates in indices . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InplaceAdd. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceAdd instead. +*/ +REG_OP(InplaceAddD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(v, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .REQUIRED_ATTR(indices, ListInt) + .OP_END_FACTORY_REG(InplaceAddD) + +/** +*@brief Subtracts "v" into specified rows of "x". +*Computes y = x; y[i, :] -= v; return y. +*@par Inputs: +**Three inputs, including: +* @li x: A Tensor. TensorType::NumberType(). +* @li indices: A vector of type int32. Indices into the left-most dimension of x. +* @li v: A Tensor of the same type as "x". +* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +* An alias of "x". The content of "y" is undefined if there are duplicates in indices . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InplaceSub. +*/ +REG_OP(InplaceSub) + .INPUT(x, TensorType::BasicType()) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(v, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(InplaceSub) + +/** +*@brief Subtracts "v" into specified rows of "x". +*Computes y = x; y[i, :] -= v . \n + +*@par Inputs: +**Two inputs, including: +* @li x: A Tensor of type is int32, float16, float32. +* @li v: A Tensor of the same type as "x". +* Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices" . \n + +*@par Attributes: +*indices: A required list of ints. Indices into the left-most dimension of "x" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +* An alias of x. The content of y is undefined if there are duplicates in indices . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InplaceSub. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceSub instead. +*/ +REG_OP(InplaceSubD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(v, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .REQUIRED_ATTR(indices, ListInt) + .OP_END_FACTORY_REG(InplaceSubD) + +/** +* @brief Applies sparse addition to input "x" using individual values or slices +* from "updates" according to "indices". The updates are non-aliasing: "x" is +* only modified in-place if no other operations will use it. Otherwise, a copy +* of "x" is made. This operation has a gradient with respect to both "x" and +* "updates" . \n + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor of type NumberType. A batch_size x classes tensor. +* @li indices: A Tensor of type IndexNumberType. Specifies the indices into "x". +* @li updates: A Tensor. Must have the same type as "x". +* Specifies the updated values to add to "x" . \n + +* @par Outputs: +* y: A Tensor with the same shape as "x", containing values of "x" updated with +* "updates" . \n + +* @see ScatterNd(),ScatterNdAdd() + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator ScatterNDNonAliasingAdd. 
+*/ +REG_OP(ScatterNonAliasingAdd) + .INPUT(x, TensorType::NumberType()) + .INPUT(indices, TensorType::IndexNumberType()) + .INPUT(updates, TensorType::NumberType()) + .OUTPUT(y, TensorType::NumberType()) + .OP_END_FACTORY_REG(ScatterNonAliasingAdd) + +/** +* @brief Computes the minimum along segments of a tensor . \n + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor of type RealNumberType. +* @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix +* of "x.shape". +* @li num_segments: A Tensor of type IndexNumberType . \n + +* @par Outputs: +* y: A Tensor of type RealNumberType . \n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. + +* @see UnsortedSegmentSum(), UnsortedSegmentProd(), + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator UnsortedSegmentMin. +*/ +REG_OP(UnsortedSegmentMin) + .INPUT(x, TensorType::RealNumberType()) + .INPUT(segment_ids, TensorType::IndexNumberType()) + .INPUT(num_segments, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .OP_END_FACTORY_REG(UnsortedSegmentMin) + +/** +* @brief Computes the minimum along segments of a tensor . \n + +* @par Inputs: +* Two inputs, including: +* @li x: A Tensor of the following types:int32, int16, float16, float32. +* @li segment_ids: A 1D Tensor of type int32, whose shape is a prefix +* of "x.shape" . \n + +* @par Attributes: +* num_segments: A required int32, specifying the number of distinct segment IDs . \n + +* @par Outputs: +* y: A Tensor.Must have the same type as input "x" . \n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. + +* @see UnsortedSegmentProdD(), UnsortedSegmentSumD(), +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentMin instead. +*/ +REG_OP(UnsortedSegmentMinD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .INPUT(segment_ids, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .REQUIRED_ATTR(num_segments, Int) + .OP_END_FACTORY_REG(UnsortedSegmentMinD) + +/** +* @brief Computes the maximum along segments of a tensor . \n + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor of type RealNumberType. +* @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix +* of "x.shape". +* @li num_segments: A Tensor of type IndexNumberType . \n + +* @par Outputs: +* y: A Tensor of type RealNumberType . \n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. + +* @see UnsortedSegmentSum(), UnsortedSegmentProd(), + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator UnsortedSegmentMax. +*/ +REG_OP(UnsortedSegmentMax) + .INPUT(x, TensorType::RealNumberType()) + .INPUT(segment_ids, TensorType::IndexNumberType()) + .INPUT(num_segments, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .OP_END_FACTORY_REG(UnsortedSegmentMax) + +/** +* @brief Computes the maximum along segments of a tensor . \n + +* @par Inputs: +* Two inputs, including: +* @li x: A Tensor of the following types:int32, int16, float16, float32. +* @li segment_ids: A 1D Tensor of type int32, whose shape is a prefix +* of "x.shape" . \n + +* @par Attributes: +* num_segments: A required int32, specifying the number of distinct segment IDs . \n + +* @par Outputs: +* y: A Tensor.Must have the same type as input "x" . 
\n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. + +* @see UnsortedSegmentProdD(), +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentMax instead. +*/ +REG_OP(UnsortedSegmentMaxD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .INPUT(segment_ids, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .REQUIRED_ATTR(num_segments, Int) + .OP_END_FACTORY_REG(UnsortedSegmentMaxD) +/** +* @brief Computes the product along segments of a tensor . \n + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor of type NumberType. +* @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix +* of "x.shape". +* @li num_segments: A Tensor of type IndexNumberType . \n + +* @par Outputs: +* y: A Tensor of type NumberType . \n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. + +* @see UnsortedSegmentSum(), UnsortedSegmentMin(), + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator UnsortedSegmentProd. +*/ +REG_OP(UnsortedSegmentProd) + .INPUT(x, TensorType::NumberType()) + .INPUT(segment_ids, TensorType::IndexNumberType()) + .INPUT(num_segments, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::NumberType()) + .OP_END_FACTORY_REG(UnsortedSegmentProd) + +/** +* @brief Computes the product along segments of a tensor . \n + +* @par Inputs: +* Two inputs, including: +* @li x: A Tensor of the following types:int32, int16, float16, float32. +* @li segment_ids: A 1D Tensor of type int32, whose shape is a prefix +* of "x.shape" . \n + +* @par Attributes: +* num_segments: An int32, specifying the number of distinct segment IDs . \n + +* @par Outputs: +* y: A Tensor.Must have the same type as input "x" . \n + +* @attention Constraints: +* @li segment_ids must be non-negative tensor. + +* @see UnsortedSegmentMinD() +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentProd instead. +*/ +REG_OP(UnsortedSegmentProdD) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .INPUT(segment_ids, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .REQUIRED_ATTR(num_segments, Int) + .OP_END_FACTORY_REG(UnsortedSegmentProdD) + +/** +*@brief Performs object detection . \n + +*@par Inputs: +*@li cls_prob: An NCHW tensor of type float16 or float32, specifying the probability of the proposal is the background class. +*@li bbox_delta: An NCHW tensor of type float16 or float32, specifying the coordinates of the proposals bounding boxes. +*@li im_info: An ND tensor of type float16 or float32, specifying the Image information . \n + +*@par Attributes: +*@li feat_stride: A optional float32, specifying the stride of the sliding window. Must be greater than "0".Defaults to "16". +*@li base_size: A optional float32, specifying the size of the generated base box. Must be greater than "0". Defaults to "16". +*@li min_size: A optional float32, specifying the minimum edge length of a proposal. A box with any edge less than this value is removed. Must be greater than "0". Defaults to "16". +*@li ratio: A optional list of floats, specifying the aspect ratio of the generated base box. Defaults to [0.5, 1, 2]. +*@li scale: A optional list of floats, specifying the ratio of the size of the generated base box to "base_size". Defaults to [8, 16, 32]. +*@li pre_nms_topn: A required int, specifying top K boxes before NMS. 
For float16 input, pre_nms_topn <= 6000. For float32 input, pre_nms_topn <= 3000. Defaults to "3000". +*@li post_nms_topn: A required int, specifying the number of boxes to be output after NMS. The value is a multiple of 16. For float16 input, post_nms_topn <= 6000. For float32 input, post_nms_topn <= 3000 (the maximum multiple of 16 is 2992 within the range). Defaults to "304". +*@li iou_threshold: A required float32, specifying the NMS threshold. The value range is (0,1]. Defaults to "0.7". +*@li output_actual_rois_num: An optional bool. Defaults to "false" . \n + +*@par Outputs: +*@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16 or float32, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". +*@li actual_rois_num: A Tensor with shape [batch, 8], of type int32, specifying the number of BBoxes output per batch. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ + REG_OP(Proposal) + .INPUT(cls_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bbox_delta, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(im_info, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(actual_rois_num, TensorType({DT_INT32})) + .ATTR(feat_stride, Float, 16) + .ATTR(base_size, Float, 16) + .ATTR(min_size, Float, 16) + .ATTR(ratio, ListFloat, {0.5, 1, 2}) + .ATTR(scale, ListFloat, {8, 16, 32}) + .ATTR(pre_nms_topn, Int, 3000) + .ATTR(post_nms_topn, Int, 304) + .ATTR(iou_threshold, Float, 0.7) + .ATTR(output_actual_rois_num, Bool, false) + .OP_END_FACTORY_REG(Proposal) + +/** +*@brief Performs object detection. Different from Proposal, this is an internal API called after FE fusion and has an additional "rpn_bbox" attribute. The suffix "D" in the API name will be removed from the generated model . \n + +*@par Inputs: +*@li cls_prob: An NCHW tensor of type float16, specifying the probability of the proposal is the background class. +*@li bbox_delta: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes. +*@li im_info: An ND tensor of type float16 or float32, specifying the Image information. +*@li rpn_bbox: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes . \n + +*@par Attributes: +*@li feat_stride: A required float32, specifying the stride of the sliding window. Must be greater than "0".Defaults to "16". +*@li base_size: A required float32, specifying the size of the generated base box. Must be greater than "0". Defaults to "16". +*@li min_size: A required float32, specifying the minimum edge length of a proposal. A box with any edge less than this value is removed. Must be greater than "0". Defaults to "16". +*@li ratio: A required list of floats, specifying the aspect ratio of the generated base box. Defaults to [0.5, 1, 2]. +*@li scale: A required list of floats, specifying the ratio of the size of the generated base box to "base_size". Defaults to [8, 16, 32]. +*@li pre_nms_topn: A required int, specifying top K boxes before NMS. For float16 input, pre_nms_topn <= 6000. For float32 input, pre_nms_topn <= 3000. Defaults to "3000". +*@li post_nms_topn: A required int, specifying the number of boxes to be output after NMS. The value is a multiple of 16. For float16 input, post_nms_topn <= 6000. 
For float32 input, post_nms_topn <= 3000 (the maximum multiple of 16 is 2992 within the range). Defaults to "304". +*@li iou_threshold: A required float32, specifying the NMS threshold. The value range is (0,1]. Defaults to 0.7. +*@li output_actual_rois_num: An optional bool. Defaults to "false" . \n + +*@par Outputs: +*@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16 or float32, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". +*@li actual_rois_num: A Tensor with shape [batch, 8], of type int32, specifying the number of BBoxes output per batch. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use Proposal instead. +*/ +REG_OP(ProposalD) + .INPUT(cls_prob, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bbox_delta, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(im_info, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rpn_bbox, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(actual_rois_num, TensorType({DT_INT32})) + .ATTR(feat_stride, Float, 16) + .ATTR(base_size, Float, 16) + .ATTR(min_size, Float, 16) + .ATTR(ratio, ListFloat, {0.5, 1, 2}) + .ATTR(scale, ListFloat, {8, 16, 32}) + .ATTR(pre_nms_topn, Int, 3000) + .ATTR(post_nms_topn, Int, 304) + .ATTR(iou_threshold, Float, 0.7) + .ATTR(output_actual_rois_num, Bool, false) + .OP_END_FACTORY_REG(ProposalD) + +/** +*@brief Performs plane or channel conversion on YoloV2. +* If reverse=true: (N, H, W, C)->(N, H*stride, W*stride, C/(stride*stride)) +* If reverse=false: (N, H, W, C)->(N, H/stride, W/stride, C*(stride*stride)) + +*@par Inputs: +*x: An (N, H, W, C) tensor. Type is float16, float32, int8, uint8, int16, uint16, int32, uint32, int64 or uint64. . \n + +*@par Attributes: +*@li stride: An optional int32, specifying the plane or channel scaling factor. Defaults to "2". +*@li reverse: An optional bool, specifying the conversion mode. If "true", depth to space conversion is performed. If "false", space to depth conversion is performed. Defaults to "false" . \n + +*@par Outputs: +*y: An (N, H, W, C) tensor. Has same type as "x" . \n + +*@attention Constraints: +*@li If reverse=true: C/(stride*stride) yields an integer result. If reverse=false: W/stride and H/stride yield integer results. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(PassThrough) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) + .ATTR(stride, Int, 2) + .ATTR(reverse, Bool, false) + .OP_END_FACTORY_REG(PassThrough) + +/** +*@brief Crops the input tensor x to the shape of size. For example: +*(1) x: bottom to be cropped, with shape (20, 50, 512, 512); +*(2) size: reference input for cropping, with shape (20, 10, 256, 256); +*(3) axis = 1; +*(4) offset = (25, 128, 128); +*(5) y = x[:, 25:25 + size.shape[1], 128:128 + size.shape[2], 128:128 + size.shape[3]] . \n + +*@par Inputs: +*Inputs include: +* @li x: A required Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32,int64, uint64. 
+* @li size: A required Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. +*@par Attributes: +*@li axis: A required int32, specifying the first dimension to crop. Defaults to "2". +*@li offset: A required array, specifying the shift for all/each dimension to align the cropped bottom with the reference bottom. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. +*@par Outputs: +*y: A required Tensor. Has the same type and shape as "size" . \n + +*@attention Constraints: +*@li "y" must have the same type and shape as "size". "x" must have the same type as "size". +*@li "axis" must be less than the rank of "x". +*@li The "offset" for each dimension must not exceed the maximum value of the corresponding dimension of "x". +*@li The array length of "offset" plus the value of "axis" equals to the rank of "y". +*@par Third-party framework compatibility +* Compatible with the Caffe operator Crop. +*/ +REG_OP(Crop) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) + .INPUT(size, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) + .ATTR(axis, Int, 2) + .REQUIRED_ATTR(offsets, ListInt) + .OP_END_FACTORY_REG(Crop) + +/** +*@brief Extends the input with copies of data along a specified dimension. For example: +*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); +*(2) axis = 1; +*(3) tiles = 2; +*(4) Then, y = [[[1, 2], [3, 4], [5, 6], [1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]], with shape (2, 6, 2) . \n + +*@par Inputs: +* One input: +*input_x: A Tensor with any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n + +*@par Attributes: +*@li axis: An optional int32, specifying the axis to tile. Defaults to 1. +*@li tiles: A required int32, specifying the number of copies (tiles) to output . \n + +*@par Outputs: +*output_y: A Tensor of any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n + +*@attention Constraints: +*@li "axis" must be within the rank of the input tensor. +*@li "tiles" must be greater than 1. +*@par Third-party framework compatibility +* Compatible with the Caffe operator Tile. +*/ +REG_OP(TileWithAxis) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT64, DT_INT32, + DT_INT16, DT_INT8, DT_UINT64, DT_UINT32, DT_UINT16, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT64, DT_INT32, + DT_INT16, DT_INT8, DT_UINT64, DT_UINT32, DT_UINT16, DT_UINT8})) + .ATTR(axis, Int, 1) + .REQUIRED_ATTR(tiles, Int) + .OP_END_FACTORY_REG(TileWithAxis) + +/** +*@brief Read data with offset and stride . \n + +*@par Inputs: +*One input: +*x: A Tensor. Must be one of the following types: float16, int8 . \n + +*@par Attributes: +*@li stride_list: An optional 5D list of type int32. Defaults to "[1,1,1,1,1]" . \n + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
+*/
+REG_OP(ReadSelect)
+    .INPUT(x, TensorType::ALL())
+    .OUTPUT(y, TensorType::ALL())
+    .ATTR(stride_list, ListInt, {1,1,1,1,1})
+    .OP_END_FACTORY_REG(ReadSelect)
+
+/**
+*@brief Writes data with an offset . \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: int32, float32, float16, int8 . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x".
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(WriteSelect)
+    .INPUT(x, TensorType::ALL())
+    .OUTPUT(y, TensorType::ALL())
+    .OP_END_FACTORY_REG(WriteSelect)
+
+/**
+*@brief Reads data by stride . \n
+
+*@par Inputs:
+*One input:
+*x: A Tensor. Must be one of the following types: float16, int8 . \n
+
+*@par Attributes:
+*@li axis: A required int32, specifying the index of the axis to read by stride.
+*@li stride: A required int32, specifying the reading stride . \n
+
+*@par Outputs:
+*y: A Tensor of the same type as "x".
+*/
+REG_OP(StridedRead)
+    .INPUT(x, TensorType::ALL())
+    .OUTPUT(y, TensorType::ALL())
+    .ATTR(axis, Int, 1)
+    .ATTR(stride, Int, 1)
+    .OP_END_FACTORY_REG(StridedRead)
+
+/**
+*@brief Writes data by stride . \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types: float16, int8 . \n
+
+*@par Attributes:
+*@li axis: A required int32, specifying the index of the axis to write by stride.
+*@li stride: A required int32, specifying the writing stride . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x".
+*/
+REG_OP(StridedWrite)
+    .INPUT(x, TensorType::ALL())
+    .OUTPUT(y, TensorType::ALL())
+    .ATTR(axis, Int, 1)
+    .ATTR(stride, Int, 1)
+    .OP_END_FACTORY_REG(StridedWrite)
+
+/**
+*@brief Computes the cumulative log-sum-exp of the tensor "x" along "axis" . \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li x: A Tensor. Must be one of the following types: double, float32, float16.
+*@li axis: A Tensor of type int32 or int16. Defaults to "0".
+*
+*@par Attributes:
+*@li exclusive: If "False", performs inclusive CumulativeLogsumexp, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive CumulativeLogsumexp. Defaults to "False".
+*@li reverse: A bool. Defaults to "False".
+*
+*@par Outputs:
+*@li y: A Tensor. Has the same type as "x".
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator CumulativeLogsumexp.
+*/
+REG_OP(CumulativeLogsumexp)
+    .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
+    .INPUT(axis, TensorType({DT_INT32, DT_INT16}))
+    .OUTPUT(y, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
+    .ATTR(exclusive, Bool, false)
+    .ATTR(reverse, Bool, false)
+    .OP_END_FACTORY_REG(CumulativeLogsumexp)
+
+/**
+*@brief Computes the cumulative log-sum-exp of the tensor "x" along "axis".
+*
+*@par Inputs:
+* One input:
+*x: A Tensor. Must be one of the following types: double, float32, float16.
+*
+*@par Attributes:
+*@li axis: A required int, specifying the dimension along which to compute the cumulative log-sum-exp.
+*@li exclusive: If "False", performs inclusive CumulativeLogsumexp, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive CumulativeLogsumexp. Defaults to "False".
+*@li reverse: A bool. Defaults to "False".
+*
+*@par Outputs:
+*y: A Tensor. Has the same type as "x".
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator CumulativeLogsumexp.
+*
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use CumulativeLogsumexp instead.
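+*
+* For example (an illustrative case of the semantics described above): with x = [1.0, 2.0, 3.0],
+* axis = 0, exclusive = false and reverse = false, the result is
+* y = [1.0, log(exp(1.0) + exp(2.0)), log(exp(1.0) + exp(2.0) + exp(3.0))],
+* which is approximately [1.0, 2.3133, 3.4076].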
+*/
+REG_OP(CumulativeLogsumexpD)
+    .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(axis, Int)
+    .ATTR(exclusive, Bool, false)
+    .ATTR(reverse, Bool, false)
+    .OP_END_FACTORY_REG(CumulativeLogsumexpD)
+
+/**
+* @brief Adds "updates" to "var" according to "axis" and "indices".
+
+* @par Inputs:
+* Three inputs, including:
+* @li var: A Tensor. Must be one of the following types:
+* float16, float32, int16, int32, int8, uint8.
+* @li indices: A Tensor of indices, of type int32.
+* @li updates: A Tensor of the same type as "var". \n
+
+* @par Attributes:
+* @li axis: A required int, specifying the axis along which to perform the indexed add. \n
+
+* @par Outputs:
+* @li var: A Tensor. Has the same type and shape as the input "var".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator index_add_.
+*/
+REG_OP(InplaceIndexAdd)
+    .INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8,
+                            DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8,
+                                DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8,
+                             DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .REQUIRED_ATTR(axis, Int)
+    .OP_END_FACTORY_REG(InplaceIndexAdd)
+
+/**
+* @brief Replaces elements of "x" with "value" where "mask" is true.
+* @par Inputs:
+* Three inputs, including:
+* @li x: A Tensor of type float16, float32, int32 or int8.
+* @li mask: A Tensor of type bool.
+* @li value: A Tensor or scalar of type float16, float32, int32 or int8. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x". Must be one of the following types:
+* float16, float32, int32, int8.
+*/
+REG_OP(MaskedFill)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32}))
+    .INPUT(mask, TensorType({DT_BOOL}))
+    .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32}))
+    .OP_END_FACTORY_REG(MaskedFill)
+
+/**
+* @brief Selects values of "x" according to "mask".
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A Tensor of type float16 or float32.
+* @li mask: A Tensor of type bool. \n
+
+* @par Outputs:
+* @li y: A Tensor with the same type as "x". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Numpy operator select.
+* Replaces the Pytorch operator masked_select in some scenarios.\n
+*/
+REG_OP(MaskedSelectV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mask, TensorType({DT_BOOL}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(MaskedSelectV2)
+
+/**
+* @brief Slices a tensor along its last dimension, e.g. a[..., begin:end:stride]. \n
+
+* @par Inputs:
+* One input, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32, int16, int32.
+
+* @par Attributes:
+* @li start: An attribute of type Int, the start index of the last dim. \n
+* @li end: An attribute of type Int, the end index of the last dim. \n
+* @li stride: An attribute of type Int, the stride of the slice. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x".
\n + +* @par Third-party framework compatibility +* No compatibility +*/ +REG_OP(SliceLastDim) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .REQUIRED_ATTR(start, Int) + .REQUIRED_ATTR(end, Int) + .ATTR(stride, Int, 1) + .OP_END_FACTORY_REG(SliceLastDim) + +/** +* @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n +* extracts a slice of size (end-begin)/stride from the given input tensor. \n +* Starting at the location specified by begin the slice continues by \n +* adding stride to the index until all dimensions are not less than end. \n +* +* @par Inputs: +* Four inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n +* complex128, float16, uint32, uint64, complex64, complex128. \n +* @li begin: A Tensor of type int32 or int64, for the index of the first value to select. +* +* @li end: A Tensor of type int32 or int64, for the index of the last value to select. +* +* @li axes: A Tensor of type int32 or int64, indicate axis to be select. +* +* @li strides: A Tensor of type int32 or int64, for the increment. +* +* @par Attributes: +* @li begin_mask: A Tensor of type int32. \n +* A bitmask where a bit "i" being "1" means to ignore the begin \n +* value and instead use the largest interval possible. +* @li end_mask: A Tensor of type int32. \n +* Analogous to "begin_mask". +* @li ellipsis_mask: A Tensor of type int32. \n +* A bitmask where bit "i" being "1" means the "i"th position \n +* is actually an ellipsis. +* @li new_axis_mask: A Tensor of type int32. \n +* A bitmask where bit "i" being "1" means the "i"th \n +* specification creates a new shape 1 dimension. +* @li shrink_axis_mask: A Tensor of type int32. \n +* A bitmask where bit "i" implies that the "i"th \n +* specification should shrink the dimensionality. +* +* @par Outputs: +* y: A Tensor. Has the same type as "x". +* +* @attention Constraints: +* +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSliceV2. +*/ +REG_OP(StridedSliceV2) + .INPUT(x, TensorType::BasicType()) + .INPUT(begin, TensorType::IndexNumberType()) + .INPUT(end, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) + .ATTR(begin_mask, Int, 0) + .ATTR(end_mask, Int, 0) + .ATTR(ellipsis_mask, Int, 0) + .ATTR(new_axis_mask, Int, 0) + .ATTR(shrink_axis_mask, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(StridedSliceV2) + +/** +*@brief Fills the elements of the input tensor with value val by selecting the indices in the order given in index. \n + +*@par Inputs: +*Three inputs, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32, int32. \n +*@li assist1: A tensor. Must be one of the following types: +* float16, float32, int32. \n +*@li assist2: A tensor. Must be one of the following types: +* float16, float32, int32. \n + +* @par Attributes: +* @li dim: A required int. Used to select the dimension of this tensor. \n + +*@par Outputs: +*y: A Tensor with the same type and shape of input_x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator IndexFill. 
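+*
+* For example (an illustrative case following the index_fill semantics referenced above; how the
+* index list and the fill value are encoded in the "assist1" and "assist2" inputs is
+* implementation-specific and assumed here): for x = [[1, 2], [3, 4], [5, 6]], dim = 0,
+* indices {0, 2} and fill value -1, the result is y = [[-1, -1], [3, 4], [-1, -1]].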
\n +*/ +REG_OP(IndexFillD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(dim, Int) + .OP_END_FACTORY_REG(IndexFillD) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/set_ops.h b/third_party/fwkacllib/inc/inc/ops/set_ops.h new file mode 100644 index 00000000..04e04f1b --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/set_ops.h @@ -0,0 +1,181 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file set_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SET_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SET_OPS_H_ + +#include "graph/operator.h" +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Applies set operation along last dimension of 2 Tensor inputs . \n + +*@par Inputs: +*Inputs include: +* @li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, string. +* @li x2: A Tensor. Must have the same type as x1 . \n + +*@par Attributes: +*@li set_operation: A string. +*@li validate_indices: An optional bool. Defaults to True . \n + +*@par Outputs: +*@li y_indices: A Tensor of type int64. +*@li y_values: A Tensor. Has the same type as x1. +*@li y_shape: A Tensor of type int64 . \n + +*@attention Constraints: +*The implementation for DenseToDenseSetOperation on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow DenseToDenseSetOperation operator. +*/ +REG_OP(DenseToDenseSetOperation) + .INPUT(x1, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_STRING})) + .INPUT(x2, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(y_shape, TensorType({DT_INT64})) + .ATTR(set_operation, String, "") + .ATTR(validate_indices, Bool, true) + .OP_END_FACTORY_REG(DenseToDenseSetOperation) + +/** +*@brief Applies set operation along last dimension of Tensor and SparseTensor . \n + +*@par Inputs: +*Inputs include: +* @li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, string. +* @li x2_indices: A Tensor of type int64. 2D Tensor, indices of a SparseTensor. +* @li x2_values: A Tensor. Must have the same type as set1. 1D Tensor, values of a SparseTensor. +* @li x2_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor . \n + +*@par Attributes: +*@li set_operation: A string. +*@li validate_indices: An optional bool. Defaults to True . \n + +*@par Outputs: +*@li y_indices: A Tensor of type int64. +*@li y_values: A Tensor. Has the same type as x1. 
+*@li y_shape: A Tensor of type int64 . \n + +*@attention Constraints: +*The implementation for DenseToSparseSetOperation on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow DenseToSparseSetOperation operator. +*/ +REG_OP(DenseToSparseSetOperation) + .INPUT(x1, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_STRING})) + .INPUT(x2_indices, TensorType({DT_INT64})) + .INPUT(x2_values, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_STRING})) + .INPUT(x2_shape, TensorType({DT_INT64})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(y_shape, TensorType({DT_INT64})) + .ATTR(set_operation, String, "") + .ATTR(validate_indices, Bool, true) + .OP_END_FACTORY_REG(DenseToSparseSetOperation) + +/** +*@brief Applies set operation along last dimension of 2 SparseTensor inputs . \n + +*@par Inputs: +*Inputs include: +* @li x1_indices: A Tensor of type int64. 2D Tensor, indices of a SparseTensor. +* @li x1_values: A Tensor. Must be one of the following types: int8, int16, + int32, int64, uint8, uint16, string. 1D Tensor, values of a SparseTensor. +* @li x1_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor. +* @li x2_indices: A Tensor of type int64. 2D Tensor, indices of a SparseTensor. +* @li x2_values: A Tensor. Must have the same type as set1_values. 1D Tensor, values of a SparseTensor. +* @li x2_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor . \n + +*@par Attributes: +*@li set_operation: A string. +*@li validate_indices: An optional bool. Defaults to True . \n + +*@par Outputs: +*@li y_indices: A Tensor of type int64. +*@li y_values: A Tensor. Has the same type as x1_values. +*@li y_shape: A Tensor of type int64 . \n + +*@attention Constraints: +*The implementation for SparseToSparseSetOperation on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow SparseToSparseSetOperation operator. +*/ +REG_OP(SparseToSparseSetOperation) + .INPUT(x1_indices, TensorType({DT_INT64})) + .INPUT(x1_values, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_STRING})) + .INPUT(x1_shape, TensorType({DT_INT64})) + .INPUT(x2_indices, TensorType({DT_INT64})) + .INPUT(x2_values, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_STRING})) + .INPUT(x2_shape, TensorType({DT_INT64})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_STRING})) + .OUTPUT(y_shape, TensorType({DT_INT64})) + .ATTR(set_operation, String, "") + .ATTR(validate_indices, Bool, true) + .OP_END_FACTORY_REG(SparseToSparseSetOperation) + +/** +*@brief Number of unique elements along last dimension of input set . \n + +*@par Inputs: +*Inputs include: +* @li set_indices: A Tensor of type int64. 2D Tensor, indices of a SparseTensor. +* @li set_values: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16. +* @li set_shape: A Tensor of type int64. 1D Tensor, shape of a SparseTensor . \n + +*@par Attributes: +*validate_indices: An optional bool. Defaults to True . \n + +*@par Outputs: +*size: A Tensor of type int32 . \n + +*@attention Constraints: +*The implementation for SetSize on Ascend uses AICPU, with bad performance. 
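+*
+* For example (an illustrative case): for set_indices = [[0, 0], [0, 1], [0, 2], [1, 0]],
+* set_values = [1, 9, 9, 4] and set_shape = [2, 3], the first row holds the set {1, 9} and the
+* second row holds {4}, so size = [2, 1].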
+ +*@par Third-party framework compatibility +*@li compatible with tensorflow SetSize operator. +*/ +REG_OP(SetSize) + .INPUT(set_indices, TensorType({DT_INT64})) + .INPUT(set_values, TensorType({DT_INT8, DT_INT16, \ + DT_UINT8, DT_UINT16, DT_INT32, DT_INT64, DT_STRING})) + .INPUT(set_shape, TensorType({DT_INT64})) + .OUTPUT(size, TensorType({DT_INT32})) + .ATTR(validate_indices, Bool, true) + .OP_END_FACTORY_REG(SetSize) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_SET_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/inc/ops/sparse_ops.h new file mode 100644 index 00000000..a1fc9ee6 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/sparse_ops.h @@ -0,0 +1,1047 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file sparse_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Applies softmax to a batched ND SparseTensor . \n + +*@par Inputs: +*The input must be a batched ND SparseTensor. +* @li indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. +* @li values: A vector Tensor of type float or double. 1D. The values of the SparseTensor. +* @li shape: A vector Tensor of type int64. 1D. The shape of the SparseTensor . \n + +*@par Outputs: +*y: A vector Tensor. 1D. Has the same type as "values" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SparseSoftmax. +*/ +REG_OP(SparseSoftmax) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(shape, TensorType({DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(SparseSoftmax) + +/** +*@brief Adds up a SparseTensor and a dense Tensor, producing a dense Tensor . \n + +*@par Inputs: +*Inputs "x1_*" must be SparseTensors and "x2" must be a dense Tensor. +* @li x1_indices: A matrix Tensor of type int32 or int64. 2D. The indices of the SparseTensor. +* @li x1_values: The values of the SparseTensor. A vector Tensor. 1D. +* @li x1_shape: A vector Tensor of type int32 or int64. 1D. The shape of the SparseTensor. +* @li x2: A matrix Tensor. Has the same type and same shape as the SparseTensors . \n + +*@par Outputs: +*y: A matrix Tensor. Has the same type and same shape as "x2" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseTensorDenseAdd. 
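+*
+* For example (an illustrative case): for x1_indices = [[0, 0], [1, 1]], x1_values = [5, 7],
+* x1_shape = [2, 2] and x2 = [[1, 2], [3, 4]], the sparse values are added to the dense tensor
+* at their indices, giving y = [[6, 2], [3, 11]].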
+*/ + +REG_OP(SparseTensorDenseAdd) + .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) + .INPUT(x1_values, TensorType({DT_INT64, DT_INT32, DT_UINT16, DT_INT16, DT_UINT8, DT_INT8, \ + DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x1_shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(x2, TensorType({DT_INT64, DT_INT32, DT_UINT16, DT_INT16, DT_UINT8, DT_INT8, \ + DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_INT64, DT_INT32, DT_UINT16, DT_INT16, DT_UINT8, DT_INT8, \ + DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(SparseTensorDenseAdd) + +/** +*@brief Reorders a SparseTensor into the canonical, row-major ordering . \n + +*@par Inputs: +* @li indices: A matrix Tensor of type int32 or int64. 2D. The indices of the SparseTensor. +* @li values: Values of the SparseTensor. A vector Tensor. 1D. +* @li shape: A vector Tensor of type int32 or int64. 1D. The shape of the SparseTensor . \n + +*@par Outputs: +*@li y_indices: The indices of the SparseTensor. Has the same type as "indices". +*@li y_values: The values of the SparseTensorr. Has the same type as "values" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseReorder. +*/ +REG_OP(SparseReorder) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .INPUT(shape, TensorType({DT_INT64})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .OP_END_FACTORY_REG(SparseReorder) + +/** +*@brief Reshapes a SparseTensor to represent values in a new dense shape . \n + +*@par Inputs: +* @li indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. +* @li shape: A vector Tensor of type int64. 1D. The shape of the SparseTensor. +* @li new_shape: A 1D Tensor of type int64. The requested new dense shape . \n + +*@par Outputs: +*@li y_indices: A Tensor of type int64. The indices of the new dense shape. +*@li y_shape: A Tensor of type int64. The shape of the new dense shape . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseReshape. +*/ +REG_OP(SparseReshape) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(shape, TensorType({DT_INT64})) + .INPUT(new_shape, TensorType({DT_INT64})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_shape, TensorType({DT_INT64})) + .OP_END_FACTORY_REG(SparseReshape) + +/** +*@brief Adds up a SparseTensor and a dense Tensor. +*@par Inputs: +*(1) Broadcasts the dense side to have the same shape as the sparse side, if eligible; +*(2) Then, only the dense values pointed to by the indices of the SparseTensor participate in the cwise addition. +* @li x1_indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. +* @li x1_values: The values of the SparseTensor. A vector Tensor. 1D. +* @li x1_shape: A 1D Tensor of type int64. The requested new dense shape. +* @li x2: A dense Tensor of the same type as "x1_values" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1_values" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseDenseCwiseAdd. 
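+*
+* For example (an illustrative case): for x1_indices = [[0, 0], [1, 1]], x1_values = [5, 7],
+* x1_shape = [2, 2] and x2 = [[1, 2], [3, 4]], only the dense elements at the sparse indices
+* participate, so y = [5 + 1, 7 + 4] = [6, 11].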
+*/ +REG_OP(SparseDenseCwiseAdd) + .INPUT(x1_indices, TensorType({DT_INT64})) + .INPUT(x1_values, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, \ + DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x1_shape, TensorType({DT_INT64})) + .INPUT(x2, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, \ + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, \ + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(SparseDenseCwiseAdd) + +/** +*@brief Divides a SparseTensor by a dense Tensor . \n + +*@par Inputs: +* @li x1_indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. +* @li x1_values: The values of the SparseTensor. A vector Tensor. 1D. +* @li x1_shape: A 1D Tensor of type int64. The requested new dense shape. +* @li x2: A dense Tensor of the same type as "x1_values" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1_values" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseDenseCwiseDiv. +*/ +REG_OP(SparseDenseCwiseDiv) + .INPUT(x1_indices, TensorType({DT_INT64})) + .INPUT(x1_values, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, \ + DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x1_shape, TensorType({DT_INT64})) + .INPUT(x2, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, \ + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, \ + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(SparseDenseCwiseDiv) + +/** +*@brief Multiplies a SparseTensor by a dense Tensor . \n + +*@par Inputs: +* @li x1_indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. +* @li x1_values: The values of the SparseTensor. A vector Tensor. 1D. +* @li x1_shape: A 1D Tensor of type int64. The requested new dense shape. +* @li x2: A dense Tensor of the same type as "x1_values" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x1_values" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseDenseCwiseMul. +*/ +REG_OP(SparseDenseCwiseMul) + .INPUT(x1_indices, TensorType({DT_INT64})) + .INPUT(x1_values, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, \ + DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x1_shape, TensorType({DT_INT64})) + .INPUT(x2, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, \ + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, \ + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(SparseDenseCwiseMul) + +/** +*@brief Adds a SparseTensor to a SparseTensorsMap . \n + +*@par Inputs: +* The input tensor must be a SparseTensor. +* @li x1_indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. +* @li x1_values: The values of the SparseTensor. A vector Tensor. 1D. +* @li x1_shape: A 1D Tensor of type int64. The requested new dense shape . \n + +*@par Attributes: +*@li container: An optional string. Defaults to " ". +*@li shared_name: An optional string. Defaults to " " . 
\n + +*@par Outputs: +*handle: A Tensor of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AddSparseToTensorsMap. +*/ +REG_OP(AddSparseToTensorsMap) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .INPUT(shape, TensorType({DT_INT64})) + .OUTPUT(handle, TensorType({DT_INT64})) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(AddSparseToTensorsMap) + +/** +*@brief The gradient operator for the SparseSlice op . \n + +*@par Inputs: +* @li backprop_val_grad: A Tensor. +* @li indices: A matrix Tensor of type int64. 2D. The indices of the SparseTensor. +* @li start: A 1D Tensor of type int64. The start of the slice. +* @li new_indices: A matrix Tensor of type int64. 2D. The indices of the sliced SparseTensor . \n + +*@par Outputs: +*y_grad: A Tensor of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseSliceGrad. +*/ +REG_OP(SparseSliceGrad) + .INPUT(backprop_val_grad, TensorType({ DT_INT8, DT_UINT8, DT_INT16, + DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(start, TensorType({DT_INT64})) + .INPUT(new_indices, TensorType({DT_INT64})) + .OUTPUT(y_grad, TensorType({ DT_INT8, DT_UINT8, DT_INT16, + DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, + DT_COMPLEX64, DT_COMPLEX128 })) + .OP_END_FACTORY_REG(SparseSliceGrad) + +/** +*@brief Slices a SparseTensor based on the "start" and "size" . \n + +*@par Inputs: +* @li indices: A 2D Tensor of type int64. The indices of the SparseTensor. +* @li values: A 1D Tensor. The values of the SparseTensor. +* @li shape: A 2D Tensor of type int64. The shape of the SparseTensor. +* @li start: A 1D Tensor of type int64. The start of the slice. +* @li size: A 1D Tensor of type int64. The size of the slice . \n + +*@par Outputs: +*y_indices: A Tensor of type int64. +*y_values: A Tensor. Has the same type as "values". +*y_values: A Tensor of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseSlice. +*/ +REG_OP(SparseSlice) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_INT64, DT_INT32, DT_UINT16, DT_INT16, \ + DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, \ + DT_COMPLEX128, DT_BOOL, DT_STRING, DT_RESOURCE})) + .INPUT(shape, TensorType({DT_INT64})) + .INPUT(start, TensorType({DT_INT64})) + .INPUT(size, TensorType({DT_INT64})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, TensorType({DT_INT64, DT_INT32, DT_UINT16, DT_INT16, \ + DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, \ + DT_COMPLEX128, DT_BOOL, DT_STRING, DT_RESOURCE})) + .OUTPUT(y_shape, TensorType({DT_INT64})) + .OP_END_FACTORY_REG(SparseSlice) + +/** +*@brief The gradient operator for the SparseAdd op . \n + +*@par Inputs: +* @li backprop_val_grad: A 1D Tensor with shape [nnz(sum)]. The gradient with respect to the non-empty values of the sum. +* @li x1_indices: A 2D Tensor of type int64. The indices of the SparseTensor A, with size [nnz(A), ndims]. +* @li x2_indices: A 2D Tensor of type int64. The indices of the SparseTensor B, with size [nnz(B), ndims]. +* @li sum_indices: A 2D Tensor of type int64. 
The indices of the sum SparseTensor, with size [nnz(sum), ndims] . \n + +*@par Outputs: +*x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". +*x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseAddGrad. +*/ +REG_OP(SparseAddGrad) + .INPUT(backprop_val_grad, TensorType({DT_INT8, DT_INT16, DT_INT32, + DT_INT64, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x1_indices, TensorType({DT_INT64})) + .INPUT(x2_indices, TensorType({DT_INT64})) + .INPUT(sum_indices, TensorType({DT_INT64})) + .OUTPUT(x1_val_grad, TensorType({DT_INT8, DT_INT16, DT_INT32, + DT_INT64, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(x2_val_grad, TensorType({DT_INT8, DT_INT16, DT_INT32, + DT_INT64, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(SparseAddGrad) + +/** +*@brief The gradient of SparseFillEmptyRows . \n + +*@par Inputs: +* @li reverse_index_map: A 1D Tensor of type int64. The reverse index map from SparseFillEmptyRows. +* @li grad_values: A 1D Tensor. The gradients from backprop . \n + +*@par Outputs: +*@li y_value: A Tensor. Has the same type as "grad_values". +*@li y_default_value: A Tensor. Has the same type as "grad_values" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseFillEmptyRowsGrad. +*/ +REG_OP(SparseFillEmptyRowsGrad) + .INPUT(reverse_index_map, TensorType({DT_INT64})) + .INPUT(grad_values, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .OUTPUT(y_value, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .OUTPUT(y_default_value, TensorType({DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .OP_END_FACTORY_REG(SparseFillEmptyRowsGrad) + +/** +*@brief Multiplies SparseTensor A (of rank 2) by dense matrix B . \n + +*@par Inputs: +* @li x1_indices: A 2D Tensor of type int32 or int64. +* @li The indices of the matrix "SparseTensor", with size [nnz, 2]. +* @li x1_values: A 1D Tensor. The values of the SparseTensor, with size [nnz]. +* @li x1_shape: A 1D Tensor of type int64. The shape of the SparseTensor, with size [2]. +* @li x2: A dense matrix Tensor of the same type as "x1_values". 2D . \n + +*@par Outputs: +*y: A "Tensor". Has the same type as "x1_values" . \n + +*@par Attributes: +*@li adjoint_a: An optional bool. Defaults to "False".Use the adjoint of A in the matrix multiply. +*@li If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A). +*@li adjoint_b: An optional bool. Defaults to "False".Use the adjoint of B in the matrix multiply. +*@li If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseTensorDenseMatMul. 
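+*
+* For example (an illustrative case with adjoint_a = adjoint_b = false): for a SparseTensor A
+* given by x1_indices = [[0, 0], [1, 1]], x1_values = [1, 2], x1_shape = [2, 2] (dense form
+* [[1, 0], [0, 2]]) and x2 = [[3, 4], [5, 6]], the result is y = [[3, 4], [10, 12]].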
+*/ +REG_OP(SparseTensorDenseMatMul) + .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) + .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ + DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64})) + .INPUT(x1_shape, TensorType({DT_INT64})) + .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ + DT_COMPLEX128, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ + DT_COMPLEX128, DT_FLOAT16})) + .ATTR(adjoint_a, Bool, false) + .ATTR(adjoint_b, Bool, false) + .OP_END_FACTORY_REG(SparseTensorDenseMatMul) + +/** +*@brief Converts a sparse representation into a dense tensor . \n + +*@par Inputs: +* @li indices: A 0D, 1D, or 2D Tensor of type int32 or int64. +* @li output_shape: A 1D Tensor of the same type as "sparse_indices". The shape of the dense output tensor. +* @li values: A 1D Tensor. Values corresponding to each row of "sparse_indices", +* @li or a scalar value to be used for all sparse indices. +* @li default_value: A Tensor of the same type as "sparse_values" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "values" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseToDense. +*/ +REG_OP(SparseToDense) + .INPUT(indices, TensorType({DT_INT32, DT_INT64})) + .INPUT(output_shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(values, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL, DT_DOUBLE})) + .INPUT(default_value, TensorType({DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL, \ + DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL, DT_DOUBLE})) + .ATTR(validate_indices, Bool, true) + .OP_END_FACTORY_REG(SparseToDense) + +/** +*@brief Concatenates a list of `SparseTensor` along the specified dimension. +*Concatenation is with respect to the dense versions of these sparse tensors . \n + +*@par Inputs: +*3 or 5 inputs,contains: +* @li indices:A list of at least 2 `Tensor` objects with type `int64`.2-D. +*Indices of each input `SparseTensor`.It's a dynamic input. +* @li values:A list with the same length as `indices` of `Tensor` objects with the same type. +It's a dynamic input. +* @li shapes:A list with the same length as `indices` of `Tensor` objects with type `int64`.1-D. +* Shapes of each `SparseTensor`. It's a dynamic input. \n + +*@par Attributes: +*@li concat_dim: An `int` Dimension to concatenate along +*@li N:Number of sparse + +*@par Outputs: +* @li y_indices:A `Tensor` of type `int64`. +* @li y_values:A `Tensor`. Has the same type as `values`. +* @li y_shape:A `Tensor` of type `int64` . 
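+*
+* For example (an illustrative case with concat_dim = 1 and N = 2): concatenating a [2, 2]
+* SparseTensor holding value 1 at index [0, 0] with a [2, 3] SparseTensor holding value 2 at
+* index [1, 2] gives y_shape = [2, 5], y_indices = [[0, 0], [1, 4]] and y_values = [1, 2];
+* indices of the second input are shifted by the first input's size along the concat dimension.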
\n + +*@par Third-party framework compatibility +* Compatible SparseConcat operator in Tensorflow +*/ +REG_OP(SparseConcat) + .DYNAMIC_INPUT(indices, TensorType({DT_INT64})) + .DYNAMIC_INPUT(values, + TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .DYNAMIC_INPUT(shapes, TensorType({DT_INT64})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, + TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .OUTPUT(y_shape, TensorType({DT_INT64})) + .ATTR(concat_dim, Int, 0) + .ATTR(N, Int, 1) + .OP_END_FACTORY_REG(SparseConcat) + +/** +*@brief Adds two `SparseTensor` objects to produce another `SparseTensor` . \n + +*@par Inputs: +*7 inputs, contains: +* @li x1_indices:A `Tensor` of type `int64`.2-D. +* The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. +* @li x1_values:A `Tensor`. Must be one of the following types:float,int8,int16,int32,int64, float64. +* @li x1_shape:A `Tensor` of type `int64`.1-D. The `shape` of the first `SparseTensor`, +* size `[ndims]` Vector. +* @li x2_indices:A `Tensor` of type `int64`.2-D.The `indices` of the second `SparseTensor`, +* size `[nnz, ndims]` Matrix. +* @li x2_values:A `Tensor`. Must have the same type as `a_values`.1-D. +* The `values` of the second `SparseTensor`, size `[nnz]` Vector. +* @li x2_shape:A `Tensor` of type `int64`.1-D. +* The `shape` of the second `SparseTensor`, size `[ndims]` Vector. +* @li thresh:A `Tensor` 0-D.The magnitude threshold that determines if an output value/index pair takes space . \n + +*@par Outputs: +* @li sum_indices:A `Tensor` of type `int64`. +* @li sum_values:A `Tensor`. Has the same type as `x1_values`. +* @li sum_shape:A `Tensor` of type `int64` . \n + +*@par Third-party framework compatibility +* Compatible SparseAdd operator in Tensorflow +*/ +REG_OP(SparseAdd) + .INPUT(x1_indices, TensorType({DT_INT64})) + .INPUT(x1_values, TensorType({DT_FLOAT, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x1_shape, TensorType({DT_INT64})) + .INPUT(x2_indices, TensorType({DT_INT64})) + .INPUT(x2_values, TensorType({DT_FLOAT, DT_INT8, DT_INT16, DT_INT32, \ + DT_INT64, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x2_shape, TensorType({DT_INT64})) + .INPUT(thresh, TensorType({DT_FLOAT, DT_INT8, DT_INT16, DT_INT32, \ + DT_INT64, DT_DOUBLE})) + .OUTPUT(sum_indices, TensorType({DT_INT64})) + .OUTPUT(sum_values, TensorType({DT_FLOAT, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(sum_shape, TensorType({DT_INT64})) + .OP_END_FACTORY_REG(SparseAdd) + +/** +*@brief Fills empty rows in the input 2-D `SparseTensor` with a default value . \n + +*@par Inputs: +*4 inputs,contains: +* @li indices: A `Tensor` of type `int64`.2-D. the indices of the sparse tensor. +* @li values: A `Tensor`. 1-D. the values of the sparse tensor. +* @li dense_shape: A `Tensor` of type `int64`.1-D. the shape of the sparse tensor. +* @li default_value: `Tensor`. Must have the same type as `values`. +*0-D. default value to insert into location `[row, 0, ..., 0]` +*for rows missing from the input sparse tensor . \n + +*@par Outputs: +* @li y_indices:A `Tensor` of type `int64`. +* @li y_values:A `Tensor`. Has the same type as `values`. +* @li empty_row_indicator:A `Tensor` of type `bool`. 
+* @li reverse_index_map:A `Tensor` of type `int64` . \n + +*@par Third-party framework compatibility +* Compatible SparseFillEmptyRows operator in Tensorflow +*/ +REG_OP(SparseFillEmptyRows) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .INPUT(dense_shape, TensorType({DT_INT64})) + .INPUT(default_value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, \ + DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, \ + DT_INT16, DT_UINT16, DT_UINT8, \ + DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .OUTPUT(empty_row_indicator, TensorType({DT_BOOL})) + .OUTPUT(reverse_index_map, TensorType({DT_INT64})) + .OP_END_FACTORY_REG(SparseFillEmptyRows) + +/** +*@brief Returns the element-wise max of two SparseTensors . \n + +*@par Inputs: +*6 inputs,contains: +* @li x1_indices:A `Tensor` of type `int64`.2-D. +*`N x R` matrix with the indices of non-empty values in a SparseTensor, +* in the canonical lexicographic ordering. +* @li x1_values:A `Tensor`. 1-D. the values of the sparse tensor. +* @li x1_shape:A `Tensor` of type `int64`.1-D. the shape of the sparse tensor. +* @li x2_indices:A `Tensor` of type `int64`.2-D. the indices of the sparse tensor. +* @li x2_values:A `Tensor`. 1-D. Must have the same type as `x1_values`. +* @li x2_shape:A `Tensor` of type `int64`.1-D. +*counterpart to `a_shape` for the other operand; the two shapes must be equal . \n + +*@par Outputs: +* @li y_indices:A `Tensor` of type `int64`. +* @li y_values:A `Tensor`. Has the same type as `x1_values` . \n + +*@par Third-party framework compatibility +* Compatible SparseSparseMaximum operator in Tensorflow +*/ +REG_OP(SparseSparseMaximum) + .INPUT(x1_indices, TensorType({DT_INT64})) + .INPUT(x1_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .INPUT(x1_shape, TensorType({DT_INT64})) + .INPUT(x2_indices, TensorType({DT_INT64})) + .INPUT(x2_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .INPUT(x2_shape, TensorType({DT_INT64})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .OP_END_FACTORY_REG(SparseSparseMaximum) + +/** +*@brief Returns the element-wise min of two SparseTensors . \n + +*@par Inputs: +*6 inputs,contains: +* @li x1_indices:A `Tensor` of type `int64`.2-D. +*`N x R` matrix with the indices of non-empty values in a SparseTensor, +* in the canonical lexicographic ordering. +* @li x1_values:A `Tensor`. 1-D. the values of the sparse tensor. +* @li x1_shape:A `Tensor` of type `int64`.1-D. the shape of the sparse tensor. +* @li x2_indices:A `Tensor` of type `int64`.2-D. the indices of the sparse tensor. +* @li x2_values:A `Tensor`. 1-D. Must have the same type as `x1_values`. +* @li x2_shape:A `Tensor` of type `int64`.1-D. +*counterpart to `a_shape` for the other operand; the two shapes must be equal . \n + +*@par Outputs: +* @li y_indices:A `Tensor` of type `int64`. +* @li y_values:A `Tensor`. Has the same type as `x1_values` . 
\n + +*@par Third-party framework compatibility +* Compatible SparseSparseMinimum operator in Tensorflow +*/ +REG_OP(SparseSparseMinimum) + .INPUT(x1_indices, TensorType({DT_INT64})) + .INPUT(x1_values, TensorType({DT_INT64, DT_INT32, \ + DT_UINT16, DT_INT16, DT_UINT8, DT_INT8, DT_FLOAT16, \ + DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x1_shape, TensorType({DT_INT64})) + .INPUT(x2_indices, TensorType({DT_INT64})) + .INPUT(x2_values, TensorType({DT_INT64, DT_INT32, \ + DT_UINT16, DT_INT16, DT_UINT8, DT_INT8, DT_FLOAT16, \ + DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x2_shape, TensorType({DT_INT64})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, TensorType({DT_INT64, DT_INT32, \ + DT_UINT16, DT_INT16, DT_UINT8, DT_INT8, DT_FLOAT16, \ + DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(SparseSparseMinimum) + +/** +*@brief Computes the max of elements across dimensions of a SparseTensor . \n + +*@par Inputs: +*4 or 5 inputs,contains: +* @li x_indices:A `Tensor` of type `int64`.2-D. +*`N x R` matrix with the indices of non-empty values in a +*SparseTensor, possibly not in canonical ordering. +* @li x_values:A `Tensor`. 1-D. the values of the sparse tensor. +*`N` non-empty values corresponding to `input_indices`. +* @li x_shape:A `Tensor` of type `int64`.1-D. Shape of the input SparseTensor. +* @li reduction_axes:A `Tensor` of type `int32`.1-D. +*Length-`K` vector containing the reduction axes . \n + +*@par Attributes: +* keep_dims:An optional `bool`. Defaults to `False`. +*If true, retain reduced dimensions with length 1 . \n + +*@par Outputs: +* y:A `Tensor`. Has the same type as `input_values` . \n + +*@par Third-party framework compatibility +* Compatible SparseReduceMax operator in Tensorflow +*/ +REG_OP(SparseReduceMax) + .INPUT(x_indices, TensorType({DT_INT64})) + .INPUT(x_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .INPUT(x_shape, TensorType({DT_INT64})) + .INPUT(reduction_axes, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(SparseReduceMax) + +/** +*@brief Computes the max of elements across dimensions of a SparseTensor . \n + +*@par Inputs: +*4 or 5 inputs,contains: +* @li x_indices:A `Tensor` of type `int64`.2-D. +*`N x R` matrix with the indices of non-empty values in a +*SparseTensor, possibly not in canonical ordering. +* @li x_values:A `Tensor`. 1-D. the values of the sparse tensor. +*`N` non-empty values corresponding to `input_indices`. +* @li x_shape:A `Tensor` of type `int64`.1-D. Shape of the input SparseTensor. +* @li reduction_axes:A `Tensor` of type `int32`.1-D. +*Length-`K` vector containing the reduction axes . \n + +*@par Attributes: +* keep_dims:An optional `bool`. Defaults to `False`. +*If true, retain reduced dimensions with length 1 . \n + +*@par Outputs: +* @li y_indices:A `Tensor` of type `int64`. +* @li y_values:A `Tensor`. Has the same type as `input_values`. +* @li y_shape:A `Tensor` of type `int64` . 
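+*
+* For example (an illustrative case): for x_indices = [[0, 0], [0, 1], [1, 1]], x_values = [3, 5, 2],
+* x_shape = [2, 2], reduction_axes = [1] and keep_dims = false, the per-row maxima over the stored
+* values are [5, 2]; in this sparse variant the result would be returned as y_indices = [[0], [1]],
+* y_values = [5, 2] and y_shape = [2].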
\n + +*@par Third-party framework compatibility +* Compatible SparseReduceMaxSparse operator in Tensorflow +*/ +REG_OP(SparseReduceMaxSparse) + .INPUT(x_indices, TensorType({DT_INT64})) + .INPUT(x_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .INPUT(x_shape, TensorType({DT_INT64})) + .INPUT(reduction_axes, TensorType({DT_INT32})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) + .OUTPUT(y_shape, TensorType({DT_INT64})) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(SparseReduceMaxSparse) + +/** +*@brief Computes the sum of elements across dimensions of a SparseTensor . \n + +*@par Inputs: +*4 or 5 inputs, including: +* @li x_indices: A 2D Tensor of type int64. +*"N x R" matrix with the indices of non-empty values in a +*SparseTensor, possibly not in canonical ordering. +* @li x_values: A 1D Tensor. The values of the SparseTensor. +*"N" non-empty values corresponding to "input_indices". +* @li x_shape: A 1D Tensor of type int64. Shape of the input SparseTensor. +* @li reduction_axes: A 1D Tensor of type int32. +*A length-"K" vector containing the reduction axes . \n + +*@par Attributes: +* keep_dims: An optional bool. Defaults to "False". +*If true, retains reduced dimensions with length 1 . \n + +*@par Outputs: +* @li y_indices: A Tensor of type int64. +* @li y_values: A Tensor. Has the same type as "input_values". +* @li y_shape: A Tensor of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseReduceSum. +*/ +REG_OP(SparseReduceSum) + .INPUT(x_indices, TensorType({DT_INT64})) + .INPUT(x_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x_shape, TensorType({DT_INT64})) + .INPUT(reduction_axes, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(SparseReduceSum) + +/** +*@brief Computes the sum of elements across dimensions of a SparseTensor . \n + +*@par Inputs: +*4 or 5 inputs, including: +* @li x_indices: A 2D Tensor of type int64. +*"N x R" matrix with the indices of non-empty values in a +*SparseTensor, possibly not in canonical ordering. +* @li x_values: A 1D Tensor. The values of the SparseTensor. +*"N" non-empty values corresponding to "input_indices". +* @li x_shape: A 1D Tensor of type int64. Shape of the input SparseTensor. +* @li reduction_axes: A 1D Tensor of type int32. +* A length-"K" vector containing the reduction axes . \n + +*@par Attributes: +* keep_dims: An optional bool. Defaults to "False". +*If true, retains reduced dimensions with length 1 . \n + +*@par Outputs: +* @li y_indices: A Tensor of type int64. +* @li y_values: A Tensor. Has the same type as "input_values". +* @li y_shape: A Tensor of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseReduceSumSparse. 
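+*
+* For example (an illustrative case): for x_indices = [[0, 0], [0, 1], [1, 1]], x_values = [3, 5, 2],
+* x_shape = [2, 2], reduction_axes = [1] and keep_dims = false (dense equivalent [[3, 5], [0, 2]]),
+* the per-row sums are [8, 2], returned here in SparseTensor form as y_indices = [[0], [1]],
+* y_values = [8, 2] and y_shape = [2].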
+*/ +REG_OP(SparseReduceSumSparse) + .INPUT(x_indices, TensorType({DT_INT64})) + .INPUT(x_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(x_shape, TensorType({DT_INT64})) + .INPUT(reduction_axes, TensorType({DT_INT32})) + .OUTPUT(y_indices, TensorType({DT_INT64})) + .OUTPUT(y_values, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ + DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y_shape, TensorType({DT_INT64})) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(SparseReduceSumSparse) + +/** +*@brief Splits a SparseTensor into "num_split" tensors along one dimension . \n + +*@par Inputs: +*4 or 5 inputs, including: +* @li split_dim: A 0D Tensor of type int64. +*The dimension along which to split. Must be in the range "[0, rank(shape))". +* @li indices: A 2D Tensor of type int64. +* The indices of the SparseTensor. +* @li values: A 1D Tensor. The values of the SparseTensor. +* @li shape: A 1D Tensor of type int64. Shape of the SparseTensor . \n + +*@par Attributes: +* num_split: An int that is >= 1. The number of ways to split . \n + +*@par Outputs: +* @li y_indices: A list of "num_split" Tensor objects of type int64. +* @li y_values: A list of "num_split" Tensor objects with the same type as "values". +* @li y_shape: A list of "num_split" Tensor objects of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseSplit. +*/ +REG_OP(SparseSplit) + .INPUT(split_dim, TensorType({DT_INT64})) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_INT64, DT_INT32, DT_UINT16, DT_INT16, \ + DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, \ + DT_COMPLEX128, DT_BOOL, DT_STRING, DT_RESOURCE})) + .INPUT(shape, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(y_indices, TensorType({DT_INT64})) + .DYNAMIC_OUTPUT(y_values, TensorType({DT_INT64, DT_INT32, DT_UINT16, \ + DT_INT16, DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_BOOL, DT_STRING, DT_RESOURCE})) + .DYNAMIC_OUTPUT(y_shape, TensorType({DT_INT64})) + .ATTR(num_split, Int, 1) + .OP_END_FACTORY_REG(SparseSplit) + +/** +*@brief Generates sparse cross from a list of sparse and dense tensors . \n + +*@par Inputs: +*8 or 10 inputs, including: +* @li indices: A list of 2D Tensor objects of type int64. +* Indices of each input SparseTensor.It's a dynamic input. +* @li values: A list of 1D Tensor objects of type int64 or string. +* Values of each SparseTensor.It's a dynamic input. +* @li shapes: A list with the same length as "indices" of 1D Tensor objects of type int64. +* Shapes of each SparseTensor.It's a dynamic input. +* @li dense_inputs: A list of 2D Tensor objects of type int64 or string. +* Columns represented by dense Tensor .It's a dynamic input. \n + +*@par Attributes: +* @li N: number of sparse. +* @li hashed_output: A bool. If true, returns the hash of the cross instead of the string. +* @li num_buckets: An int that is >= 0. It is used if "hashed_output" is true. +*output = hashed_value%num_buckets if num_buckets > 0 else "hashed_value". +* @li hash_key: An int. Specify the hash_key that will be used by the "FingerprintCat64" +*function to combine the crosses fingerprints. +* @li out_type: An int64 or string. +* @li internal_type: An int64 or string . \n + +*@par Outputs: +* @li output_indices: A Tensor of type int64. 
+* @li output_values: A Tensor of type "out_type". +* @li output_shape: A Tensor of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseCross. +*/ +REG_OP(SparseCross) + .DYNAMIC_INPUT(indices, TensorType({DT_INT64})) + .DYNAMIC_INPUT(values, TensorType({DT_INT64, DT_STRING})) + .DYNAMIC_INPUT(shapes, TensorType({DT_INT64})) + .DYNAMIC_INPUT(dense_inputs, TensorType({DT_INT64, DT_STRING})) + .OUTPUT(output_indices, TensorType({DT_INT64})) + .OUTPUT(output_values, TensorType({DT_INT64, DT_STRING})) + .OUTPUT(output_shape, TensorType({DT_INT64})) + .ATTR(N, Int, 0) + .REQUIRED_ATTR(hashed_output, Bool) + .ATTR(num_buckets, Int, 0) + .REQUIRED_ATTR(hash_key, Int) + .REQUIRED_ATTR(out_type, Type) + .REQUIRED_ATTR(internal_type, Type) + .OP_END_FACTORY_REG(SparseCross) + +/** +*@brief Generates sparse cross from a list of sparse and dense tensors . \n + +*@par Inputs: +*3 or 5 inputs, including: +* @li indices: A 2D Tensor of type int64. +* The "indices" of the minibatch SparseTensor. +* @li values: A 1D Tensor. The "values" of the minibatch SparseTensor. +* @li shape: A 1D Tensor of type int64. The "shape" of the minibatch SparseTensor . \n + +*@par Attributes: +* @li container: An optional string. Defaults to "". +*The container name for the "SparseTensorsMap" created by this op. +* @li shared_name: An optional string. Defaults to "". +*The shared name for the "SparseTensorsMap" created by this op . \n + +*@par Outputs: +* handles: A Tensor of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AddManySparseToTensorsMap. +*/ +REG_OP(AddManySparseToTensorsMap) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .INPUT(shape, TensorType({DT_INT64})) + .OUTPUT(handles, TensorType({DT_INT64})) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(AddManySparseToTensorsMap) + +/** +*@brief Reads SparseTensors from a "SparseTensorsMap" and concatenate them . \n + +*@par Inputs: +*2 or 4 inputs, including: +* handles: A 1D Tensor of type int64. +* The "N" serialized SparseTensor objects . \n + +*@par Attributes: +* @li dtype: A tf.DType. The "dtype" of the SparseTensor objects stored in the "SparseTensorsMap". +* @li container: An optional string. Defaults to "". +*The container name for the "SparseTensorsMap" read by this op. +* @li shared_name: An optional string. Defaults to "". +*The shared name for the "SparseTensorsMap" read by this op . \n + +*@par Outputs: +* @li indices: A Tensor of type int64. +* @li values: A Tensor of type "dtype". +* @li shape: A Tensor of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator TakeManySparseFromTensorsMap. +*/ +REG_OP(TakeManySparseFromTensorsMap) + .INPUT(handles, TensorType({DT_INT64})) + .OUTPUT(indices, TensorType({DT_INT64})) + .OUTPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) + .OUTPUT(shape, TensorType({DT_INT64})) + .REQUIRED_ATTR(dtype, Type) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(TakeManySparseFromTensorsMap) + +/** +*@brief Serializes a SparseTensor into a [3] Tensor object . 
\n + +*@par Inputs: +*3 or 4 inputs, including: +* @li indices: A 2D Tensor of type int64. The indices of the SparseTensor. +* @li values: A 1D Tensor. The values of the SparseTensor. +* @li shape: A 1D Tensor of type int64. The shape of the SparseTensor . \n + +*@par Attributes: +* out_type: An optional type. Defaults to "string" . \n + +*@par Outputs: +* serialized_sparse: A Tensor of type "out_type" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SerializeSparse. +*/ +REG_OP(SerializeSparse) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .INPUT(shape, TensorType({DT_INT64})) + .OUTPUT(serialized_sparse, TensorType({DT_STRING})) + .ATTR(out_type, Type, DT_STRING) + .OP_END_FACTORY_REG(SerializeSparse) + +/** +*@brief Serializes an "N"-minibatch SparseTensor into an [N, 3] Tensor object . \n + +*@par Inputs: +*3 or 4 inputs, including: +* @li indices: A 2D Tensor of type int64. The "indices" of the minibatch SparseTensor. +* @li values: A 1D Tensor. The "values" of the minibatch SparseTensor. +* @li shape: A 1D Tensor of type int64. The "shape" of the minibatch SparseTensor . \n + +*@par Attributes: +* out_type: An optional type. Defaults to "string" . \n + +*@par Outputs: +* serialized_sparse: A Tensor of type "out_type" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SerializeManySparse. +*/ +REG_OP(SerializeManySparse) + .INPUT(indices, TensorType({DT_INT64})) + .INPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .INPUT(shape, TensorType({DT_INT64})) + .OUTPUT(serialized_sparse, TensorType({DT_STRING})) + .ATTR(out_type, Type, DT_STRING) + .OP_END_FACTORY_REG(SerializeManySparse) + +/** +*@brief Deserializes SparseTensor objects . \n + +*@par Inputs: +*Two inputs, including: +* serialized_sparse: A Tensor. The serialized SparseTensor objects. +*The last dimension must have 3 columns . \n + +*@par Attributes: +* dtype: An optional type. The type of the serialized SparseTensor objects . \n + +*@par Outputs: +* @li indices: A Tensor of type int64. +* @li values: A Tensor of type "dtype". +* @li shape: A Tensor of type int64 . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator DeserializeSparse. +*/ +REG_OP(DeserializeSparse) + .INPUT(serialized_sparse, TensorType({DT_STRING})) + .OUTPUT(indices, TensorType({DT_INT64})) + .OUTPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .OUTPUT(shape, TensorType({DT_INT64})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(DeserializeSparse) + +/** +*@brief Deserializes and concatenates SparseTensors from a serialized minibatch . \n + +*@par Inputs: +*Two inputs, including: +* serialized_sparse: A 2D Tensor of type string. +*The "N" serialized SparseTensor objects. Must have 3 columns . \n + +*@par Attributes: +* dtype: An optional type. The type of the serialized SparseTensor objects . \n + +*@par Outputs: +* @li indices: A Tensor of type int64. +* @li values: A Tensor of type "dtype". +* @li shape: A Tensor of type int64 . 
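+
+*@par Example
+* An illustrative caller-side sketch of the serialize/deserialize round trip.
+* The wrapper classes and set_input_*/set_attr_* setters are the ones REG_OP
+* typically generates; the variable names ("idx", "vals", "shp") are assumptions:
+*     auto ser = ge::op::SerializeManySparse("ser")
+*                    .set_input_indices(idx)
+*                    .set_input_values(vals)
+*                    .set_input_shape(shp);
+*     auto deser = ge::op::DeserializeManySparse("deser")
+*                      .set_input_serialized_sparse(ser)
+*                      .set_attr_dtype(ge::DT_FLOAT);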
\n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator DeserializeManySparse. +*/ +REG_OP(DeserializeManySparse) + .INPUT(serialized_sparse, TensorType({DT_STRING})) + .OUTPUT(indices, TensorType({DT_INT64})) + .OUTPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, \ + DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING})) + .OUTPUT(shape, TensorType({DT_INT64})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(DeserializeManySparse) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/inc/ops/spectral_ops.h new file mode 100644 index 00000000..82accc73 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/spectral_ops.h @@ -0,0 +1,148 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file spectral_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ + +#include "graph/operator.h" +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Computes the inverse 1-dimensional discrete Fourier transform over the +inner-most dimension of `x`. \n + +*@par Inputs: +*@li x: A Tensor. Must be the following types: complex64, complex128. \n + +*@par Outputs: +*@li y: A complex tensor of the same rank as `x`. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IFFT operator. +*/ +REG_OP(IFFT) + .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OP_END_FACTORY_REG(IFFT) + +/** +*@brief Real-valued fast Fourier transform . \n + +*@par Inputs: +*@li input: A float32 tensor. +*@li fft_length: An int32 tensor of shape [1]. The FFT length . \n + +*@par Outputs: +*@li y: A complex64 tensor of the same rank as `input`. The inner-most +dimension of `input` is replaced with the `fft_length / 2 + 1` unique +frequency components of its 1D Fourier transform . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow RFFT operator. +*/ +REG_OP(RFFT) + .INPUT(input, TensorType({DT_FLOAT})) + .INPUT(fft_length, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_COMPLEX64})) + .OP_END_FACTORY_REG(RFFT) + +/** +*@brief Inverse real-valued fast Fourier transform . \n + +*@par Inputs: +*@li x: A complex64 tensor. +*@li fft_length: An int32 tensor of shape [1]. The FFT length . \n + +*@par Outputs: +*@li y: A float32 tensor of the same rank as `input`. The inner-most + dimension of `input` is replaced with the `fft_length` samples of its inverse + 1D Fourier transform . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IRFFT operator. 
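+
+*@par Example
+* An illustrative caller-side sketch; the variable names and surrounding graph
+* setup are assumptions, and the setters are the ones REG_OP typically generates:
+*     auto irfft = ge::op::IRFFT("irfft")
+*                      .set_input_x(spectrum)      // complex64 spectrum
+*                      .set_input_fft_length(len); // int32 tensor of shape [1]
+*     // The output "y" holds "fft_length" real float32 samples along the
+*     // innermost dimension.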
+*/ +REG_OP(IRFFT) + .INPUT(x, TensorType({DT_COMPLEX64})) + .INPUT(fft_length, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(IRFFT) + + +/** +*@brief 2D fast Fourier transform. \n + +*@par Inputs: +*@li x: A complex64 tensor.. + +*@par Outputs: +*@li y: A complex64 tensor of the same shape as `input`. The inner-most 2 + dimensions of `input` are replaced with their 2D Fourier transform.\n + +*@par Third-party framework compatibility +* Compatible with TensorFlow FFT2D operator. +*/ +REG_OP(FFT2D) + .INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(FFT2D) + +/** +*@brief Calculate the one-dimensional discrete Fourier transform on the +innermost dimension of the input. \n + +*@par Inputs: +*@li x: A Tensor. Must be the following types: complex64, complex128. \n + +*@par Outputs: +*@li y: A complex tensor with the same shape as input. The innermost dimension +of the input is replaced by its 1-dimensional Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow FFT operator. +*/ +REG_OP(FFT) + .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OP_END_FACTORY_REG(FFT) + +/** +*@brief Calculate the inverse 1-dimensional discrete Fourier transform on the +innermost dimension of the input. \n + +*@par Inputs: +*@li x: A Tensor. Must be the following types: complex64, complex128. \n + +*@par Outputs: +*@li y: A complex tensor with the same shape as input. The innermost dimension +of the input is replaced by its inverse two-dimensional Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IFFT2D operator. +*/ +REG_OP(IFFT2D) + .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OP_END_FACTORY_REG(IFFT2D) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/inc/ops/split_combination_ops.h new file mode 100644 index 00000000..af2c37bc --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/split_combination_ops.h @@ -0,0 +1,389 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file split_combination_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors . \n + +*@par Inputs: +* Two inputs, including: +*@li x: An ND Tensor. +*Must be one of the types:float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. 
+*@li split_dim: Must be the following type:int32. Specifies the dimension along which to split . \n + +*@par Attributes: +*num_split: A required int32. Specifies the number of output tensors. No default value . \n + +*@par Outputs: +*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n + +*@attention Constraints: +*@li "num_split" is greater than or equals to 1. +*@li "num_split" is divisible by the size of dimension "split_dim". +*@li "split_dim" is in the range [-len(x.shape), (x.shape)-1] . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Split. +*/ +REG_OP(Split) + .INPUT(split_dim, TensorType({DT_INT32})) + .INPUT(x, TensorType::BasicType()) + .DYNAMIC_OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(num_split, Int) + .OP_END_FACTORY_REG(Split) + +/** +*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors . \n + +*@par Inputs: +* One input: +*: An ND Tensor. +*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 + +*@par Attributes: +*@li split_dim: A required int32. Specifies the dimension along which to split. No default value. +*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n + +*@par Outputs: +*y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n + +*@attention Constraints: +*@li "num_split" is greater than or equals to 1. +*@li "num_split" is divisible by the size of dimension "split_dim". +*@li "split_dim" is in the range [-len(x.shape), (x.shape)-1] . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Split. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Split instead. +*/ +REG_OP(SplitD) + .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, + DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT, DT_FLOAT16})) + .DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, + DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(split_dim, Int) + .REQUIRED_ATTR(num_split, Int) + .OP_END_FACTORY_REG(SplitD) + +/** +*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors according to "size_splits" . \n + +*@par Inputs: +* Three inputs, including: +*@li x: An ND Tensor. +*Must be one of the types:float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +*@li size_splits: Must be one of the types:int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension. +*@li split_dim: Must be the following type:int32. Specifies the dimension along which to split . \n + +*@par Attributes: +*num_split: A required int32. Specifies the number of output tensors. No default value . \n + +*@par Outputs: +*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n + +*@attention Constraints: +*@li Each element in "size_splits" is greater than or equal to 1. +*@li "size_splits" and "num_split" have the same length. +*@li The elements in "size_splits" sum to the size of dimension "split_dim" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SplitV. 
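+
+*@par Example
+* An illustrative sketch of splitting a [4, 10] tensor into widths 2, 3 and 5
+* along dimension 1. The const operators feeding each input and the
+* create_dynamic_output_* helper are assumptions based on the usual
+* REG_OP/DYNAMIC_OUTPUT generated interface:
+*     auto splitv = ge::op::SplitV("splitv")
+*                       .set_input_x(x)
+*                       .set_input_size_splits(sizes)  // int32 const {2, 3, 5}
+*                       .set_input_split_dim(dim)      // int32 const scalar 1
+*                       .set_attr_num_split(3);
+*     splitv.create_dynamic_output_y(3);               // three output slices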
+*/
+REG_OP(SplitV)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(size_splits, TensorType::IndexNumberType())
+    .INPUT(split_dim, TensorType({DT_INT32}))
+    .DYNAMIC_OUTPUT(y, TensorType::BasicType())
+    .REQUIRED_ATTR(num_split, Int)
+    .OP_END_FACTORY_REG(SplitV)
+
+/**
+*@brief Splits a tensor along dimension "split_dim" into "num_split" smaller tensors according to "size_splits" . \n
+
+*@par Inputs:
+* One input:
+* x: An ND Tensor.
+*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64
+
+*@par Attributes:
+*@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension.
+*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
+*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n
+
+*@par Outputs:
+*y: A dynamic output. A list of output tensors. Has the same type and format as "x" . \n
+
+*@attention Constraints:
+*@li Each element in "size_splits" is greater than or equal to 1.
+*@li "size_splits" and "num_split" have the same length.
+*Under the Caffe framework, the "slice_point" cut points are converted to segment sizes and mapped to "size_splits".
+*@li The elements in "size_splits" sum to the size of dimension "split_dim".
+*Under the Caffe framework, "size_splits" or "axis" is transformed to "split_dim"; only one of them takes effect.
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator SplitV.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use SplitV instead.
+*/
+REG_OP(SplitVD)
+    .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8,
+        DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT, DT_FLOAT16}))
+    .DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8,
+        DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(size_splits, ListInt)
+    .REQUIRED_ATTR(split_dim, Int)
+    .REQUIRED_ATTR(num_split, Int)
+    .OP_END_FACTORY_REG(SplitVD)
+
+/**
+*@brief Concatenates a list of N tensors along the first dimension.
+*@par Inputs:
+* One dynamic input:
+* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32,
+* int64, uint8, uint16, uint32, uint64, float16, float32.
+* Tensors to be concatenated. All must have size 1 in the first dimension and the same shape.
+* It's a dynamic input. \n
+
+*@par Attributes:
+* @li shape: A required list of ints. The final shape of the result. Should be equal
+* to the shape of any input, but with the number of input values in the first dimension.
+* @li N: A required int. The number of tensors in the dynamic input "values" . \n
+
+*@par Outputs:
+*output_data: The concatenated tensor with the same type as "values".
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator ParallelConcat.
+*/
+REG_OP(ParallelConcat)
+    .DYNAMIC_INPUT(values, TensorType({DT_FLOAT,DT_FLOAT16,DT_INT8,DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64}))
+    .OUTPUT(output_data, TensorType({DT_FLOAT,DT_FLOAT16,DT_INT8,DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64}))
+    .REQUIRED_ATTR(shape, ListInt)
+    .REQUIRED_ATTR(N, Int)
+    .OP_END_FACTORY_REG(ParallelConcat)
+
+/**
+*@brief Concatenates tensors along one dimension . \n
+
+*@par Inputs:
+* One input:
+*x: A dynamic input. An NC1HWC0 or ND Tensor.
+*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 + +*@par Attributes: +*concat_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to concatenate. No default value. +*N: An attribute int8, int16, int32, or int64. Specifies the number of elements in "x". Defaults to "1". + +*@par Outputs: +*y: A Tensor. Has the same type and format as "x" . \n + +*@attention Constraints: +*@li "x" is a list of at least 2 "tensor" objects of the same type. +*@li "concat_dim" is in the range [-len(x.shape), len(x.shape)] . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ConcatV2. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use ConcatV2 instead. +*/ +REG_OP(ConcatV2D) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_INT64, DT_UINT64, DT_UINT32, DT_INT16, DT_UINT16, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_INT64, DT_UINT64, DT_UINT32, DT_INT16, DT_UINT16, DT_UINT8})) + .REQUIRED_ATTR(concat_dim, Int) + .ATTR(N, Int, 1) + .OP_END_FACTORY_REG(ConcatV2D) + +/** +*@brief Concatenates tensors along one dimension . \n + +*@par Inputs: +* Two inputs, including: +*@li Dynamic input "x" is An NC1HWC0 or ND Tensor. +*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 +*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate . \n + +*@par Attributes: +*N: An optional int8, int16, int32, or int64. Specifies the number of elements in "x". No default value . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as "x" . \n + +*@attention Constraints: +* "x" is a list of at least 2 "tensor" objects of the same type . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ConcatV2. +*/ +REG_OP(ConcatV2) + .DYNAMIC_INPUT(x, TensorType::BasicType()) + .INPUT(concat_dim, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(N, Int, 1) + .OP_END_FACTORY_REG(ConcatV2) + +/** +*@brief Concatenates tensors along one dimension . \n + +*@par Inputs: +* One input: +*x:Dynamic input. An NC1HWC0 or ND Tensor. +*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 + +*@par Attributes: +*@li concat_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to concatenate. No default value. +*@li N: An optional int8, int16, int32, or int64. Specifies the number of elements in "x". No default value . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as "x" . \n + +*@attention Constraints: +*@li "x" is a list of at least 2 "tensor" objects of the same type. +*@li "concat_dim" is in the range [-len(x.shape), len(x.shape)] . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Concat. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use Concat instead. +*/ +REG_OP(ConcatD) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16,DT_INT8,DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16,DT_INT8,DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64})) + .REQUIRED_ATTR(concat_dim, Int) + .ATTR(N, Int, 1) + .OP_END_FACTORY_REG(ConcatD) + +/** +*@brief Concatenates tensors along one dimension . 
\n
+
+*@par Inputs:
+* Two inputs, including:
+*@li x: A dynamic input. An NC1HWC0 or ND Tensor.
+*Must be one of the following types: float16, float32, double, int32,
+* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16,
+* complex128, uint32, uint64, qint16, quint16.
+*@li concat_dim: An int32 or int64. Specifies the dimension along which to concatenate . \n
+
+*@par Attributes:
+*N: An optional int8, int16, int32, or int64. Specifies the number of elements in "x" . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type and format as "x" . \n
+
+*@attention Constraints:
+*@li "x" is a list of at least 2 "tensor" objects of the same type.
+*@li "concat_dim" is in the range [-len(x.shape), len(x.shape)] . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Concat.
+*/
+REG_OP(Concat)
+    .DYNAMIC_INPUT(x, TensorType::BasicType())
+    .INPUT(concat_dim, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::BasicType())
+    .ATTR(N, Int, 1)
+    .OP_END_FACTORY_REG(Concat)
+
+/**
+*@brief Packs the list of tensors in values into a tensor with rank one higher than each tensor in
+* values, by packing them along the axis dimension. Given a list of length N of tensors of
+* shape (A, B, C); if axis == 0 then the output tensor will have the shape (N, A, B, C) . \n
+
+*@par Inputs:
+* x: A list of N Tensors. Must be one of the following types: int8, int16, int32,
+* int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n
+
+*@par Attributes:
+*@li axis: An optional int. Defaults to 0.
+* Dimension along which to pack. The range is [-(R+1), R+1).
+*@li N: A required int. Number of tensors . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x".
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator Pack.
+*/
+REG_OP(Pack)
+    .DYNAMIC_INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .ATTR(axis, Int, 0)
+    .REQUIRED_ATTR(N, Int)
+    .OP_END_FACTORY_REG(Pack)
+
+/**
+*@brief Computes offsets of concat inputs within its output . \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li concat_dim: A Tensor of type int32.
+* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n
+
+*@par Attributes:
+*N: A required int . \n
+
+*@par Outputs:
+*y: A Tensor list with the same type as "x" . It's a dynamic output. \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator ConcatOffset.
+*/
+REG_OP(ConcatOffset)
+    .INPUT(concat_dim, TensorType({DT_INT32}))
+    .DYNAMIC_INPUT(x, TensorType({DT_INT32}))
+    .DYNAMIC_OUTPUT(y, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(N, Int)
+    .OP_END_FACTORY_REG(ConcatOffset)
+
+/**
+*@brief Computes offsets of concat inputs within its output . \n
+
+*@par Inputs:
+*One input:
+* x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n
+
+*@par Attributes:
+*@li concat_dim: A required int. Must be within the rank of input "x".
+*@li N: A required int . \n
+
+*@par Outputs:
+*y: A Tensor list with the same type as "x" . It's a dynamic output. \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator ConcatOffset.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please use ConcatOffset instead.
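+
+*@par Example
+* An illustrative sketch of the recommended ConcatOffset form. The dynamic
+* input/output helpers are the ones DYNAMIC_INPUT/DYNAMIC_OUTPUT typically
+* generate; the shape tensors "shape0" and "shape1" are assumptions:
+*     auto offsets = ge::op::ConcatOffset("concat_offset")
+*                        .set_input_concat_dim(dim)    // int32 scalar const
+*                        .set_attr_N(2);
+*     offsets.create_dynamic_input_x(2);
+*     offsets.set_dynamic_input_x(0, shape0);           // 1-D int32 shape tensor
+*     offsets.set_dynamic_input_x(1, shape1);
+*     offsets.create_dynamic_output_y(2);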
+*/ +REG_OP(ConcatOffsetD) + .DYNAMIC_INPUT(x, TensorType({DT_INT32})) + .DYNAMIC_OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(concat_dim, Int) + .REQUIRED_ATTR(N, Int) + .OP_END_FACTORY_REG(ConcatOffsetD) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/state_ops.h b/third_party/fwkacllib/inc/inc/ops/state_ops.h new file mode 100644 index 00000000..3c8e32b6 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/state_ops.h @@ -0,0 +1,167 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file state_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_STATE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_STATE_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Creates a variable tensor . \n + +*@par Inputs: +*x: A tensor, used to assign a value to the variable tensor internally. +The caller does not need to pass the value of the variable tensor . \n + +*@par Attributes: +*@li index: An integer. Index of the input tensor. +*@li value: A tensor, used to pass and record the value of the variable tensor. +*@li container: A string. The container of the variable tensor. +*@li shared_name: A string. The shared name of the variable tensor . \n + +*@par Outputs: +*y: The created variable tensor . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Variable. +*/ +REG_OP(Variable) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ + DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .ATTR(index, Int, 0) + .ATTR(value, Tensor, Tensor()) + .ATTR(container, String, "") + .ATTR(shared_name, String, "") + .OP_END_FACTORY_REG(Variable) + +/** +*@brief Returns a temporary variable tensor. After the use of TemporaryVariable, +pass the reference to the variable tensor to the matching DestroyTemporaryVariable op for destruction . \n + +*@par Attributes: +*@li shape: A required list of int32 or int64. The shape of the variable tensor. +*@li dtype: Required. The type of elements in the variable tensor. +*@li var_name: An optional string. The name of the variable to be created . \n + +*@par Outputs: +*y: The created variable tensor . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator TemporaryVariable. +*/ +REG_OP(TemporaryVariable) + .OUTPUT(y, TensorType::ALL()) + .REQUIRED_ATTR(shape, ListInt) + .REQUIRED_ATTR(dtype, Int) + .ATTR(var_name, String, "") + .OP_END_FACTORY_REG(TemporaryVariable) + +/** +*@brief Destroys the temporary variable and returns its final value. +All other uses of the temporary variable must have been executed before this op . \n + +*@par Inputs: +*x: A reference to the temporary variable tensor . 
\n + +*@par Attributes: +*var_name: A required string. Name of the temporary variable. +Must be the same as the "var_name" attribute of the reference to the temporary variable tensor . \n + +*@par Outputs: +*y: Final value of the reference to the temporary variable tensor . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator DestroyTemporaryVariable. +*/ +REG_OP(DestroyTemporaryVariable) + .INPUT(x, TensorType::ALL()) + .OUTPUT(y, TensorType::ALL()) + .ATTR(var_name, String, "") + .OP_END_FACTORY_REG(DestroyTemporaryVariable) + +/** +*@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized . \n + +*@par Inputs: +*x: A tensor . \n + +*@par Outputs: +*y: A tensor, indicating whether "x" has been initialized . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator IsVariableInitialized. +*/ +REG_OP(IsVariableInitialized) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(IsVariableInitialized) + +/** +*@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized . \n + +*@par Inputs: +*x: A tensor . \n + +*@par Outputs: +*y: A tensor, indicating whether "x" has been initialized, and the data type is boolean . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator VarIsInitializedOp. +*/ +REG_OP(VarIsInitializedOp) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(VarIsInitializedOp) + +/** +*@brief Increments 'ref' until it reaches 'limit' . \n + +*@par Inputs: +*Inputs include: +*ref: A mutable Tensor. Must be one of the following types: int32, int64 . \n + +*@par Attributes: +*limit: An int. If incrementing ref would bring it above limit, instead + generates an 'OutOfRange' error . \n + +*@par Outputs: +*y: A Tensor. Has the same type as ref . \n + +*@attention Constraints: +*The implementation for CountUpTo on Ascend uses AICPU, with bad performance. + +*@par Third-party framework compatibility +*@li compatible with tensorflow CountUpTo operator. +*/ +REG_OP(CountUpTo) + .INPUT(ref, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .ATTR(limit, Int, 0) + .OP_END_FACTORY_REG(CountUpTo) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_STATE_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/inc/ops/stateful_random_ops.h new file mode 100644 index 00000000..c2f65c6a --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/stateful_random_ops.h @@ -0,0 +1,236 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file stateful_random_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_STATEFUL_RANDOM_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_STATEFUL_RANDOM_OPS_H_ + +#include "graph/operator.h" +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Non-deterministically generates some integers . \n + +*@par Inputs: +*This op may use some OS-provided source of non-determinism (e.g. an RNG), +*so each execution will give different results. Inputs included: +*@li shape: The shape of the output tensor . \n + +*@par Outputs: +*y:A Returns Non-deterministic integer values with specified shape . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow NonDeterministicInts operator. +*/ + +REG_OP(NonDeterministicInts) + .INPUT(shape, TensorType({DT_INT32,DT_INT64})) + .OUTPUT(y, TensorType({DT_INT32,DT_INT64})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(NonDeterministicInts) + +/** +*@brief Advance the counter of a counter-based RNG. The state of the RNG after +*`rng_skip(n)` will be the same as that after `stateful_uniform([n])` +*(or any other distribution). The actual increment added to the +*counter is an unspecified implementation detail . \n + +*@par Inputs: +*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li algorithm: The RNG algorithm. +*@li delta: The amount of advancement . \n + +*@par Outputs: +*y:A Returns the created operation . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow RngSkip operator. +*/ + +REG_OP(RngSkip) + .INPUT(x, TensorType({DT_RESOURCE})) + .INPUT(algorithm, TensorType({DT_INT64})) + .INPUT(delta, TensorType({DT_INT64})) + .OP_END_FACTORY_REG(RngSkip) + +/** +*@brief Outputs random integers from a uniform distribution. +The generated values are uniform integers in the range `[minval, maxval)`. +The lower bound `minval` is included in the range, while the upper bound +`maxval` is excluded. +The random integers are slightly biased unless `maxval - minval` is an exact +power of two. The bias is small for values of `maxval - minval` significantly +smaller than the range of the output (either `2^32` or `2^64`) . \n + +*@par Inputs: +*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li algorithm: The RNG algorithm. +*@li shape: The shape of the output tensor. +*@li minval: Minimum value (inclusive, scalar). +*@li maxval: Maximum value (exclusive, scalar) . \n + +*@par Outputs: +*y:A Returns Random values with specified shape . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulRandomBinomial operator. +*/ + +REG_OP(StatefulRandomBinomial) + .INPUT(x, TensorType({DT_RESOURCE})) + .INPUT(algorithm, TensorType({DT_INT64})) + .INPUT(shape, TensorType({DT_INT32})) + .INPUT(counts, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(probs, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(StatefulRandomBinomial) + +/** +*@brief Outputs random values from a normal distribution. +*The generated values will have mean 0 and standard deviation 1 . \n + +*@par Inputs: +*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li algorithm: The RNG algorithm. +*@li shape: The shape of the output tensor . 
\n + +*@par Outputs: +*y:A Returns A tensor of the specified shape filled with random normal values . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulStandardNormalV2 operator. +*/ + +REG_OP(StatefulStandardNormalV2) + .INPUT(x, TensorType({DT_RESOURCE})) + .INPUT(algorithm, TensorType({DT_INT64})) + .INPUT(shape, TensorType({DT_INT32,DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(StatefulStandardNormalV2) + +/** +*@brief Outputs random values from a truncated normal distribution. +*The generated values follow a normal distribution with mean 0 and standard +*deviation 1, except that values whose magnitude is more than 2 standard +*deviations from the mean are dropped and re-picked . \n + +*@par Inputs: +*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li algorithm: The RNG algorithm. +*@li shape: The shape of the output tensor . \n + +*@par Outputs: +*y:A Returns Random values with specified shape . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulTruncatedNormal operator. +*/ + +REG_OP(StatefulTruncatedNormal) + .INPUT(x, TensorType({DT_RESOURCE})) + .INPUT(algorithm, TensorType({DT_INT64})) + .INPUT(shape, TensorType({DT_INT32,DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(StatefulTruncatedNormal) + +/** +*@brief Outputs random values from a uniform distribution. +The generated values follow a uniform distribution in the range `[0, 1)`. The +lower bound 0 is included in the range, while the upper bound 1 is excluded. + +*@par Inputs: +*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li algorithm: The RNG algorithm. +*@li shape: The shape of the output tensor . \n + +*@par Outputs: +*y:A Returns Random values with specified shape . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulUniform operator. +*/ + +REG_OP(StatefulUniform) + .INPUT(x, TensorType({DT_RESOURCE})) + .INPUT(algorithm, TensorType({DT_INT64})) + .INPUT(shape, TensorType({DT_INT32,DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(StatefulUniform) + +/** +*@brief Outputs random integers from a uniform distribution. +The generated values are uniform integers covering the whole range of `dtype` . \n + +*@par Inputs: +*@li resource: The handle of the resource variable that stores the state of the RNG. +*@li algorithm: The RNG algorithm. +*@li shape: The shape of the output tensor . \n + +*@par Outputs: +*y:A Returns Random values with specified shape . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulUniformFullInt operator. +*/ + +REG_OP(StatefulUniformFullInt) + .INPUT(x, TensorType({DT_RESOURCE})) + .INPUT(algorithm, TensorType({DT_INT64})) + .INPUT(shape, TensorType({DT_INT32,DT_INT64})) + .OUTPUT(y, TensorType({DT_UINT64})) + .OP_END_FACTORY_REG(StatefulUniformFullInt) + +/** +*@brief Outputs random integers from a uniform distribution. +The generated values are uniform integers in the range `[minval, maxval)`. +The lower bound `minval` is included in the range, while the upper bound +`maxval` is excluded. +The random integers are slightly biased unless `maxval - minval` is an exact +power of two. The bias is small for values of `maxval - minval` significantly +smaller than the range of the output (either `2^32` or `2^64`) . \n + +*@par Inputs: +*@li resource: The handle of the resource variable that stores the state of the RNG. 
+*@li algorithm: The RNG algorithm. +*@li shape: The shape of the output tensor. +*@li minval: Minimum value (inclusive, scalar). +*@li maxval: Maximum value (exclusive, scalar) . \n + +*@par Outputs: +*y:A Returns Random values with specified shape . \n + +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulUniformInt operator. +*/ + +REG_OP(StatefulUniformInt) + .INPUT(x, TensorType({DT_RESOURCE})) + .INPUT(algorithm, TensorType({DT_INT64})) + .INPUT(shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(minval, TensorType({DT_INT64})) + .INPUT(maxval, TensorType({DT_INT64})) + .OUTPUT(y, TensorType({DT_INT64})) + .OP_END_FACTORY_REG(StatefulUniformInt) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_STATEFUL_RANDOM_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/inc/ops/stateless_random_ops.h new file mode 100644 index 00000000..ff9daaa3 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/stateless_random_ops.h @@ -0,0 +1,84 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file stateless_random_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_ + +#include "graph/operator.h" +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Draws samples from a multinomial distribution . \n + +*@par Inputs: +include: +*@li logits:2-D Tensor with shape [batch_size, num_classes]. Each slice [i, :] +*represents the unnormalized log probabilities for all classes. +*@li num_samples:0-D. Number of independent samples to draw for each row slice. +*@li seed:The seed to generate random . \n + +*@par Attributes: +*output_dtype:Output data type . \n + +*@par Outputs: +*y:Output random number . \n + +*@see StatelessMultinomial() + +*@par Third-party framework compatibility +*compatible with StatelessMultinomial op of tensorflow +*/ +REG_OP(StatelessMultinomial) + .INPUT(logits, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE})) + .INPUT(num_samples, TensorType({DT_INT32})) + .INPUT(seed, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .ATTR(output_dtype, Type, DT_INT64) + .OP_END_FACTORY_REG(StatelessMultinomial) + +/** +*@brief Outputs deterministic pseudorandom random integers from a uniform distribution . \n + +*@par Inputs: +*@li shape: The shape of the output tensor. +*@li seed: 2 seeds (shape [2]). +*@li minval: Minimum value (inclusive, scalar). +*@li maxval: Maximum value (exclusive, scalar) . \n + +*@par Outputs: +*y: Returns Random values with specified shape . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow StatelessRandomUniformInt operator. 
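+
+*@par Example
+* An illustrative caller-side sketch; the const operators feeding each input
+* are assumptions, and the setters are the ones REG_OP typically generates:
+*     // Deterministic int32 values in [0, 10): the same seed always yields
+*     // the same output.
+*     auto rnd = ge::op::StatelessRandomUniformInt("rnd")
+*                    .set_input_shape(shape)   // e.g. int32 const {2, 3}
+*                    .set_input_seed(seed)     // int32/int64 const of shape [2]
+*                    .set_input_minval(lo)     // scalar const 0
+*                    .set_input_maxval(hi);    // scalar const 10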
+*/ + +REG_OP(StatelessRandomUniformInt) + .INPUT(shape, TensorType({DT_INT32, DT_INT64})) + .INPUT(seed, TensorType({DT_INT32, DT_INT64})) + .INPUT(minval, TensorType({DT_INT32, DT_INT64})) + .INPUT(maxval, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(StatelessRandomUniformInt) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/ops/string_ops.h b/third_party/fwkacllib/inc/inc/ops/string_ops.h new file mode 100644 index 00000000..29aec302 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/string_ops.h @@ -0,0 +1,562 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file string_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ + +#include +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Split elements of input based on delimiter into a SparseTensor . \n + +*@par Inputs: +include: +*@li input:1-D. Strings to split. +*@li delimiter:0-D. Delimiter characters (bytes), or empty string . \n + +*@par Attributes: +* skip_empty:A bool. If True, skip the empty strings from the result . \n + +*@par Outputs: +*@li indices:A dense matrix of int64 representing the indices of the sparse tensor. +*@li values:A vector of strings corresponding to the splited values. +*@li shape:A length-2 vector of int64 representing the shape of the sparse tensor, +*where the first value is N and the second value is the maximum number of tokens +*in a single input entry . \n + +*@see StringSplit() + +*@par Third-party framework compatibility +*compatible with StringSplit op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StringSplit) + .INPUT(input, TensorType({DT_STRING})) + .INPUT(delimiter, TensorType({DT_STRING})) + .OUTPUT(indices, TensorType({DT_INT64})) + .OUTPUT(values, TensorType({DT_STRING})) + .OUTPUT(shape, TensorType({DT_INT64})) + .ATTR(skip_empty, Bool, true) + .OP_END_FACTORY_REG(StringSplit) + +/** +*@brief Split elements of source based on sep into a SparseTensor . \n + +*@par Inputs: +include: +*@li input:1-D. Strings to split. +*@li sep:0-D string Tensor, the delimiter character . \n + +*@par Attributes: +* maxsplit:An int. If maxsplit > 0, limit of the split of the result . \n + +*@par Outputs: +*@li indices:A dense matrix of int64 representing the indices of the sparse tensor. +*@li values:A vector of strings corresponding to the splited values. +*@li shape:A length-2 vector of int64 representing the shape of the sparse tensor, +*where the first value is N and the second value is the maximum number of tokens +*in a single input entry . 
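+
+*@par Example
+* For instance, splitting ["a,b", "c"] with sep = "," yields values
+* ["a", "b", "c"], indices [[0, 0], [0, 1], [1, 0]] and shape [2, 2].
+* An illustrative caller-side sketch (variable names are assumptions):
+*     auto split = ge::op::StringSplitV2("split")
+*                      .set_input_input(strings)  // 1-D string tensor
+*                      .set_input_sep(sep)        // scalar string ","
+*                      .set_attr_maxsplit(-1);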
\n + +*@see StringSplitV2() + +*@par Third-party framework compatibility +*compatible with StringSplitV2 op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StringSplitV2) + .INPUT(input, TensorType({DT_STRING})) + .INPUT(sep, TensorType({DT_STRING})) + .OUTPUT(indices, TensorType({DT_INT64})) + .OUTPUT(values, TensorType({DT_STRING})) + .OUTPUT(shape, TensorType({DT_INT64})) + .ATTR(maxsplit, Int, -1) + .OP_END_FACTORY_REG(StringSplitV2) + +/** +*@brief Determine the script codes of a given tensor of Unicode integer code points . \n + +*@par Inputs: +include: +*x:A Tensor of int32 Unicode code points . \n + +*@par Outputs: +*y:A Tensor of int32 script codes corresponding to each input code point . \n + +*@attention Constraints: +*This operation converts Unicode code points to script codes corresponding to +*each code point. Script codes correspond to International Components for +*Unicode (ICU) UScriptCode values. +*See http://icu-project.org/apiref/icu4c/uscript_8h.html. +*Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. +*Output shape will match input shape . \n + +*@see UnicodeScript() + +*@par Third-party framework compatibility +*compatible with UnicodeScript op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(UnicodeScript) + .INPUT(x, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(UnicodeScript) + +/** +*@brief Return substrings from Tensor of strings . \n + +*@par Inputs: +include: +*@li input:Tensor of strings. +*@li pos:Scalar defining the position of first character in each substring. +*@li len:Scalar defining the number of characters to include in each substring . \n + +*@par Outputs: +*output:Tensor of substrings . \n + +*@attention Constraints: +*The hash function is deterministic on the content of the string within +*the process and will never change. However, it is not suitable for +*cryptography. This function may be used when CPU time is scarce and +*inputs are trusted or unimportant. There is a risk of adversaries +*constructing inputs that all hash to the same bucket. +*To prevent this problem, use a strong hash function with +*tf.string_to_hash_bucket_strong . \n + +*@see Substr() + +*@par Third-party framework compatibility +*compatible with Substr op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(Substr) + .INPUT(input, TensorType({DT_STRING})) + .INPUT(pos, TensorType({DT_INT32, DT_INT64})) + .INPUT(len, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(output, TensorType({DT_STRING})) + .OP_END_FACTORY_REG(Substr) + +/** +*@brief Converts each string in the input Tensor to its hash mod by a number of buckets . \n + +*@par Inputs: +include: +*string_tensor:The strings to assign a hash bucket . \n + +*@par Outputs: +*y:A Tensor of the same shape as the input x . \n + +*@attention Constraints: +*The hash function is deterministic on the content of the string within +*the process and will never change. However, it is not suitable for cryptography. +*This function may be used when CPU time is scarce and inputs are trusted or +*unimportant. There is a risk of adversaries constructing inputs that all hash +*to the same bucket. To prevent this problem, use a strong hash function with +*tf.string_to_hash_bucket_strong . 
\n + +*@see StringToHashBucketFast() + +*@par Third-party framework compatibility +*compatible with StringToHashBucketFast op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StringToHashBucketFast) + .INPUT(x, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_INT64})) + .ATTR(num_buckets, Int, 1) + .OP_END_FACTORY_REG(StringToHashBucketFast) + +/** +*@brief Converts each string in the input Tensor to its hash mod by a number of buckets . \n + +*@par Inputs: +include: +*x:The strings to assign a hash bucket . \n + +*@par Attributes: +*num_buckets:The number of buckets . \n + +*@par Outputs: +*y:A Tensor of the same shape as the input x . \n + +*@attention Constraints: +*@li A strong hash is important when inputs may be malicious, e.g. URLs with +*additional components. Adversaries could try to make their inputs hash to +*the same bucket for a denial-of-service attack or to skew the results. +*A strong hash can be used to make it difficult to find inputs with a skewed +* hash value distribution over buckets. This requires that the hash function\ +*is seeded by a high-entropy (random) "key" unknown to the adversary. +*@li The additional robustness comes at a cost of roughly 4x higher +*compute time than tf.string_to_hash_bucket_fast . \n + +*@see StringToHashBucketStrong() + +*@par Third-party framework compatibility +*compatible with StringToHashBucketStrong op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StringToHashBucketStrong) + .INPUT(x, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_INT64})) + .ATTR(num_buckets, Int, 1) + .REQUIRED_ATTR(key, ListInt) + .OP_END_FACTORY_REG(StringToHashBucketStrong) + +/** +*@brief Converts each string in the input Tensor to its hash mod by a number of buckets . \n + +*@par Inputs: +include: +*string_tensor:The strings to assign a hash bucket . \n + +*@par Attributes: +*num_buckets:The number of buckets . \n + +*@par Outputs: +*y:A Tensor of the same shape as the input string_tensor . \n + +*@see StringToHashBucket() + +*@par Third-party framework compatibility +*compatible with StringToHashBucket op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StringToHashBucket) + .INPUT(string_tensor, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_INT64})) + .ATTR(num_buckets, Int, 1) + .OP_END_FACTORY_REG(StringToHashBucket) + +/** +*@brief Strip leading and trailing whitespaces from the Tensor . \n + +*@par Inputs: +include: +*x:A string Tensor of any shape . \n + +*@par Outputs: +*y:A string Tensor of the same shape as the input . \n + +*@see StringStrip() + +*@par Third-party framework compatibility +*compatible with StringStrip op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StringStrip) + .INPUT(x, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_STRING})) + .OP_END_FACTORY_REG(StringStrip) + +/** +*@brief Computes the length of each string given in the input tensor . \n + +*@par Inputs: +include: +*x:The string for which to compute the length . \n + +*@par Attributes: +*unit:The unit that is counted to compute string length. +*One of: "BYTE" (for the number of bytes in each string) or +*"UTF8_CHAR" (for the number of UTF-8 encoded Unicode code points in each string). +*Results are undefined if unit=UTF8_CHAR and the input strings do not contain +*structurally valid UTF-8 . 
\n + +*@par Outputs: +*y:Integer tensor that has the same shape as input. +*The output contains the element-wise string lengths of input . \n + +*@see StringLength() + +*@par Third-party framework compatibility +*compatible with StringLength op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StringLength) + .INPUT(x, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_INT32})) + .ATTR(unit, String, "BYTE") + .OP_END_FACTORY_REG(StringLength) + +/** +*@brief Joins the strings in the given list of string tensors into one tensor . \n + +*@par Inputs: +*The input is a string tensor of any shape. The pattern is a scalar string tensor +*which is applied to every element of the input tensor. The boolean values +*(True or False) of the output tensor indicate if the input matches the regex +*pattern provided. The pattern follows the re2 syntax +*(https://github.com/google/re2/wiki/Syntax).: +include: +*x:A list of string tensors. The tensors must all have the same shape, +*or be scalars. Scalars may be mixed in; these will be broadcast to the shape +*of non-scalar inputs . It's a dynamic input. \n + +*@par Attributes: +*@li N:The length of input x. +*@li separator:string, an optional join separator . \n + +*@par Outputs: +*y:The output tensor . \n + +*@see StringJoin() + +*@par Third-party framework compatibility +*compatible with StringJoin op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StringJoin) + .DYNAMIC_INPUT(x, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_STRING})) + .REQUIRED_ATTR(N, Int) + .ATTR(separator, String, "") + .OP_END_FACTORY_REG(StringJoin) + +/** +*@brief Formats a string template using a list of tensors . \n + +*@par Inputs: +*The input is a string tensor of any shape. The pattern is a scalar string tensor +*which is applied to every element of the input tensor. +*The boolean values (True or False) of the output tensor indicate if the input +*matches the regex pattern provided. The pattern follows the re2 syntax +*(https://github.com/google/re2/wiki/Syntax).: +include: +*x:The tensors to format into the placeholder string . It's a dynamic input. \n + +*@par Attributes: +*@li template:A string, the template to format tensor summaries into. +*@li placeholder:A string, at each placeholder in the template a subsequent tensor summary will be inserted. +*@li summarize:When formatting the tensor summaries print the first and last summarize entries of each tensor dimension . \n + +*@par Outputs: +*y:The resulting string scalar . \n + +*@see StringFormat() + +*@par Third-party framework compatibility +* compatible with StringFormat op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StringFormat) + .DYNAMIC_INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_STRING, DT_FLOAT16, \ + DT_FLOAT, DT_DOUBLE, DT_BOOL})) + .OUTPUT(y, TensorType({DT_STRING})) + .ATTR(template, String, "%s") + .ATTR(placeholder, String, "%s") + .ATTR(summarize, Int, 3) + .OP_END_FACTORY_REG(StringFormat) + +/** +*@brief Check if the input matches the regex pattern . \n + +*@par Inputs: +*The input is a string tensor of any shape. The pattern is a scalar string tensor +*which is applied to every element of the input tensor. The boolean values +*(True or False) of the output tensor indicate if the input matches the regex +*pattern provided. 
The pattern follows the re2 syntax +*(https://github.com/google/re2/wiki/Syntax).: +include: +*@li x:A string tensor of the text to be processed. +*@li pattern:A scalar string tensor containing the regular expression to match the input . \n + +*@par Outputs: +*y:A bool tensor with the same shape as input . \n + +*@see RegexFullMatch() + +*@par Third-party framework compatibility +*compatible with RegexFullMatch op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(RegexFullMatch) + .INPUT(x, TensorType({DT_STRING})) + .INPUT(pattern, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(RegexFullMatch) + +/** +*@brief Replaces matches of the pattern regular expression in input with the +*replacement string provided in rewrite . \n + +*@par Inputs: +*It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax).: +include: +*@li x:The text to be processed. +*@li pattern:The regular expression to be matched in the input strings. +*@li rewrite:The rewrite string to be substituted for the pattern expression +*where it is matched in the input strings . \n + +*@par Attributes: +*replace_global:If True, the replacement is global +*(that is, all matches of the pattern regular expression in each input string +*are rewritten), otherwise the rewrite substitution is only made for the first +* pattern match . \n + +*@par Outputs: +*y:The text after applying pattern match and rewrite substitution . \n + +*@see RegexReplace() + +*@par Third-party framework compatibility +*compatible with RegexReplace op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(RegexReplace) + .INPUT(x, TensorType({DT_STRING})) + .INPUT(pattern, TensorType({DT_STRING})) + .INPUT(rewrite, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_STRING})) + .ATTR(replace_global, Bool, true) + .OP_END_FACTORY_REG(RegexReplace) + +/** +*@brief Converts each entry in the given tensor to strings . \n + +*@par Inputs: +*Supports many numeric types and boolean.: +include: +*x:A tensor can be trans to string . \n + +*@par Attributes: +*@li precision:The post-decimal precision to use for floating point numbers. +*Only used if precision > -1. +*@li scientific:Use scientific notation for floating point numbers. +*@li shortest:Use shortest representation (either scientific or standard) +*for floating point numbers.. +*@li width:Pad pre-decimal numbers to this width. Applies to both floating +*point and integer numbers. Only used if width > -1. +*@li fill:The value to pad if width > -1. If empty, pads with spaces. +*Another typical value is '0'. String cannot be longer than 1 character . \n + +*@par Outputs: +*y:The output tensor . \n + +*@see AsString() + +*@par Third-party framework compatibility +*compatible with AsString op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(AsString) + .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \ + DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_STRING})) + .ATTR(precision, Int, -1) + .ATTR(scientific, Bool, false) + .ATTR(shortest, Bool, false) + .ATTR(width, Int, -1) + .ATTR(fill, String, "") + .OP_END_FACTORY_REG(AsString) + +/** +*@brief Encode strings into web-safe base64 format . \n + +*@par Inputs: +*Input may or may not have padding at the end. See EncodeBase64 for padding. 
+*Web-safe means that input must use - and _ instead of + and /.: +include: +*x:Strings to be encoded . \n + +*@par Attributes: +*pad:Bool whether padding is applied at the ends . \n + +*@par Outputs: +*y:Input strings encoded in base64 . \n + +*@attention Constraints: +*Refer to the following article for more information on base64 format: +*en.wikipedia.org/wiki/Base64. Base64 strings may have padding with '=' +*at the end so that the encoded has length multiple of 4. +*See Padding section of the link above. Web-safe means that the encoder +*uses - and _ instead of + and / . \n + +*@see EncodeBase64() + +*@par Third-party framework compatibility +*compatible with EncodeBase64 op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(EncodeBase64) + .INPUT(x, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_STRING})) + .ATTR(pad, Bool, false) + .OP_END_FACTORY_REG(EncodeBase64) + +/** +*@brief Decode web-safe base64-encoded strings . \n + +*@par Inputs: +*Input may or may not have padding at the end. See EncodeBase64 for padding. +*Web-safe means that input must use - and _ instead of + and /.: +include: +*x:Base64 strings to decode . \n + +*@par Outputs: +*y:Decoded strings . \n + +*@see DecodeBase64() + +*@par Third-party framework compatibility +*compatible with DecodeBase64 op of tensorflow + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DecodeBase64) + .INPUT(x, TensorType({DT_STRING})) + .OUTPUT(y, TensorType({DT_STRING})) + .OP_END_FACTORY_REG(DecodeBase64) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/swap_co_ops.h b/third_party/fwkacllib/inc/inc/ops/swap_co_ops.h new file mode 100644 index 00000000..6e8eaac3 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/swap_co_ops.h @@ -0,0 +1,62 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file swap_co_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SWAP_CO_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SWAP_CO_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Folds the convolution input weight constant of the preceding layer +* of PSROIPooling to convert the N dimension of the weight from +* (output_dim, group_size*group_size) to +* (group_size*group_size, int((output_dim+15)/C0)*C0). +*@see PSROIPooling + +*@par Inputs: +* One input: +*x: An NCHW tensor of type float16 or float32, describing the weight of +* convolution. Dim N must equal output_dim*group_size*group_size . \n + +*@par Attributes: +*@li output_dim: A required int32, specifying the number of output channels. +* Must be greater than "0". +*@li group_size: A required int32, specifying the number of groups to encode +* position-sensitive score maps. Must be within the range (0, 128) . 
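+*A short configuration sketch (illustrative only; the set_input_* and
+*set_attr_* setters are assumed from the REG_OP convention):
+*@code
+*  ge::op::SwapCo swap_op("swap_co");
+*  swap_op.set_input_x(weight_op);     // hypothetical folded-weight producer
+*  swap_op.set_attr_output_dim(21);    // e.g. 21 output channels
+*  swap_op.set_attr_group_size(7);     // 7x7 position-sensitive score maps
+*@endcode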
\n + +*@par Outputs: +*y: An NCHW tensor of type float16 or float32, describing the result weight +* of convolution. +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ + +REG_OP(SwapCo) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(output_dim, Int, 0) + .ATTR(group_size, Int, 0) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(SwapCo) + +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_SWAP_CO_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/target_crop_and_resize.h b/third_party/fwkacllib/inc/inc/ops/target_crop_and_resize.h new file mode 100644 index 00000000..9bef1d7b --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/target_crop_and_resize.h @@ -0,0 +1,59 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file target_crop_and_resize.h + * \brief + */ +#ifndef GE_OP_TARGET_CROP_AND_RESIZE_H +#define GE_OP_TARGET_CROP_AND_RESIZE_H + +#include "graph/operator_reg.h" + +namespace ge { + +/** +*@brief Performs crop and resize on images. + +*@par Inputs: +*@li x: An NCHW tensor of type uint8, specifying the input to the data layer. +*@li boxes: Crop parameters of type int32. \n +*@li box_index: Batch index parameters of type int32. The batch of the input x to be cropped and resize. \n + +*@par Attributes: +*output_h: A required int, specifying the height of output. \n +*output_w: A required int, specifying the width of output. \n +*input_format: A required string, specifying the input format. \n + +*@par Outputs: +*y: The output tensor of type uint8, format only support NC1HWC0_C04. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(TargetCropAndResize) + .INPUT(x, TensorType({DT_UINT8})) + .INPUT(boxes, TensorType({DT_INT32})) + .INPUT(box_index, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_UINT8})) + .ATTR(output_h, Int, 224) + .ATTR(output_w, Int, 224) + .ATTR(input_format, String, "YUV420SP_U8") + .OP_END_FACTORY_REG(TargetCropAndResize) +} +#endif //GE_OP_TARGET_CROP_AND_RESIZE_H diff --git a/third_party/fwkacllib/inc/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/inc/ops/transformation_ops.h new file mode 100644 index 00000000..1b30c2e1 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/transformation_ops.h @@ -0,0 +1,721 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file transformation_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief This operation convert output dataType and shape + +*@par Inputs: +*The input handle must have the resource type. Inputs include: +*@li x:A list of Tensor objects. One or more tensors from which +the enqueued tensors should be taken . \n + +*@par Outputs: +*@li y:A list of Tensor objects. One or more tensors from which +the enqueued tensors should be taken . \n + +*@par Attributes: +*@li type: An optional ge::DataType. It refers to the target data type of outputs . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow QueueIsClosed operator. +*/ + +REG_OP(Bitcast) + .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, + DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) + .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, + DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) + .REQUIRED_ATTR(type, Type) + .OP_END_FACTORY_REG(Bitcast) + +/** +*@brief Convert tensor format from HWCN to C1HWNCoC0 . \n + +*@par Inputs: +*x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN . \n + +*@par Outputs: +*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0. +*/ +REG_OP(DepthwiseWeight4DTo6D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) + .OP_END_FACTORY_REG(DepthwiseWeight4DTo6D) + +/** +*@brief Convert tensor format from C1HWNCoC0 to HWCN . \n + +*@par Inputs: +*x: A Tensor. Must be 6D Tensor of type float16, float32, int32, uint16, with format C1HWNCoC0 . \n + +*@par Attributes: +*channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN . \n + +*@par Outputs: +*y: A 4D Tensor. Has the same type as "x", with format HWCN. +*/ +REG_OP(DepthwiseWeight6DTo4D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) + .ATTR(channel_size, Int, 16) + .OP_END_FACTORY_REG(DepthwiseWeight6DTo4D) + +/** +*@brief Permutes the dimensions according to perm. + The returned tensor's dimension i will correspond to the input dimension perm[i] . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n + +*@par Attributes: +*perm: A permutation of the dimensions of "x" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use Transpose instead. +*/ +REG_OP(TransposeD) + .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, + DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, + DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(perm, ListInt) + .OP_END_FACTORY_REG(TransposeD) + +/** +*@brief Permutes the dimensions according to perm. 
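+*For example (using the indexing rule stated next), a tensor of shape
+*(2, 3, 4) permuted with perm = (1, 2, 0) yields a tensor of shape (3, 4, 2).
+*A minimal sketch with the attr-based variant TransposeD registered above
+*(illustrative only; setters assumed from the REG_OP convention):
+*@code
+*  ge::op::TransposeD tr("transpose_d");
+*  tr.set_input_x(input_op);        // hypothetical op producing shape (2, 3, 4)
+*  tr.set_attr_perm({1, 2, 0});     // output shape (3, 4, 2)
+*@endcode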
+ The returned tensor's dimension i will correspond to the input dimension perm[i] . \n + +*@par Inputs: +*Two inputs, including: +*@li x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*@li perm: A Tensor of type int32 or int64. A permutation of the dimensions of "x" . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Transpose. +*/ +REG_OP(Transpose) + .INPUT(x, TensorType::BasicType()) + .INPUT(perm, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Transpose) + +/** +*@brief Doing format_transfer for various data format only +support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW" +"NCHW" to "FRACTAL_Zn" or "FRACTAL_Zn" to "NCHW". +"HWCN" to "FRACTAL_Zn" or "FRACTAL_Zn" to "HWCN" . \n + +*@par Inputs: +*src: A Tensor dtype of all types . \n + +*@par Attributes: +*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. +*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. +*@li group: A optional int32, default value is 1. \n + +*@par Outputs: +*dst: A Tensor dtype of all types. +*/ +REG_OP(TransData) + .INPUT(src, TensorType::BasicType()) + .OUTPUT(dst, TensorType::BasicType()) + .REQUIRED_ATTR(src_format, String) + .REQUIRED_ATTR(dst_format, String) + .ATTR(groups, Int, 1) + .OP_END_FACTORY_REG(TransData) + +/** +*@brief Permutes the dimensions according to order. + The returned tensor's dimension i will correspond to the input dimension order[i] . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32 . \n + +*@par Attributes: +*order: A permutation of the dimensions of "x".Type is int32.support any axis transformation.Defaults to "{0}" + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*/ +REG_OP(Permute) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(order, ListInt, {0}) + .OP_END_FACTORY_REG(Permute) + +/** +*@brief Flattens the inputs. Reserves axis 0 and flattens the input tensors +* along axis 1 . \n + +*@par Inputs: +*One input: +*x: A multi-dimensional Tensor. Must be one of the following types: +* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 . \n + +*@par Outputs: +*y: A 2D flattened Tensor (Reserves axis 0 and flattens the input tensors +* along axis 1). Must be one of the following data types: int8, uint8, int16, +* uint16, int32, uint32, int64,uint64, float16, float32 . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Flatten. +*/ +REG_OP(Flatten) + .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64, + DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, + DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64, + DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(Flatten) + +/** +*@brief Permutes and crops the input tensor . \n + +*@par Inputs: +* Three inputs, including: +*@li x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0. +*@li block_shape: A 1D list or tuple of int32 or int64. +*@li crops: A 2D list or tuple of int32 or int64. Specifies the amount to +*crop from start and end dimensions after permutation . \n + +*@par Outputs: +*y: A Tensor with format NC1HWC0. Has the same type as input "x" . 
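+*For instance, with block_shape = [2, 2] and zero crops, an input of shape
+*(4, C1, 2, 2, C0) is rearranged into shape (1, C1, 4, 4, C0).
+*A minimal sketch (illustrative only; it assumes the REG_OP-generated wrapper
+*and hypothetical const operators feeding "block_shape" and "crops"):
+*@code
+*  ge::op::BatchToSpaceND b2s("batch_to_space_nd");
+*  b2s.set_input_x(feature_op);
+*  b2s.set_input_block_shape(block_shape_const);  // e.g. [2, 2]
+*  b2s.set_input_crops(crops_const);              // e.g. [[0, 0], [0, 0]]
+*@endcode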
\n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchToSpaceND. +*/ +REG_OP(BatchToSpaceND) + .INPUT(x, TensorType::BasicType()) + .INPUT(block_shape, TensorType::IndexNumberType()) + .INPUT(crops, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(BatchToSpaceND) + +/** +*@brief Permutes and crops the input tensor . \n + +*@par Inputs: +* One input: +*x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0 . \n + +*@par Attributes: +*@li block_shape: A required 1D list or tuple of int32 or int64. +*@li crops: A required 2D list or tuple of int32 or int64. Specifies the amount to crop +* from the start and end dimensions after permutation . \n + +*@par Outputs: +*y: A Tensor with format NC1HWC0. Has the same type as input "x". + + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchToSpaceND. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpaceND instead. +*/ +REG_OP(BatchToSpaceNDD) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(block_shape, ListInt) + .REQUIRED_ATTR(crops, ListInt) + .OP_END_FACTORY_REG(BatchToSpaceNDD) + +/** +*@brief Pads and permutes the input tensor . \n + +*@par Inputs: +* Three inputs, including: +*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. +*@li block_shape: A 1D list or tuple of int32 or int64. +*@li paddings: A 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n + +*@par Outputs: +*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SpaceToBatchND. +*/ +REG_OP(SpaceToBatchND) + .INPUT(x, TensorType::BasicType()) + .INPUT(block_shape, TensorType::IndexNumberType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(SpaceToBatchND) + +/** +*@brief Pads and permutes the input tensor . \n + +*@par Inputs: +* One input: +*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n + +*@par Attributes: +*@li block_shape: A required 1D list or tuple of int32 or int64. +*@li paddings: A required 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n + +*@par Outputs: +*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SpaceToBatchND. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatchND instead. +*/ +REG_OP(SpaceToBatchNDD) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(block_shape, ListInt) + .REQUIRED_ATTR(paddings, ListInt) + .OP_END_FACTORY_REG(SpaceToBatchNDD) + +/** +*@brief Outputs a copy of the input tensor where values from the "height" and +* "width" dimensions are moved to the "depth" dimension . \n + +*@par Inputs: +*x: An NHWC Tensor. Must be one of the following types: +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. + + +*@par Attributes: +*@li block_size: A required int, specifying the input block size. +*@li data_format: An optional string, specifying the data format. Defaults to +* "NHWC" . \n + +*@par Outputs: +*y: A Tensor. 
Has the same type as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SpaceToDepth. +*/ +REG_OP(SpaceToDepth) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(block_size, Int) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(SpaceToDepth) + +/** +*@brief Rearranges data from depth into blocks of spatial data . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, double, int32, uint8, +* int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64 + +*@par Attributes: +*Two attributes, including: +* @li block_size: An int >= 2, specifying the size of the spatial block. +* @li data_format: An optional string, specifying the data format. Defaults to "NHWC" . \n + +*@par Outputs: +*y: A Tensor of the same type as "x" . \n + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator DepthToSpace. +*/ +REG_OP(DepthToSpace) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(block_size, Int) + .ATTR(data_format, String, "NHWC") + .OP_END_FACTORY_REG(DepthToSpace) + +/** +*@brief Permutes data into spatial data blocks and then prunes them . \n + +*@par Inputs: +*@li x: A 4D Tensor with format. Must set the format, supported format list ["NCHW, NHWC"] +*@li crops: A 1D list or tuple of int32 or int64 . \n + +*Must be one of the following types: float16, float32 + +*@par Attributes: +*block_size: A required int8, int16, int32, or int64. No default value . \n + +*@par Outputs: +*y: A 4D Tensor with format NHWC, + +* of type float16 or float32 . \n + +*@attention Constraints: +*@li The size of the first dimension of input "x" must be divisible by (block_size * block_size). +*@li "crops" is a 4Dshape [batch, height, width, depth], height = height_pad - crop_top - crop_bottom, +*width = width_pad - crop_left - crop_right. +*@li block_size > 2 + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchToSpace. +*/ +REG_OP(BatchToSpace) + .INPUT(x, TensorType::BasicType()) + .INPUT(crops, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(block_size, Int) + .OP_END_FACTORY_REG(BatchToSpace) + +/** +*@brief Rearrange the batch (permutes) data into spatial data blocks, and then crop them . \n + +*@par Inputs: +* One input: +*x: An Tensor of shape [batch*block_size*block_size, height_pad/block_size, width_pad/block_size, depth]. +*The batch size of the input tensor must be divisible by (block size * block size). +*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, +*int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n + +*@par Attributes: +*@li block_size: Must be one of the following types: `int32`, `int64`. +*@li crops: An Tensor. Must be one of the following types: int32, Int64. +*2D tensor with non negative integer of shape [2, 2]. It specifies how many +*elements are clipped from the intermediate result of spatial dimension . \n + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x" . \n + +*@attention Constraints: +*@li The size of the first dimension of input "x" must be divisible by (block_size * block_size). +*@li "crops" is a 2D tensor of non-negative integers with shape (2, 2). 
+*@li block_size > 2 + + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchToSpace. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead. +*/ +REG_OP(BatchToSpaceD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, + DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64, + DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, + DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64, + DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) + .REQUIRED_ATTR(block_size, Int) + .REQUIRED_ATTR(crops, ListInt) + .OP_END_FACTORY_REG(BatchToSpaceD) + +/** +*@brief Outputs a copy of the input tensor where values from the "height" and +* "width" dimensions are padded and rearranged to the "batch" dimension . \n + +*@par Inputs: +* Two inputs, including: +*@li x: An 4D Tensor. Must be one of the following types: +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +* Must set the format, supported format list ["NCHW, NHWC"] +*@li paddings: A 2D tensor of type int, specifying the input . \n + +*@par Attributes: +*block_size: A required int, specifying the input block size . \n + +*@par Outputs: +*y: A Tensor. Has the same type as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SpaceToBatch. +*/ +REG_OP(SpaceToBatch) + .INPUT(x, TensorType::BasicType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(block_size, Int) + .OP_END_FACTORY_REG(SpaceToBatch) + +/** +*@brief Outputs a copy of the input tensor where values from the "height" and "width" dimensions are padded and rearranged to the "batch" dimension . \n + +*@par Inputs: +*x: An NHWC Tensor. Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. + + +*@par Attributes: +*@li block_size: A required int, specifying the input block size. +*@li paddings: A 2D tensor. All data types are supported . \n + +*@par Outputs: +*y: A Tensor. Has the same type as input "x". +*@par Third-party framework compatibility +*@ Compatible with the TensorFlow operator SpaceToBatch. +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatch instead. +*/ +REG_OP(SpaceToBatchD) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(block_size, Int) + .REQUIRED_ATTR(paddings, ListInt) + .OP_END_FACTORY_REG(SpaceToBatchD) + +/** +* @brief Unpacks the given dimension of a rank-R Tensor "x" into rank-(R-1) +* tensors . \n + +* @par Inputs: +* x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0 . \n + +* @par Attributes: +* @li num: A required int, specifying the number of tensors to be unpacked to. +* Defaults to "None". +* @li axis: An optional int, specifying the axis to unpack along. The value range +* is [-R, R) . \n + +* @par Outputs: +* y: Dynamic output. The list of Tensor objects unpacked from "x", of type BasicType . \n + +* @attention Constraints: +* @li If "num" is not specified, it is inferred from the shape of "x". 
+* @li For the ND format, "axis" is in the range [-R, R); For the NC1HWC0 format, +* "axis" must not be 2, 3, -2, or -3 . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Unpack. +*/ +REG_OP(Unpack) + .INPUT(x, TensorType::BasicType()) + .DYNAMIC_OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(num, Int) + .ATTR(axis, Int, 0) + .OP_END_FACTORY_REG(Unpack) + +/** +* @brief Extract "patches" from "images" and stacks them in the "depth" +* dimension of the output . \n + +* @par Inputs: +* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the +* following types:float32, double, int32, uint8, int16, int8, int64, uint16, +* float16, uint32, uint64. The inputs must have data_format with one of follows: +* NHWC, NCHW. + +* @par Attributes: +* @li ksizes: A required list or tuple. The size of the sliding window for each +* dimension of images. +* @li strides: A required list or tuple. How far the centers of two consecutive +* patches are in the images. Must be: [1, stride_rows, stride_cols, 1]. +* @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1]. +* This is the input stride, specifying how far two consecutive patch +* samples are in the input. Equivalent to extracting patches +* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) * +* (rates - 1), followed by subsampling them spatially by a factor of rates. +* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. +* @li padding: A required string. The type of padding algorithm to use, + support "SAME" or "VALID". \n + +* @par Outputs: +* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * +* ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols +* x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols" +* are the dimensions of the output patches . \n + +* @attention Constraints: +* "ksizes", "strides" and "rates" are lists of integers . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ExtractImagePatches. +*/ +REG_OP(ExtractImagePatches) + .INPUT(x, TensorType::RealNumberType()) + .OUTPUT(y, TensorType::RealNumberType()) + .REQUIRED_ATTR(ksizes, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(rates, ListInt) + .REQUIRED_ATTR(padding, String) + .OP_END_FACTORY_REG(ExtractImagePatches) + +/** +* @brief Extract "patches" from "input" and put them in the "depth" +* dimension of the output . \n + +* @par Inputs: +* x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n +* The inputs must have data_format with one of follows: NDHWC, NCDHW. \n + +* @par Attributes: +* @li ksizes: A required list or tuple. The size of the sliding window for each +* dimension of "x". +* @li strides: A required list or tuple. How far the centers of two consecutive +* patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1]. +* @li padding: A required string. The type of padding algorithm to use , +* support "SAME" or "VALID" . \n + +* @par Outputs: +* Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * +* ksize_rows * ksize_cols * depth] containing patches with size (ksize_rows * ksize_cols +* * depth) vectorized in the "depth" dimension. Note "out_planes", "out_rows" and "out_cols" +* are the dimensions of the output patches . \n + +* @attention Constraints: +* "ksizes" and "strides" are lists of integers. 
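+* A short sketch (illustrative only; setters assumed from the REG_OP
+* convention, with a hypothetical NDHWC producer "volume_op"):
+* @code
+*   ge::op::ExtractVolumePatches evp("extract_volume_patches");
+*   evp.set_input_x(volume_op);
+*   evp.set_attr_ksizes({1, 2, 3, 3, 1});    // patch size per dimension
+*   evp.set_attr_strides({1, 2, 3, 3, 1});   // non-overlapping patches
+*   evp.set_attr_padding("VALID");
+* @endcode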
+* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ExtractVolumePatches. +*/ +REG_OP(ExtractVolumePatches) + .INPUT(x, TensorType::REALNUMBERTYPE()) + .OUTPUT(y, TensorType::REALNUMBERTYPE()) + .REQUIRED_ATTR(ksizes, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(padding, String) + .OP_END_FACTORY_REG(ExtractVolumePatches) + +/** +*@brief Confuse reshape and transpose . \n + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n + +*@par Attributes: +*@li perm: A permutation of the dimensions of "x". +*@li shape: The shape of the input. +*@li transpose_first: If True, the transpose is first, otherwise the reshape is first . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +* +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ConfusionTranspose instead. +*/ +REG_OP(ConfusionTransposeD) + .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, + DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, + DT_UINT16, DT_UINT32, DT_UINT64, DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(perm, ListInt) + .REQUIRED_ATTR(shape, ListInt) + .REQUIRED_ATTR(transpose_first, Bool) + .OP_END_FACTORY_REG(ConfusionTransposeD) + +/** +*@brief Confuse reshape and transpose . \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*@li shape: The shape of the input . \n + +*@par Attributes: +*@li perm: A permutation of the dimensions of "x". +*@li transpose_first: If True, the transpose is first, otherwise the reshape is first . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(ConfusionTranspose) + .INPUT(x, TensorType::BasicType()) + .INPUT(shape, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(perm, ListInt) + .REQUIRED_ATTR(transpose_first, Bool) + .OP_END_FACTORY_REG(ConfusionTranspose) + +/** +*@brief Flattens the input tensor to one-dimensional . \n + +*@par Inputs: +*x: An ND tensor. All data types are supported . \n + +*@par Attributes: +*@li axis: An optional int32, specifying the first axis to flatten. All preceding axes are retained in the output. Defaults to "1". +*@li end_axis: An optional int32, specifying the last axis to flatten. All following axes are retained in the output. Defaults to "-1" . \n + +*@par Outputs: +*y: The flattened ND tensor. All data types are supported . \n + +*@attention Constraints: +* "axis" and "end_axis" must be within the dimension range of the input. This operator cannot be directly called by the acllopExecute API. +*@par Third-party framework compatibility +* Compatible with the Caffe operator Flatten. +*/ +REG_OP(FlattenV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, + DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, + DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) + .ATTR(axis, Int, 1) + .ATTR(end_axis, Int, -1) + .OP_END_FACTORY_REG(FlattenV2) + +/** +*@brief Compress large weight to small one. Usually inserted before Conv2d. +* +*@par Inputs: +*weight: A tensor before compress. 
Must be one of the following types: DT_INT8, DT_FLOAT16 +* +*@par Outputs: +*@li weight_compress: A tensor after compress. Must be one of the following types: DT_INT8, DT_FLOAT16 +*@li compress_index: A tensor. Must be one of the following types: DT_INT8 +* +*@par Attributes: +*compress_parameters: A required int8, specifying the compressing block. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(Compress) +.INPUT(weight, TensorType({DT_INT8, DT_FLOAT16})) +.OUTPUT(weight_compress, TensorType({DT_INT8, DT_FLOAT16})) +.OUTPUT(compress_index, TensorType({DT_INT8})) +.REQUIRED_ATTR(compress_parameters, ListInt) +.OP_END_FACTORY_REG(Compress) + +/** +*@brief Compress large weight to small one. Usually inserted before FullyConnection. +* +*@par Inputs: +*weight: A tensor before compress. Must be one of the following types: DT_INT8, DT_FLOAT16 +* +*@par Outputs: +*@li weight_compress: A tensor after compress. Must be one of the following types: DT_INT8, DT_FLOAT16 +*@li compress_index: A tensor. Must be one of the following types: DT_INT8 +* +*@par Attributes: +*compress_parameters: A required int8, specifying the compressing block. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(CompressFcOp) +.INPUT(weight, TensorType({DT_INT8})) +.OUTPUT(weight_compress, TensorType({DT_INT8})) +.OUTPUT(compress_index, TensorType({DT_INT8})) +.REQUIRED_ATTR(compress_parameters, ListInt) +.OP_END_FACTORY_REG(CompressFcOp) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/inc/ops/warp_perspective_ops.h new file mode 100644 index 00000000..8ef69d8b --- /dev/null +++ b/third_party/fwkacllib/inc/inc/ops/warp_perspective_ops.h @@ -0,0 +1,59 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file warp_perspective_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_WARP_PERSPECTIVE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_WARP_PERSPECTIVE_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { +/** +*@brief Applies a perspective transformation to an image . \n + +*@par Restrictions: +*Warning:THIS FUNCTION IS DEPRECATED. Please do not use. \n + +*@par Inputs: +*@li x: input tensor, format NCHW, type must be float. +*@li matrix: transformation matrix, format ND , shape must be (N, 9), type must be float . \n + +*@par Attributes: +*@li out_height:output height. +*@li out_width:output width. +*@li borderType:border processing way, only support BORDER_CONSTANT and BORDER_REPLICATE, default BORDER_CONSTANT. +*@li constant: border processed value when borderType is BORDER_CONSTANT . \n + +*@par Outputs: +*@li y: output tensor, format NCHW, type must be float. 
+*/ + +REG_OP(WarpPerspective) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(matrix, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(out_height, Int) + .REQUIRED_ATTR(out_width, Int) + .ATTR(border_type, String, "BORDER_CONSTANT") + .ATTR(constant, Float, 0) + .OP_END_FACTORY_REG(WarpPerspective) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_WARP_PERSPECTIVE_OPS_H_ diff --git a/third_party/fwkacllib/inc/inc/register/op_kernel_registry.h b/third_party/fwkacllib/inc/inc/register/op_kernel_registry.h new file mode 100644 index 00000000..5fed8960 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/register/op_kernel_registry.h @@ -0,0 +1,49 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_REGISTER_OP_KERNEL_REGISTRY_H_ +#define INC_REGISTER_OP_KERNEL_REGISTRY_H_ +#include +#include +#include "register/register_types.h" +#include "register.h" + +namespace ge { +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry { + public: + using CreateFn = HostCpuOp* (*)(); + ~OpKernelRegistry(); + + static OpKernelRegistry& GetInstance() { + static OpKernelRegistry instance; + return instance; + } + + bool IsRegistered(const std::string &op_type); + + void RegisterHostCpuOp(const std::string &op_type, CreateFn create_fn); + + std::unique_ptr CreateHostCpuOp(const std::string &op_type); + + private: + OpKernelRegistry(); + class OpKernelRegistryImpl; + /*lint -e148*/ + std::unique_ptr impl_; +}; +} // namespace ge + +#endif // INC_REGISTER_OP_KERNEL_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/inc/register/op_registry.h b/third_party/fwkacllib/inc/inc/register/op_registry.h new file mode 100644 index 00000000..318eb3ba --- /dev/null +++ b/third_party/fwkacllib/inc/inc/register/op_registry.h @@ -0,0 +1,96 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_REGISTER_OP_REGISTRY_H_ +#define INC_REGISTER_OP_REGISTRY_H_ + +#include +#include +#include +#include +#include + +#include "register/register.h" + +namespace domi { +enum RemoveInputType { + OMG_MOVE_TYPE_DTYPE = 0, + OMG_MOVE_TYPE_VALUE, + OMG_MOVE_TYPE_SHAPE, + OMG_MOVE_TYPE_FORMAT, + OMG_MOVE_TYPE_AXIS, + OMG_MOVE_TYPE_SCALAR_VALUE, + OMG_REMOVE_TYPE_WITH_COND = 1000, + OMG_REMOVE_INPUT_WITH_ORIGINAL_TYPE, + OMG_INPUT_REORDER, +}; + +struct RemoveInputConfigure { + int inputIdx = INT_MAX; + std::string attrName; + RemoveInputType moveType; + bool attrValue = false; + std::string originalType; + std::vector input_order; +}; + +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { + public: + static OpRegistry *Instance(); + + std::vector registrationDatas; + + bool Register(const OpRegistrationData ®_data); + + domi::ImplyType GetImplyType(const std::string &op_type); + + void GetOpTypeByImplyType(std::vector &vec_op_type, const domi::ImplyType &imply_type); + + domi::ParseParamFunc GetParseParamFunc(const std::string &op_type, const std::string &ori_type); + + domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &ori_type); + + domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type, const std::string &ori_type); + + domi::FusionParseParamByOpFunc GetFusionParseParamByOpFunc(const std::string &op_type, + const std::string &ori_type); + + domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); + + Status GetParseSubgraphPostFunc(const std::string &op_type, domi::ParseSubgraphFuncV2 &parse_subgraph_func); + + domi::ImplyType GetImplyTypeByOriOpType(const std::string &ori_optype); + + const std::vector &GetRemoveInputConfigure(const std::string &ori_optype) const; + + bool GetOmTypeByOriOpType(const std::string &ori_optype, std::string &om_type); + + ParseOpToGraphFunc GetParseOpToGraphFunc(const std::string &op_type, const std::string &ori_type); + + private: + std::unordered_map op_run_mode_map_; + std::unordered_map op_parse_params_fn_map_; + std::unordered_map parse_params_by_op_func_map_; + std::unordered_map fusion_op_parse_params_fn_map_; + std::unordered_map fusion_parse_params_by_op_fn_map_; + std::unordered_map op_types_to_parse_subgraph_post_func_; + std::unordered_map> remove_input_configure_map_; + std::unordered_map origin_type_to_om_type_; + std::unordered_map parse_op_to_graph_fn_map_; + std::unordered_map op_types_to_parse_subgraph_post_func_v2_; +}; +} // namespace domi +#endif // INC_REGISTER_OP_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/inc/runtime/base.h b/third_party/fwkacllib/inc/inc/runtime/base.h new file mode 100644 index 00000000..5b246eed --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/base.h @@ -0,0 +1,358 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef __CCE_RUNTIME_BASE_H__ +#define __CCE_RUNTIME_BASE_H__ + +#include +#include "toolchain/prof_callback.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +// If you need export the function of this library in Win32 dll, use __declspec(dllexport) +#ifndef RTS_API +#ifdef RTS_DLL_EXPORT +#define RTS_API __declspec(dllexport) +#else +#define RTS_API +#endif +#endif + +typedef int32_t rtError_t; +static const int32_t RT_ERROR_NONE = 0; // success + +/** + * @ingroup dvrt_base + * @brief runtime exception numbers. + */ +typedef enum tagRtExceptionType { + RT_EXCEPTION_NONE = 0, + RT_EXCEPTION_TS_DOWN = 1, + RT_EXCEPTION_TASK_TIMEOUT = 2, + RT_EXCEPTION_TASK_FAILURE = 3, + RT_EXCEPTION_DEV_RUNNING_DOWN = 4, + RT_EXCEPTION_STREAM_ID_FREE_FAILED = 5 +} rtExceptionType; + +/** + * @ingroup dvrt_base + * @brief Switch type. + */ +typedef enum tagRtCondition { + RT_EQUAL = 0, + RT_NOT_EQUAL, + RT_GREATER, + RT_GREATER_OR_EQUAL, + RT_LESS, + RT_LESS_OR_EQUAL +} rtCondition_t; + +/** + * @ingroup dvrt_base + * @brief Data Type of Extensible Switch Task. + */ +typedef enum tagRtSwitchDataType { + RT_SWITCH_INT32 = 0, + RT_SWITCH_INT64 = 1, +} rtSwitchDataType_t; + +typedef enum tagRtStreamFlagType { + RT_HEAD_STREAM = 0, // first stream + RT_INVALID_FLAG = 0xFFFFFFFF, +} rtStreamFlagType_t; + +typedef enum tagRtLimitType { + RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms +} rtLimitType_t; + +typedef struct rtExceptionInfo { + uint32_t taskid; + uint32_t streamid; + uint32_t tid; + uint32_t deviceid; + uint32_t retcode; +} rtExceptionInfo; + +typedef void (*rtErrorCallback)(rtExceptionType); + +typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); + +typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); + +/** + * @ingroup dvrt_base + * @brief stream handle. + */ +typedef void *rtStream_t; + +/** + * @ingroup dvrt_base + * @brief runtime event handle. + */ +typedef void *rtEvent_t; + +/** + * @ingroup dvrt_base + * @brief label handle. + */ +typedef void *rtLabel_t; + +/** + * @ingroup dvrt_base + * @brief model handle. + */ +typedef void *rtModel_t; + +/** + * @ingroup profiling_base + * @brief runtime handle. + */ +RTS_API rtError_t rtSetProfDirEx(const char *profDir, const char *address, const char *jobCtx); + +/** + * @ingroup profiling_base + * @brief init profiler object. + */ +RTS_API rtError_t rtProfilerInit(const char *profDir, const char *address, const char *jobCtx); + +/** + * @ingroup profiling_base + * @brief config rts profiler. + */ +RTS_API rtError_t rtProfilerConfig(uint16_t type); + +/** + * @ingroup profiling_base + * @brief start rts profiler. + */ +RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList); + +/** + * @ingroup profiling_base + * @brief stop rts profiler. + */ +RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList); + +/** + * @ingroup profiling_base + * @brief ts send keypoint profiler log. + */ +RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); + +/** + * @ingroup profiling_base + * @brief ts set profiling reporter callback. + */ +RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback); + +/** + * @ingroup dvrt_base + * @brief Returns the last error from a runtime call. + */ +RTS_API rtError_t rtGetLastError(); + +/** + * @ingroup dvrt_base + * @brief Returns the last error from a runtime call. 
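+ * A minimal error-checking sketch (illustrative only; whether the "get"
+ * variant also clears the recorded error is assumed from the naming and is
+ * not stated in this header):
+ * @code
+ *   rtLabel_t label = NULL;
+ *   if (rtLabelCreate(&label) != RT_ERROR_NONE) {   // any runtime call
+ *     rtError_t last = rtGetLastError();            // last recorded error
+ *     (void)last;
+ *   }
+ *   (void)rtPeekAtLastError();                      // declared just below
+ * @endcode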
+ */ +RTS_API rtError_t rtPeekAtLastError(); + +/** + * @ingroup dvrt_base + * @brief register callback for error code + * @param [out] NA + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtSetExceptCallback(rtErrorCallback callback); + +/** + * @ingroup dvrt_base + * @brief register callback for task fail + * @param [out] NA + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); + +/** + * @ingroup dvrt_base + * @brief register callback for deviceid + * @param [in] uniName unique register name, can't be null + * @param [in] callback Device state callback function + * @param [out] NA + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); + +/** + * @ingroup dvrt_base + * @brief register callback for fail task + * @param [in] uniName unique register name, can't be null + * @param [in] callback fail task callback function + * @param [out] NA + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallback callback); + +/** + * @ingroup dvrt_base + * @brief notify handle. + */ +typedef void *rtNotify_t; + +/** + * @ingroup dvrt_base + * @brief create label instance + * @param [out] label created label + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelCreate(rtLabel_t *label); + +/** + * @ingroup dvrt_base + * @brief create label instance + * @param [out] label created label + * @param [in] model label set model + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelCreateV2(rtLabel_t *label, rtModel_t model); + +/** + * @ingroup dvrt_base + * @brief set label and stream instance + * @param [in] label set label + * @param [in] stream set stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelSet(rtLabel_t label, rtStream_t stream); + +/** + * @ingroup dvrt_base + * @brief destroy label instance + * @param [in] label label to destroy + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelDestroy(rtLabel_t label); + +/** + * @ingroup dvrt_base + * @brief label switch instance + * @param [in] ptr address to get value compared + * @param [in] condition + * @param [in] value to compare + * @param [in] true_label goto label + * @param [in] stream to submit label_switch task + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelSwitch(void *ptr, rtCondition_t condition, uint32_t value, rtLabel_t trueLabel, + rtStream_t stream); + +/** + * @ingroup dvrt_base + * @brief goto label instance + * @param [in] label goto label + * @param [in] stream to submit label_goto task + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelGoto(rtLabel_t label, rtStream_t stream); + +/** + * @ingroup dvrt_base + * @brief name label instance + * @param [in] label instance + * @param [in] name label name + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtNameLabel(rtLabel_t label, const char *name); + +/** + * @ingroup dvrt_base + * @brief label switch by index + * @param [in] ptr index value ptr + * @param [in] max index max value + * @param [in] labelInfoPtr label content info ptr + * 
@param [in] stream set stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelSwitchByIndex(void *ptr, uint32_t max, void *labelInfoPtr, rtStream_t stream); + +/** + * @ingroup dvrt_base + * @brief stream goto label + * @param [in] label goto label + * @param [in] stream stream to submit label_goto task + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelGotoEx(rtLabel_t label, rtStream_t stream); + +/** + * @ingroup dvrt_base + * @brief labels to dev info + * @param [in] label model label list + * @param [in] labelNumber label number + * @param [in] dst device ptr + * @param [in] dstMax dst size + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *dst, uint32_t dstMax); + +/** + * @ingroup dvrt_base + * @brief labels to dev info + * @param [out] label created label handle + * @param [in] stream label bind stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); + +/** + * @ingroup dvrt_base + * @brief labels to dev info + * @param [out] label created label handle + * @param [in] model label bind model + * @param [in] stream label bind stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream); + +/** + * @ingroup dvrt_base + * @brief get current thread last stream id and task id + * @param [out] stream id and task id + * @param [in] null + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for input null ptr + */ +RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif + +#endif // __CCE_RUNTIME_BASE_H__ diff --git a/third_party/fwkacllib/inc/inc/runtime/config.h b/third_party/fwkacllib/inc/inc/runtime/config.h new file mode 100644 index 00000000..fed7341a --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/config.h @@ -0,0 +1,210 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef __CCE_RUNTIME_CONFIG_H__ +#define __CCE_RUNTIME_CONFIG_H__ + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +#define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver)) +#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) +#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff) +#define PLAT_GET_VER(type) (type & 0xff) + +typedef enum tagRtArchType { + ARCH_BEGIN = 0, + ARCH_V100 = ARCH_BEGIN, + ARCH_V200, + ARCH_END, +} rtArchType_t; + +typedef enum tagRtChipType { + CHIP_BEGIN = 0, + CHIP_MINI = CHIP_BEGIN, + CHIP_CLOUD, + CHIP_MDC, + CHIP_LHISI, + CHIP_DC, + CHIP_CLOUD_V2, + CHIP_END, +} rtChipType_t; + +typedef enum tagRtAicpuScheType { + SCHEDULE_SOFTWARE = 0, /* Software Schedule */ + SCHEDULE_SOFTWARE_OPT, + SCHEDULE_HARDWARE, /* HWTS Schedule */ +} rtAicpuScheType; + +typedef enum tagRtVersion { + VER_BEGIN = 0, + VER_NA = VER_BEGIN, + VER_ES, + VER_CS, + VER_SD3403, + VER_END, +} rtVersion_t; + +/* match rtChipType_t */ +typedef enum tagRtPlatformType { + PLATFORM_BEGIN = 0, + PLATFORM_MINI_V1 = PLATFORM_BEGIN, + PLATFORM_CLOUD_V1, + PLATFORM_MINI_V2, + PLATFORM_LHISI_ES, + PLATFORM_LHISI_CS, + PLATFORM_DC, + PLATFORM_CLOUD_V2, + PLATFORM_END, +} rtPlatformType_t; + +typedef enum tagRtCubeFracMKNFp16 { + RT_CUBE_MKN_FP16_2_16_16 = 0, + RT_CUBE_MKN_FP16_4_16_16, + RT_CUBE_MKN_FP16_16_16_16, + RT_CUBE_MKN_FP16_Default, +} rtCubeFracMKNFp16_t; + +typedef enum tagRtCubeFracMKNInt8 { + RT_CUBE_MKN_INT8_2_32_16 = 0, + RT_CUBE_MKN_INT8_4_32_4, + RT_CUBE_MKN_INT8_4_32_16, + RT_CUBE_MKN_INT8_16_32_16, + RT_CUBE_MKN_INT8_Default, +} rtCubeFracMKNInt8_t; + +typedef enum tagRtVecFracVmulMKNFp16 { + RT_VEC_VMUL_MKN_FP16_1_16_16 = 0, + RT_VEC_VMUL_MKN_FP16_Default, +} rtVecFracVmulMKNFp16_t; + +typedef enum tagRtVecFracVmulMKNInt8 { + RT_VEC_VMUL_MKN_INT8_1_32_16 = 0, + RT_VEC_VMUL_MKN_INT8_Default, +} rtVecFracVmulMKNInt8_t; + +typedef struct tagRtAiCoreSpec { + uint32_t cubeFreq; + uint32_t cubeMSize; + uint32_t cubeKSize; + uint32_t cubeNSize; + rtCubeFracMKNFp16_t cubeFracMKNFp16; + rtCubeFracMKNInt8_t cubeFracMKNInt8; + rtVecFracVmulMKNFp16_t vecFracVmulMKNFp16; + rtVecFracVmulMKNInt8_t vecFracVmulMKNInt8; +} rtAiCoreSpec_t; + +typedef struct tagRtAiCoreRatesPara { + uint32_t ddrRate; + uint32_t l2Rate; + uint32_t l2ReadRate; + uint32_t l2WriteRate; + uint32_t l1ToL0ARate; + uint32_t l1ToL0BRate; + uint32_t l0CToUBRate; + uint32_t ubToL2; + uint32_t ubToDDR; + uint32_t ubToL1; +} rtAiCoreMemoryRates_t; + +typedef struct tagRtMemoryConfig { + uint32_t flowtableSize; + uint32_t compilerSize; +} rtMemoryConfig_t; + +typedef struct tagRtPlatformConfig { + uint32_t platformConfig; +} rtPlatformConfig_t; + +/** + * @ingroup + * @brief get AI core count + * @param [in] aiCoreCnt + * @return aiCoreCnt + */ +RTS_API rtError_t rtGetAiCoreCount(uint32_t *aiCoreCnt); + +/** + * @ingroup + * @brief get AI cpu count + * @param [in] aiCpuCnt + * @return aiCpuCnt + */ +RTS_API rtError_t rtGetAiCpuCount(uint32_t *aiCpuCnt); + +/** + * @ingroup + * @brief get AI core frequency + * @param [in] aiCoreSpec + * @return aiCoreSpec + */ +RTS_API rtError_t rtGetAiCoreSpec(rtAiCoreSpec_t *aiCoreSpec); + +/** + * @ingroup + * @brief AI get core band Info + * @param [in] aiCoreMemoryRates + * @return aiCoreMemoryRates + */ +RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRates); + +/** + * @ingroup + * @brief AI get core buffer Info,FlowTable Size,Compiler Size + * @param [in] memoryConfig + * @return 
memoryConfig + */ +RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); + +/** + * @ingroup + * @brief get l2 buffer Info,virtual baseaddr,Size + * @param [in] stream + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); + +/** + * @ingroup + * @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be + * represented by 9020. + * @param [out] runtimeVersion + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); + + +/** + * @ingroup + * @brief get device feature ability by device id, such as task schedule ability. + * @param [in] deviceId + * @param [in] moduleType + * @param [in] featureType + * @param [out] value + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif + +#endif // __CCE_RUNTIME_STREAM_H__ diff --git a/third_party/fwkacllib/inc/inc/runtime/context.h b/third_party/fwkacllib/inc/inc/runtime/context.h new file mode 100644 index 00000000..e95d4c89 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/context.h @@ -0,0 +1,165 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef __CCE_RUNTIME_CONTEXT_H__ +#define __CCE_RUNTIME_CONTEXT_H__ + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +/** + * @ingroup rt_context + * @brief runtime context handle. + */ +typedef void *rtContext_t; + +typedef enum tagDryRunFlag { + RT_DRYRUN_FLAG_FALSE = 0, + RT_DRYRUN_FLAG_TRUE = 1, +} rtDryRunFlag_t; + +typedef enum tagCtxMode { + RT_CTX_NORMAL_MODE = 0, + RT_CTX_GEN_MODE = 1, +} rtCtxMode_t; + +typedef struct tagRtGroupInfo { + int32_t groupId; + uint32_t flag; + uint32_t aicoreNum; + uint32_t aicpuNum; + uint32_t aivectorNum; + uint32_t sdmaNum; + uint32_t activeStreamNum; + void *extrPtr; +} rtGroupInfo_t; + +/** + * @ingroup rt_context + * @brief create context and associates it with the calling thread + * @param [out] ctx created context + * @param [in] flags context creation flag. set to 0. + * @param [in] device device to create context on + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtCtxCreate(rtContext_t *ctx, uint32_t flags, int32_t device); + +/** + * @ingroup rt_context + * @brief create context and associates it with the calling thread + * @param [out] ctx created context + * @param [in] flags context creation flag. set to 0. 
+ * @param [in] device device to create context on + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtCtxCreateEx(rtContext_t *ctx, uint32_t flags, int32_t device); + +/** + * @ingroup rt_context + * @brief destroy context instance + * @param [in] ctx context to destroy + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtCtxDestroy(rtContext_t ctx); + +/** + * @ingroup rt_context + * @brief destroy context instance + * @param [in] ctx context to destroy + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtCtxDestroyEx(rtContext_t ctx); + +/** + * @ingroup rt_context + * @brief binds context to the calling CPU thread. + * @param [in] ctx context to bind. if NULL, unbind current context. + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtCtxSetCurrent(rtContext_t ctx); + +/** + * @ingroup rt_context + * @brief block for a context's tasks to complete + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtCtxSynchronize(void); + +/** + * @ingroup rt_context + * @brief returns the context bound to the calling CPU thread. + * @param [out] ctx returned context + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtCtxGetCurrent(rtContext_t *ctx); + +/** + * @ingroup rt_context + * @brief returns the primary context of device. + * @param [out] ctx returned context + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetPriCtxByDeviceId(int32_t device, rtContext_t *ctx); + +/** + * @ingroup rt_context + * @brief returns the device ID for the current context + * @param [out] device returned device id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtCtxGetDevice(int32_t *device); + +/** + * @ingroup + * @brief set group id + * @param [in] groupid + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtSetGroup(int32_t groupId); + +/** + * @ingroup + * @brief get group info + * @param [in] groupid count + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t count); + +/** + * @ingroup + * @brief get group count + * @param [in] groupid count + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtGetGroupCount(uint32_t *count); + +/** + * @ingroup rt_context + * @brief set context INF mode + * @param [in] mode + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtSetCtxINFMode(bool mode); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif + + +#endif // __CCE_RUNTIME_CONTEXT_H__ diff --git a/third_party/fwkacllib/inc/inc/runtime/dev.h b/third_party/fwkacllib/inc/inc/runtime/dev.h new file mode 100644 index 00000000..018f4e6c --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/dev.h @@ -0,0 +1,369 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef __CCE_RUNTIME_DEVICE_H__ +#define __CCE_RUNTIME_DEVICE_H__ + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +#define RT_CAPABILITY_SUPPORT (0x1) +#define RT_CAPABILITY_NOT_SUPPORT (0x0) + +typedef struct tagRTDeviceInfo { + uint8_t env_type; // 0: FPGA 1: EMU 2: ESL + uint32_t ctrl_cpu_ip; + uint32_t ctrl_cpu_id; + uint32_t ctrl_cpu_core_num; + uint32_t ctrl_cpu_endian_little; + uint32_t ts_cpu_core_num; + uint32_t ai_cpu_core_num; + uint32_t ai_core_num; + uint32_t ai_core_freq; + uint32_t ai_cpu_core_id; + uint32_t ai_core_id; + uint32_t aicpu_occupy_bitmap; + uint32_t hardware_version; + uint32_t ts_num; +} rtDeviceInfo_t; + +typedef enum tagRtRunMode { + RT_RUN_MODE_OFFLINE = 0, + RT_RUN_MODE_ONLINE = 1, + RT_RUN_MODE_AICPU_SCHED = 2, + RT_RUN_MODE_RESERVED +} rtRunMode; + +typedef enum tagRtAicpuDeployType { + AICPU_DEPLOY_CROSS_OS = 0x0, + AICPU_DEPLOY_CROSS_PROCESS = 0x1, + AICPU_DEPLOY_CROSS_THREAD = 0x2, + AICPU_DEPLOY_RESERVED +} rtAicpuDeployType_t; + +typedef enum tagRtFeatureType { + FEATURE_TYPE_MEMCPY = 0, + FEATURE_TYPE_RSV +} rtFeatureType_t; + +typedef enum tagRtDeviceFeatureType { + FEATURE_TYPE_SCHE, + FEATURE_TYPE_END, +} rtDeviceFeatureType_t; + +typedef enum tagMemcpyInfo { + MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, + MEMCPY_INFO_RSV +} rtMemcpyInfo_t; + +/** + * @ingroup dvrt_dev + * @brief get total device number. + * @param [in|out] count the device number + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDeviceCount(int32_t *count); +/** + * @ingroup dvrt_dev + * @brief get device ids + * @param [in|out] get details of device ids + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_DRV_ERR for error + */ +RTS_API rtError_t rtGetDeviceIDs(uint32_t *devices, uint32_t len); + +/** + * @ingroup dvrt_dev + * @brief get device infomation. + * @param [in] device the device id + * @param [in] moduleType module type + typedef enum { + MODULE_TYPE_SYSTEM = 0, system info + MODULE_TYPE_AICPU, aicpu info + MODULE_TYPE_CCPU, ccpu_info + MODULE_TYPE_DCPU, dcpu info + MODULE_TYPE_AICORE, AI CORE info + MODULE_TYPE_TSCPU, tscpu info + MODULE_TYPE_PCIE, PCIE info + } DEV_MODULE_TYPE; + * @param [in] infoType info type + typedef enum { + INFO_TYPE_ENV = 0, + INFO_TYPE_VERSION, + INFO_TYPE_MASTERID, + INFO_TYPE_CORE_NUM, + INFO_TYPE_OS_SCHED, + INFO_TYPE_IN_USED, + INFO_TYPE_ERROR_MAP, + INFO_TYPE_OCCUPY, + INFO_TYPE_ID, + INFO_TYPE_IP, + INFO_TYPE_ENDIAN, + } DEV_INFO_TYPE; + * @param [out] value the device info + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_DRV_ERR for error + */ +RTS_API rtError_t rtGetDeviceInfo(uint32_t deviceId, int32_t moduleType, int32_t infoType, int64_t *value); + +/** + * @ingroup dvrt_dev + * @brief set target device for current thread + * @param [int] device the device id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetDevice(int32_t device); + +/** + * @ingroup dvrt_dev + * @brief set target device for current thread + * @param [int] device the device id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetDeviceEx(int32_t device); + +/** + * @ingroup dvrt_dev + * @brief get Index by phyId. 
+ * @param [in] phyId the physical device id + * @param [out] devIndex the logical device id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDeviceIndexByPhyId(uint32_t phyId, uint32_t *devIndex); + +/** + * @ingroup dvrt_dev + * @brief get phyId by Index. + * @param [in] devIndex the logical device id + * @param [out] phyId the physical device id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDevicePhyIdByIndex(uint32_t devIndex, uint32_t *phyId); + +/** + * @ingroup dvrt_dev + * @brief enable direction:devIdDes---->phyIdSrc. + * @param [in] devIdDes the logical device id + * @param [in] phyIdSrc the physical device id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtEnableP2P(uint32_t devIdDes, uint32_t phyIdSrc, uint32_t flag); + +/** + * @ingroup dvrt_dev + * @brief disable direction:devIdDes---->phyIdSrc. + * @param [in] devIdDes the logical device id + * @param [in] phyIdSrc the physical device id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDisableP2P(uint32_t devIdDes, uint32_t phyIdSrc); + +/** + * @ingroup dvrt_dev + * @brief get capability of P2P memory copy between device and peer device. + * @param [in] device the logical device id + * @param [in] peerDevice the physical device id + * @param [out] canAccessPeer 1:enable 0:disable + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDeviceCanAccessPeer(int32_t *canAccessPeer, uint32_t device, uint32_t peerDevice); + +/** + * @ingroup dvrt_dev + * @brief get status + * @param [in] devIdDes the logical device id + * @param [in] phyIdSrc the physical device id + * @param [in|out] status status value + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetP2PStatus(uint32_t devIdDes, uint32_t phyIdSrc, uint32_t *status); + +/** + * @ingroup dvrt_dev + * @brief get value of current thread + * @param [in|out] pid value of pid + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtDeviceGetBareTgid(uint32_t *pid); + +/** + * @ingroup dvrt_dev + * @brief get target device of current thread + * @param [in|out] device the device id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDevice(int32_t *device); + +/** + * @ingroup dvrt_dev + * @brief reset all opened device + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDeviceReset(int32_t device); + +/** + * @ingroup dvrt_dev + * @brief reset opened device + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDeviceResetEx(int32_t device); + +/** + * @ingroup dvrt_dev + * @brief set device limit.
+ * @param [in] device the device id + * @param [in] type limit type RT_LIMIT_TYPE_LOW_POWER_TIMEOUT=0 + * @param [in] value limit value + * @param [out] info the device info + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDeviceSetLimit(int32_t device, rtLimitType_t type, uint32_t value); + +/** + * @ingroup dvrt_dev + * @brief Wait for compute device to finish + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDeviceSynchronize(void); + +/** + * @ingroup dvrt_dev + * @brief get priority range of current device + * @param [in|out] leastPriority least priority + * @param [in|out] greatestPriority greatest priority + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDeviceGetStreamPriorityRange(int32_t *leastPriority, int32_t *greatestPriority); + +/** + * @ingroup dvrt_dev + * @brief Set exception handling callback function + * @param [in] callback exception callback function + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetExceptCallback(rtErrorCallback callback); + +/** + * @ingroup dvrt_dev + * @brief Setting Scheduling Type of Graph + * @param [in] tsId the ts id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetTSDevice(uint32_t tsId); + +/** + * @ingroup dvrt_dev + * @brief init aicpu executor + * @param [out] mode run mode + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_DRV_ERR when the run mode cannot be obtained + */ +RTS_API rtError_t rtGetRunMode(rtRunMode *mode); + +/** + * @ingroup dvrt_dev + * @brief get aicpu deploy + * @param [out] deployType aicpu deploy type + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_DRV_ERR when the aicpu deploy type cannot be obtained + */ +RTS_API rtError_t rtGetAicpuDeploy(rtAicpuDeployType_t *deployType); + +/** + * @ingroup dvrt_dev + * @brief set chipType + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtSetSocVersion(const char *version); + +/** + * @ingroup dvrt_dev + * @brief get chipType + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetSocVersion(char *version, const uint32_t maxLen); + +/** + * @ingroup dvrt_dev + * @brief get status + * @param [in] devId the logical device id + * @param [in] otherDevId the other logical device id + * @param [in] infoType info type + * @param [in|out] value pair info + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int32_t infoType, int64_t *value); + +/** + * @ingroup dvrt_dev + * @brief get capability information.
+ * @param [in] featureType feature type + typedef enum tagRtFeatureType { + FEATURE_TYPE_MEMCPY = 0, + FEATURE_TYPE_RSV, + } rtFeatureType_t; + * @param [in] featureInfo info type + typedef enum tagMemcpyInfo { + MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, + MEMCPY_INFO _RSV, + } rtMemcpyInfo_t; + * @param [out] value the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); + +/** + * @ingroup dvrt_dev + * @brief set target device for current thread + * @param [int] device the device id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); + +/** + * @ingroup dvrt_dev + * @brief reset all opened device + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif + +#endif // __CCE_RUNTIME_DEVICE_H__ diff --git a/third_party/fwkacllib/inc/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/inc/runtime/dvfsprofile.h new file mode 100644 index 00000000..6e451695 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/dvfsprofile.h @@ -0,0 +1,63 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef __CCE_RUNTIME_DVFSPROFILE_H__ +#define __CCE_RUNTIME_DVFSPROFILE_H__ + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +typedef enum dvfsProfileMode { + DVFS_PROFILE_PERFORMANCE_PRIORITY, + DVFS_PROFILE_BALANCE_PRIORITY, + DVFS_PROFILE_POWER_PRIORITY, + DVFS_PROFILE_PRIORITY_MAX +} DvfsProfileMode; + +/** + * @ingroup dvrt_dvfsprofile + * @brief Set the performance mode of the device + * @param [in] mode dvfsProfileMode + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode mode); + +/** + * @ingroup dvrt_dvfsprofile + * @brief Set the performance mode of the device + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for invalid value + */ +RTS_API rtError_t rtUnsetDvfsProfile(); + +/** + * @ingroup dvrt_dvfsprofile + * @brief Get the current performance mode of the device + * @param [in|out] pmode dvfsProfileMode type pointer + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif + +#endif // __CCE_RUNTIME_PROFILE_H__ diff --git a/third_party/fwkacllib/inc/inc/runtime/event.h b/third_party/fwkacllib/inc/inc/runtime/event.h new file mode 100644 index 00000000..41e611ea --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/event.h @@ -0,0 +1,246 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef __CCE_RUNTIME_EVENT_H__ +#define __CCE_RUNTIME_EVENT_H__ + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +/** + * @ingroup event_flags + * @brief event op bit flags + */ +#define RT_EVENT_DEFAULT (0x00) +#define RT_EVENT_WITH_FLAG (0x01) + +/** + * @ingroup dvrt_event + * @brief create event instance + * @param [in|out] event created event + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtEventCreate(rtEvent_t *event); + +/** + * @ingroup dvrt_event + * @brief create event instance with flag + * @param [in|out] event created event flag event op flag + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtEventCreateWithFlag(rtEvent_t *event, uint32_t flag); + +/** + * @ingroup dvrt_event + * @brief destroy event instance + * @param [in] event event to destroy + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtEventDestroy(rtEvent_t event); + +/** + * @ingroup dvrt_event + * @brief get event id + * @param [in] event_ event to be get + * @param [in|out] event_id event_id id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetEventID(rtEvent_t event, uint32_t *eventId); + +/** + * @ingroup dvrt_event + * @brief event record + * @param [int] event event to record + * @param [int] stream stream handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtEventRecord(rtEvent_t event, rtStream_t stream); + +/** + * @ingroup dvrt_event + * @brief event reset + * @param [int] event event to reset + * @param [int] stream stream handle + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtEventReset(rtEvent_t event, rtStream_t stream); + +/** + * @ingroup dvrt_event + * @brief wait event to be complete + * @param [in] event event to wait + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtEventSynchronize(rtEvent_t event); + +/** + * @ingroup dvrt_event + * @brief Queries an event's status + * @param [in] event event to query + * @return RT_ERROR_NONE for complete + * @return RT_ERROR_EVENT_NOT_COMPLETE for not complete + */ +RTS_API rtError_t rtEventQuery(rtEvent_t event); + +/** + * @ingroup dvrt_event + * @brief computes the elapsed time between events. + * @param [in] time time between start and end in ms + * @param [in] start starting event + * @param [in] end ending event + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtEventElapsedTime(float *time, rtEvent_t start, rtEvent_t end); + +/** + * @ingroup dvrt_event + * @brief get the elapsed time from a event after event recorded. 
+ * @param [in] time time in ms + * @param [in] event event handle + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtEventGetTimeStamp(uint64_t *time, rtEvent_t event); + +/** + * @ingroup dvrt_event + * @brief name an event + * @param [in] event event to be named + * @param [in] name identification name + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input of event, name + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtNameEvent(rtEvent_t event, const char *name); + +/** + * @ingroup dvrt_event + * @brief Create a notify + * @param [in] device_id device id + * @param [in|out] notify_ notify to be created + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtNotifyCreate(int32_t deviceId, rtNotify_t *notify); + +/** + * @ingroup dvrt_event + * @brief Destroy a notify + * @param [in] notify_ notify to be destroyed + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtNotifyDestroy(rtNotify_t notify); + +/** + * @ingroup dvrt_event + * @brief Record a notify + * @param [in] notify_ notify to be recorded + * @param [in] stream_ input stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx + */ +RTS_API rtError_t rtNotifyRecord(rtNotify_t notify, rtStream_t stream); + +/** + * @ingroup dvrt_event + * @brief Wait for a notify + * @param [in] notify_ notify to be wait + * @param [in] stream_ input stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx + */ +RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream); + +/** + * @ingroup dvrt_event + * @brief Name a notify + * @param [in] notify_ notify to be named + * @param [in|out] name identification name + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtNameNotify(rtNotify_t notify, const char *name); + +/** + * @ingroup dvrt_event + * @brief get notify id + * @param [in] notify_ notify to be get + * @param [in|out] notify_id notify id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetNotifyID(rtNotify_t notify, uint32_t *notifyId); + +/** + * @ingroup dvrt_event + * @brief Set a notify to IPC notify + * @param [in] notify_ notify to be set to IPC notify + * @param [in] name identification name + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input of + */ +RTS_API rtError_t rtIpcSetNotifyName(rtNotify_t notify, char *name, uint32_t len); + +/** + * @ingroup dvrt_event + * @brief Open IPC notify + * @param [out] notify the opened notify + * @param [in] name identification name + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtIpcOpenNotify(rtNotify_t *notify, const char *name); + +/** + * @ingroup dvrt_event + * @brief Get the physical address corresponding to notify + * @param [in] notify notify to be queried + * @param [in] devAddrOffset device physical address offset + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffset); 
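+
+/*
+ * Editor's usage sketch (illustrative only, not part of the original header):
+ * timing a stretch of work on a stream with the event APIs declared above.
+ * The stream handle is assumed to have been created elsewhere (see stream.h);
+ * error handling is omitted for brevity.
+ *
+ *   rtEvent_t start = NULL;
+ *   rtEvent_t stop = NULL;
+ *   rtEventCreate(&start);
+ *   rtEventCreate(&stop);
+ *   rtEventRecord(start, stream);                 // mark the beginning on the stream
+ *   // ... enqueue kernels or memcpys on the same stream ...
+ *   rtEventRecord(stop, stream);                  // mark the end on the stream
+ *   rtEventSynchronize(stop);                     // block until the stop event completes
+ *   float elapsedMs = 0.0f;
+ *   rtEventElapsedTime(&elapsedMs, start, stop);  // elapsed time in milliseconds
+ *   rtEventDestroy(start);
+ *   rtEventDestroy(stop);
+ */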
+ +/** + * @ingroup dvrt_event + * @brief Ipc set notify pid + * @param [in] name name to be queried + * @param [in] pid process id + * @param [in] num length of pid[] + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif + +#endif // __CCE_RUNTIME_EVENT_H__ diff --git a/third_party/fwkacllib/inc/inc/runtime/kernel.h b/third_party/fwkacllib/inc/inc/runtime/kernel.h new file mode 100644 index 00000000..0ec1a163 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/kernel.h @@ -0,0 +1,601 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef __CCE_RUNTIME_KERNEL_H__ +#define __CCE_RUNTIME_KERNEL_H__ + +#include "base.h" +#include "stream.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +/** + * @ingroup rt_kernel + * @brief shared memory data control + */ +typedef struct tagRtSmData { + uint64_t L2_mirror_addr; // preload or swap source address + uint32_t L2_data_section_size; // every data size + uint8_t L2_preload; // 1 - preload from mirrorAddr, 0 - no preload + uint8_t modified; // 1 - data will be modified by kernel, 0 - no modified + uint8_t priority; // data priority + int8_t prev_L2_page_offset_base; // remap source section offset + uint8_t L2_page_offset_base; // remap destination section offset + uint8_t L2_load_to_ddr; // 1 - need load out, 0 - no need + uint8_t reserved[2]; // reserved +} rtSmData_t; + +/** + * @ingroup rt_kernel + * @brief shared memory description + */ +typedef struct tagRtSmCtrl { + rtSmData_t data[8]; // data description + uint64_t size; // max page Num + uint8_t remap[64]; /* just using for static remap mode, default:0xFF + array index: virtual l2 page id, array value: physic l2 page id */ + uint8_t l2_in_main; // 0-DDR, 1-L2, default:0xFF + uint8_t reserved[3]; +} rtSmDesc_t; + +typedef rtSmDesc_t rtL2Ctrl_t; + +/** + * @ingroup rt_kernel + * @brief device binary type + */ +typedef struct tagRtDevBinary { + uint32_t magic; // magic number + uint32_t version; // version of binary + const void *data; // binary data + uint64_t length; // binary length +} rtDevBinary_t; + +/** + * @ingroup rt_kernel + * @brief function mode type + */ +#define ONLINE_PROF_MAX_PMU_NUM (8) + +typedef struct ProfilefDataInfo { + const void *stubFunc; + uint32_t blockDim; + const void *args; + uint32_t argsSize; + rtSmDesc_t *smDesc; + rtStream_t stream; + uint64_t totalcycle; + uint64_t ovcycle; + uint64_t pmu_cnt[ONLINE_PROF_MAX_PMU_NUM]; +} rtProfDataInfo_t; + +/** + * @ingroup rt_kernel + * @brief function mode type + */ +typedef enum { + FUNC_MODE_NORMAL = 0, + FUNC_MODE_PCTRACE_USERPROFILE_RECORDLOOP, + FUNC_MODE_PCTRACE_USERPROFILE_SKIPLOOP, + FUNC_MODE_PCTRACE_CYCLECNT_RECORDLOOP, + FUNC_MODE_PCTRACE_CYCLECNT_SKIPLOOP, + FUNC_MODE_BUTT +} 
rtFuncModeType_t; + +/** + * @ingroup rt_kernel + * @brief kernel info + */ +typedef struct rtKernelInfo { + uint64_t task_offset; // kernel offset in module + /* flowtable */ + void *arg; // launch kernel arg + uint32_t arg_size; + /* module */ + void *module_addr; // module::baseaddr_ + uint32_t module_size; +} *rtKernelInfo_t; + +/** + * @ingroup rt_KernelConfigDump + * @brief device dump type + */ +typedef enum tagRtDumpKind { + RT_DATA_DUMP_KIND_INVALID = -1, + RT_DATA_DUMP_KIND_DUMP = 0, + RT_DATA_DUMP_KIND_RESERVED +} rtDumpKind_t; + +/** + * @ingroup rt_kernel + * @brief report callback + */ +typedef rtError_t (*rtKernelReportCallback)(rtStream_t stream, rtKernelInfo_t kernelInfo); + +/** + * @ingroup rt_kernel + * @brief stream report callback + */ +typedef void (*rtCallback_t)(void *fnData); + +/** + * @ingroup rt_kernel + * @brief magic number of plain binary for aicore + */ +#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50 + +/** + * @ingroup rt_kernel + * @brief magic number of plain binary for aicpu + */ +#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51 + +/** + * @ingroup rt_kernel + * @brief magic number of plain binary for aivector + */ +#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52 + +/** + * @ingroup rt_kernel + * @brief magic number of elf binary for aicore + */ +#define RT_DEV_BINARY_MAGIC_ELF 0x43554245 + +/** + * @ingroup rt_kernel + * @brief magic number of elf binary for aicpu + */ +#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243 + +/** + * @ingroup rt_kernel + * @brief magic number of elf binary for aivector + */ +#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246 + +/** + * @ingroup rt_kernel + * @brief magic number of elf binary for aicube + */ +#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41415247 + +/** + * @ingroup rt_kernel + * @brief magic number of elf binary for aivector + */ +#define RT_DEV_BINARY_MAGIC_ELF_AIVECTOR 0x41415248 + +/** + * @ingroup rt_kernel_flags + * @brief kernel op bit flags + */ +#define RT_KERNEL_DEFAULT (0x00) +#define RT_KERNEL_CONVERT (0x01) +#define RT_KERNEL_DUMPFLAG (0x02) +#define RT_FUSION_KERNEL_DUMPFLAG (0x04) +#define RT_KERNEL_CUSTOM_AICPU (0x08) + +/** + * @ingroup rt_kernel + * @brief kernel mode +**/ +#define RT_DEFAULT_KERNEL_MODE (0x00) +#define RT_NORMAL_KERNEL_MODE (0x01) +#define RT_ALL_KERNEL_MODE (0x02) + +/** + * @ingroup rt_kernel + * @brief kernel L1 Fusion Dump bit flags + */ +#define RT_DDR_ADDR (0x0) + +/** + * @ingroup rt_kernel + * @brief register device binary + * @param [in] bin device binary description + * @param [out] handle device binary handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); + +/** + * @ingroup rt_kernel + * @brief register device binary with all kernel + * @param [in] bin device binary description + * @param [out] handle device binary handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle); + +/** + * @ingroup rt_kernel + * @brief register fast memeory device binary + * @param [in] handle device binary handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtBinaryRegisterToFastMemory(void *handle); + +/** + * @ingroup rt_kernel + * @brief unregister device binary + * @param [in] handle device binary handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error 
input + */ +RTS_API rtError_t rtDevBinaryUnRegister(void *handle); + +/** + * @ingroup rt_kernel + * @brief register device binary metadata + * @param [in] handle device binary description + * @param [in] metadata device binary metadata + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMetadataRegister(void *handle, const char *metadata); + +/** + * @ingroup rt_kernel + * @brief register device binary dependency + * @param [in] mHandle master device binary description + * @param [in] sHandle slave device binary description + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle); + +/** + * @ingroup rt_kernel + * @brief register device function + * @param [in] binHandle device binary handle + * @param [in] stubFunc stub function + * @param [in] stubName stub function name + * @param [in] devFunc device function description. symbol name or address + * offset, depending binary type. + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char *stubName, const void *devFunc, + uint32_t funcMode); + +/** + * @ingroup rt_kernel + * @brief find stub function by name + * @param [in] stubName stub function name + * @param [out] stubFunc stub function + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetFunctionByName(const char *stubName, void **stubFunc); + +/** + * @ingroup rt_kernel + * @brief find addr by stub func + * @param [in] stubFunc stub function + * @param [out] addr + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetAddrByFun(const void *stubFunc, void **addr); +/** + * @ingroup rt_kernel + * @brief query registered or not by stubName + * @param [in] stubName stub function name + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtQueryFunctionRegistered(const char *stubName); + +/** + * @ingroup rt_kernel + * @brief config data dump + * @param [in] dumpSizePerBlock dump size + * @param [in] blockDim block dimentions + * @param [in] dumpBaseAddr dump base address + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, uint32_t blockDim, void **dumpBaseAddr, + rtStream_t stream); + +/** + * @ingroup rt_kernel + * @brief launch kernel to device + * @param [in] stubFunc stub function + * @param [in] blockDim block dimentions + * @param [in] args argments address for kernel function + * @param [in] argsSize argements size + * @param [in] smDesc shared memory description + * @param [in] stream associated stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, + rtSmDesc_t *smDesc, rtStream_t stream); + +/** + * @ingroup rt_kernel + * @brief launch kernel with handle to device + * @param [in] handle program + * @param [in] devFunc device function description. 
+ * @param [in] blockDim block dimentions + * @param [in] args argments address for kernel function + * @param [in] argsSize argements size + * @param [in] smDesc shared memory description + * @param [in] stream associated stream + * @param [in] kernelInfo kernel info + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, + rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); + +/** + * @ingroup rt_kernel + * @brief launch kernel to device + * @param [in] stubFunc stub function + * @param [in] blockDim block dimentions + * @param [in] args argments address for kernel function + * @param [in] argsSize argements size + * @param [in] smDesc shared memory description + * @param [in] stream associated stream + * @param [in] flag dump flag + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, + rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags); + +/** + * @ingroup rt_kernel + * @brief launch kernel to device + * @param [in] args argments address for kernel function + * @param [in] argsSize argements size + * @param [in] flags launch flags + * @param [in] stream associated stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtKernelLaunchEx(void *args, uint32_t argsSize, uint32_t flags, rtStream_t stream); + +/** + * @ingroup rt_kernel + * @brief launch cpu kernel to device + * @param [in] soName so name + * @param [in] kernelName kernel name + * @param [in] blockDim block dimentions + * @param [in] args argments address for kernel function + * @param [in] argsSize argments size + * @param [in] smDesc shared memory description + * @param [in] stream associated stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtCpuKernelLaunch(const void *soName, const void *kernelName, uint32_t blockDim, const void *args, + uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream); + +/** + * @ingroup rt_kernel + * @brief launch cpu kernel to device with dump identifier + * @param [in] soName so name + * @param [in] kernelName kernel name + * @param [in] blockDim block dimentions + * @param [in] args argments address for kernel function + * @param [in] argsSize argments size + * @param [in] smDesc shared memory description + * @param [in] stream associated stream + * @param [in] flag dump flag or others function flag + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kernelName, uint32_t blockDim, + const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream, + uint32_t flags); + +/** + * @ingroup rt_kernel + * @brief L1 fusion dump addr transfered to device + * @param [in] model handle info + * @param [in] addr ddr address of L1 Fusion Dump + * @param [in] dumpSize memory size + * @param [in] flag memory flag + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag); + +/** + * @ingroup rt_kernel + * @brief load dump info to aicpu + * @param [in] dumpInfo dump info + * @param [in] length length of dump info + * 
@return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length); + +#ifndef __CLANG_CCE_RUNTIME_H__ +#define __CLANG_CCE_RUNTIME_H__ +/** + * @ingroup rt_kernel + * @brief configure call argment for next rtLaunch in current thread + * @param [in] numBlocks block dimentions + * @param [in] smDesc shared memory description + * @param [in] stream associated stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +#ifdef __cplusplus +RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc = nullptr, rtStream_t stream = nullptr); +#else +RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStream_t stream); + +#endif +#endif // __CLANG_CCE_RUNTIME_H__ + +/** + * @ingroup rt_kernel + * @brief setup argment for next rtLaunch in current thread + * @param [in] arg argment address for kernel function + * @param [in] size argment size + * @param [in] offset argment table offset + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetupArgument(const void *arg, uint32_t size, uint32_t offset); + +/** + * @ingroup rt_kernel + * @brief launch kernel to device with previous setting kernel argment + * and call argment + * @param [in] stubFunc stub function + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtLaunch(const void *stubFunc); + +/** + * @ingroup rt_kernel + * @brief implicitly transfered data to device. + * lifecycle end after next kernel task finish + * @param [in] ptr host memory + * @param [in] size host memory size + * @param [in] flag reserved. set to 0 + * @param [out] arg returned arg. used for next kernel's arg. + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg); + +/** + * @ingroup rt_kernel + * @brief start fusion kernels. + * @param [in] stream stream for fusion kernels + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtKernelFusionStart(rtStream_t stream); + +/** + * @ingroup rt_kernel + * @brief end fusion kernels. + * @param [in] stream stream for fusion kernels + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtKernelFusionEnd(rtStream_t stream); + +/** + * @ingroup rt_kernel + * @brief set kernelinfo callback + * @param [in] callback + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetKernelReportCallback(rtKernelReportCallback callBack); + +/** + * @ingroup rt_kernel + * @brief subscribe stream callback report. + * @param [in] threadId thread id for stream + * @param [in] stream stream for subscribe + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSubscribeReport(uint64_t threadId, rtStream_t stream); + +/** + * @ingroup rt_kernel + * @brief add callback launch task in stream. 
+ * @param [in] callBackFunc app callback function + * @param [in] fnData user data + * @param [in] stream subscribed stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtCallbackLaunch(rtCallback_t callBackFunc, void *fnData, rtStream_t stream, bool isBlock); + +/** + * @ingroup rt_kernel + * @brief process callback report. + * @param [in] timeout if timeout=-1, while(1); else timeout + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtProcessReport(int32_t timeout); + +/** + * @ingroup rt_kernel + * @brief unsubscribe callback report. + * @param [in] threadId thread id for stream + * @param [in] stream stream for subscribe + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtUnSubscribeReport(uint64_t threadId, rtStream_t stream); + +/** + * @ingroup profiling_base + * @brief start online prof. + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStartOnlineProf(rtStream_t stream, uint32_t sampleNum); + +/** + * @ingroup profiling_base + * @brief stop online prof. + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); + +/** + * @ingroup profiling_base + * @brief get online prof. + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum); + +/** + * @ingroup profiling_base + * @brief start mdc profiler. + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStartMDCProfiler(void **addr, uint32_t length); + +/** + * @ingroup profiling_base + * @brief stop mdc profiler. + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStopMDCProfiler(void *addr); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif + +#endif // __CCE_RUNTIME_KERNEL_H__ + diff --git a/third_party/fwkacllib/inc/inc/runtime/mem.h b/third_party/fwkacllib/inc/inc/runtime/mem.h new file mode 100644 index 00000000..30af85d9 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/mem.h @@ -0,0 +1,543 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef __CCE_RUNTIME_MEM_H__ +#define __CCE_RUNTIME_MEM_H__ + +/*lint -e7*/ +#include +/*lint +e7*/ +#include "base.h" +#include "config.h" +#include "stream.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +/** + * @ingroup dvrt_mem + * @brief memory type + */ +#define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device +#define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device +#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device +#define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device +#define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device +#define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device +#define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device +#define RT_MEMORY_DDR_NC ((uint32_t)0x20) // non-cacheable DDR memory +#define RT_MEMORY_TS_4G ((uint32_t)0x40) +#define RT_MEMORY_TS ((uint32_t)0x80) +#define RT_MEMORY_RESERVED ((uint32_t)0x100) + +#define RT_MEMORY_L1 ((uint32_t)0x1<<16) +#define RT_MEMORY_L2 ((uint32_t)0x1<<17) + +/** + * @ingroup dvrt_mem + * @brief memory info type + */ +#define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1) +#define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2) +#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE ((uint32_t)0x3) +#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4) + +/** + * @ingroup dvrt_mem + * @brief memory Policy + */ +#define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // Malloc mem prefers huge page, then default page +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prefers huge page, then default page +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only uses huge page +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only uses default page +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem prefers huge page, then default page, used for p2p +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem only uses huge page, used for p2p +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem only uses default page, used for p2p + +#define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mem type bit in <0, 9> + +/** + * @ingroup dvrt_mem + * @brief memory type | memory Policy + */ +typedef uint32_t rtMemType_t; + +/** + * @ingroup dvrt_mem + * @brief memory advise type + */ +#define RT_MEMORY_ADVISE_EXE (0x02) +#define RT_MEMORY_ADVISE_THP (0x04) +#define RT_MEMORY_ADVISE_PLE (0x08) +#define RT_MEMORY_ADVISE_PIN (0x16) + +/** + * @ingroup dvrt_mem + * @brief memory copy type + */ +typedef enum tagRtMemcpyKind { + RT_MEMCPY_HOST_TO_HOST = 0, // host to host + RT_MEMCPY_HOST_TO_DEVICE, // host to device + RT_MEMCPY_DEVICE_TO_HOST, // device to host + RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P + RT_MEMCPY_MANAGED, // managed memory + RT_MEMCPY_ADDR_DEVICE_TO_DEVICE, + RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes) + RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex + RT_MEMCPY_RESERVED, +} rtMemcpyKind_t; + +typedef enum tagRtMemInfoType { + RT_MEMORYINFO_DDR, + RT_MEMORYINFO_HBM, + RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR + RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR + RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM + RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM + RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR + RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR + RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage
memory of HBM + RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM +} rtMemInfoType_t; + +typedef enum tagRtRecudeKind { + RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P + RT_RECUDE_KIND_END +} rtRecudeKind_t; + +typedef enum tagRtDataType { + RT_DATA_TYPE_FP32 = 0, // fp32 + RT_DATA_TYPE_FP16 = 1, // fp16 + RT_DATA_TYPE_INT16 = 2, // int16 + RT_DATA_TYPE_END +} rtDataType_t; + +/** + * @ingroup dvrt_mem + * @brief memory copy channel type + */ +typedef enum tagRtMemcpyChannelType { + RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P + RT_MEMCPY_CHANNEL_TYPE_PCIe, + RT_MEMCPY_CHANNEL_TYPE_HCCs, // not support now + RT_MEMCPY_CHANNEL_TYPE_RESERVED, +} rtMemcpyChannelType_t; + +/** + * @ingroup rt_kernel + * @brief ai core memory size + */ +typedef struct rtAiCoreMemorySize { + uint32_t l0ASize; + uint32_t l0BSize; + uint32_t l0CSize; + uint32_t l1Size; + uint32_t ubSize; + uint32_t l2Size; + uint32_t l2PageNum; + uint32_t blockSize; + uint64_t bankSize; + uint64_t bankNum; + uint64_t burstInOneBlock; + uint64_t bankGroupNum; +} rtAiCoreMemorySize_t; + +/** + * @ingroup dvrt_mem + * @brief memory type + */ +typedef enum tagRtMemoryType { + RT_MEMORY_TYPE_HOST = 1, + RT_MEMORY_TYPE_DEVICE = 2, + RT_MEMORY_TYPE_SVM = 3, + RT_MEMORY_TYPE_DVPP = 4 +} rtMemoryType_t; + +/** + * @ingroup dvrt_mem + * @brief memory attribute + */ +typedef struct tagRtPointerAttributes { + rtMemoryType_t memoryType; // host memory or device memory + rtMemoryType_t locationType; + uint32_t deviceID; // device ID + uint32_t pageSize; +} rtPointerAttributes_t; + + +typedef struct rtMallocHostSharedMemoryIn { + const char *name; + const uint64_t size; + uint32_t flag; +} rtMallocHostSharedMemoryIn; + +typedef struct rtMallocHostSharedMemoryOut { + int fd; + void *ptr; + void *devPtr; +} rtMallocHostSharedMemoryOut; + +typedef struct rtFreeHostSharedMemoryIn { + const char *name; + const uint64_t size; + int fd; + void *ptr; + void *devPtr; +} rtFreeHostSharedMemoryIn; + + +/** + * @ingroup dvrt_mem + * @brief alloc device memory + * @param [in|out] devPtr memory pointer + * @param [in] size memory size + * @param [in] type memory type + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type); + +/** + * @ingroup dvrt_mem + * @brief free device memory + * @param [in|out] devPtr memory pointer + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtFree(void *devPtr); + +/** + * @ingroup dvrt_mem + * @brief alloc device memory for dvpp + * @param [in|out] devPtr memory pointer + * @param [in] size memory size + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size); + +/** + * @ingroup dvrt_mem + * @brief free device memory for dvpp + * @param [in|out] devPtr memory pointer + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDvppFree(void *devPtr); + +/** + * @ingroup dvrt_mem + * @brief alloc host memory + * @param [in|out] hostPtr memory pointer + * @param [in] size memory size + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size); + +/** + * @ingroup dvrt_mem + * @brief free host memory + * @param [in] hostPtr memory pointer + * @return RT_ERROR_NONE for ok + * @return 
RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtFreeHost(void *hostPtr); + +/** + * @ingroup dvrt_mem + * @brief alloc host shared memory + * @param [in] in alloc host shared memory inputPara pointer + * @param [in] out alloc host shared memory outputInfo pointer + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ + +RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in, + rtMallocHostSharedMemoryOut *out); + +/** + * @ingroup dvrt_mem + * @brief free host memory + * @param [in] in free host shared memory inputPara pointer + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ + +RTS_API rtError_t rtFreeHostSharedMemory(rtFreeHostSharedMemoryIn *in); + +/** + * @ingroup dvrt_mem + * @brief alloc managed memory + * @param [in|out] ptr memory pointer + * @param [in] size memory size + * @param [in] flag reserved, set to 0. + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag); + +/** + * @ingroup dvrt_mem + * @brief free managed memory + * @param [in] ptr memory pointer + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemFreeManaged(void *ptr); +/** + * @ingroup dvrt_mem + * @brief alloc cached device memory + * @param [in| devPtr memory pointer + * @param [in] size memory size + * @param [in] type memory type + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type); + +/** + * @ingroup dvrt_mem + * @brief flush device mempory + * @param [in] base virtal base address + * @param [in] len memory size + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtFlushCache(void *base, size_t len); + +/** + * @ingroup dvrt_mem + * @brief invalid device mempory + * @param [in] base virtal base address + * @param [in] len memory size + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtInvalidCache(void *base, size_t len); + +/** + * @ingroup dvrt_mem + * @brief synchronized memcpy + * @param [in] dst destination address pointer + * @param [in] Max length of destination address memory + * @param [in] src source address pointer + * @param [in] count the number of byte to copy + * @param [in] kind memcpy type + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind); + +/** + * @ingroup dvrt_mem + * @brief asynchronized memcpy + * @param [in] dst destination address pointer + * @param [in] Max length of destination address memory + * @param [in] src source address pointer + * @param [in] count the number of byte to copy + * @param [in] kind memcpy type + * @param [in] stream asynchronized task stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind, + rtStream_t stream); + +/** + * @ingroup dvrt_mem + * @brief asynchronized reduce memcpy + * @param [in] dst destination address pointer + * @param [in] Max length of destination address memory + * @param [in] src source address pointer + * @param [in] count the number of byte to copy + * @param [in] kind memcpy type + * @param [in] type data type + * @param [in] stream asynchronized 
task stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind, + rtDataType_t type, rtStream_t stream); + +/** + * @ingroup dvrt_mem + * @brief query memory size + * @param [in] aiCoreMemorySize + * @return RT_ERROR_NONE for ok, errno for failed + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize); + +/** + * @ingroup dvrt_mem + * @brief set memory sizes. Must be set before model inference so the model is not prevented from loading the + full network due to memory limits. Requirement from JiaMinHu. Only used for Tiny. + * @param [in] aiCoreMemorySize + * @return RT_ERROR_NONE for ok, errno for failed + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize); + +/** + * @ingroup dvrt_mem + * @brief set memory with uint32_t value + * @param [in] devPtr + * @param [in] Max length of destination address memory + * @param [in] value + * @param [in] count byte num + * @return RT_ERROR_NONE for ok, errno for failed + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count); + +/** + * @ingroup dvrt_mem + * @brief set memory with uint32_t value async + * @param [in] devPtr + * @param [in] Max length of destination address memory + * @param [in] value + * @param [in] count byte num + * @param [in] stream + * @return RT_ERROR_NONE for ok, errno for failed + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uint64_t count, rtStream_t stream); + +/** + * @ingroup dvrt_mem + * @brief get current device memory total and free + * @param [out] free + * @param [out] total + * @return RT_ERROR_NONE for ok, errno for failed + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total); + +/** + * @ingroup dvrt_mem + * @brief get current device memory total and free + * @param [in] memInfoType + * @param [out] free + * @param [out] total + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total); + +/** + * @ingroup dvrt_mem + * @brief prefetch memory to device + * @param [in] devPtr + * @param [in] len + * @param [in] device + * @return RT_ERROR_NONE for ok, errno for failed + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device); + +/** + * @ingroup dvrt_mem + * @brief get memory attribute: Host or Device + * @param [in] ptr + * @param [out] attributes + * @return RT_ERROR_NONE for ok, errno for failed + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr); + +/** + * @ingroup dvrt_mem + * @brief make memory shared between processes and assign it a name + * @param [in] ptr device memory address pointer + * @param [in] name identification name + * @param [in] byteCount identification byteCount + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char
*name, uint32_t len); + +/** + * @ingroup dvrt_mem + * @brief destroy a interprocess shared memory + * @param [in] name identification name + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtIpcDestroyMemoryName(const char *name); + +/** + * @ingroup dvrt_mem + * @brief open a interprocess shared memory + * @param [in|out] ptr device memory address pointer + * @param [in] name identification name + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name); + +/** + * @ingroup dvrt_mem + * @brief close a interprocess shared memory + * @param [in] ptr device memory address pointer + * @param [in] name identification name + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtIpcCloseMemory(const void *ptr); + +/** + * @ingroup dvrt_mem + * @brief HCCL Async memory cpy + * @param [in] index sq index + * @param [in] wqeIndex moudle index + * @param [in] stream asynchronized task stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t stream); + +/** + * @ingroup dvrt_mem + * @brief Ipc set mem pid + * @param [in] name name to be queried + * @param [in] pid process id + * @param [in] num length of pid[] + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num); + +/** + * @ingroup dvrt_mem + * @brief HCCL Async memory cpy + * @param [in] dbindex single device 0 + * @param [in] dbinfo doorbell info + * @param [in] stream asynchronized task stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_DRV_ERR for driver error + */ +RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif + +#endif // __CCE_RUNTIME_MEM_H__ diff --git a/third_party/fwkacllib/inc/inc/runtime/rt.h b/third_party/fwkacllib/inc/inc/runtime/rt.h new file mode 100644 index 00000000..83cafa3c --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/rt.h @@ -0,0 +1,31 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef __CCE_RUNTIME_RT_H__ +#define __CCE_RUNTIME_RT_H__ + +#include "base.h" +#include "config.h" +#include "context.h" +#include "dev.h" +#include "dvfsprofile.h" +#include "event.h" +#include "kernel.h" +#include "mem.h" +#include "rt_model.h" +#include "stream.h" + +#endif // __CCE_RUNTIME_RT_H__ diff --git a/third_party/fwkacllib/inc/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/inc/runtime/rt_model.h new file mode 100644 index 00000000..e6d849c8 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/rt_model.h @@ -0,0 +1,470 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef __CCE_RUNTIME_MODEL_H__ +#define __CCE_RUNTIME_MODEL_H__ + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +typedef enum tagModelTaskType { + RT_MODEL_TASK_KERNEL = 0, + RT_MODEL_TASK_EVENT_RECORD, + RT_MODEL_TASK_EVENT_WAIT, + RT_MODEL_TASK_FUSION_START, + RT_MODEL_TASK_FUSION_END, + RT_MODEL_TASK_KERNEL_EX, + RT_MODEL_TASK_HCCL, + RT_MODEL_TASK_STREAM_SWITCH, + RT_MODEL_TASK_STREAM_ACTIVE, + RT_MODEL_TASK_LABEL_SET, + RT_MODEL_TASK_LABEL_SWITCH, + RT_MODEL_TASK_LABEL_GOTO, + RT_MODEL_TASK_PROFILER_TRACE, + RT_MODEL_TASK_MEMCPY_ASYNC, + RT_MODEL_TASK_NOTIFY_RECORD, + RT_MODEL_TASK_NOTIFY_WAIT, + RT_MODEL_TASK_REDUCE_ASYNC, + RT_MODEL_TASK_RDMA_SEND, + RT_MODEL_TASK_EVENT_RESET = 18, + RT_MODEL_TASK_MODEL_END_GRAPH, + RT_MODEL_TASK_STREAM_SWITCH_N, + RT_MODEL_TASK_RDMA_DB_SEND, + RT_MODEL_TASK_MEMCPY_ADDR_ASYNC, + RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, + RT_MODEL_TASK_STREAM_LABEL_GOTO, + RT_MODEL_TASK_MODEL_EXIT, + RT_MODEL_TASK_ALL_KERNEL, +} rtModelTaskType_t; + +typedef enum tagModelStreamType { + RT_MODEL_HEAD_STREAM = 0, + RT_MODEL_WAIT_ACTIVE_STREAM = 1 +} rtModelStreamType_t; + +typedef enum tagModelQueueFlag { + RT_MODEL_INPUT_QUEUE = 0, + RT_MODEL_OUTPUT_QUEUE = 1 +} rtModelQueueFlag_t; + +#define EXECUTOR_NONE ((uint32_t)0x0) +#define EXECUTOR_TS ((uint32_t)0x01) +#define EXECUTOR_AICPU ((uint32_t)0x02) + +/* + * @ingroup rt_model + * @brief debug flag for kernel exception dump + */ +#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1 << 0) +#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1 << 1) + +/** + * @ingroup + * @brief the type defination of aicpu model task command + */ +typedef enum tagTsAicpuModelCmd { + TS_AICPU_MODEL_LOAD = 1, + TS_AICPU_MODEL_EXECUTE, + TS_AICPU_MODEL_DESTROY, + TS_AICPU_MODEL_ABORT, + TS_AICPU_MODEL_RESERVED, +} tsAicpuModelCmd; + +typedef struct tagAicpuTaskInfo { + uint32_t taskID; + uint32_t streamID; + uint32_t kernelType; + uint64_t kernelName; + uint64_t kernelSo; + uint64_t paraBase; + uint32_t taskFlag; +} rtAicpuTaskInfo_t; + +typedef struct tagModelStreamInfo { + uint32_t streamID; + uint32_t streamFlag; +} rtModelStreamInfo_t; + +typedef struct tagModelQueueInfo { + uint32_t queueID; + uint32_t flag; +} rtModelQueueInfo_t; + +typedef struct tagAicpuModelInfo { + uint32_t moduleID; + uint32_t tsId; + uint16_t streamInfoNum; + 
uint16_t aicpuTaskNum; + uint64_t streamInfoPtr; + uint64_t aicpuTaskPtr; + uint16_t queueSize; + uint64_t queueInfoPtr; +} rtAicpuModelInfo_t; + +typedef struct tagKernelTaskInfo { + uint16_t blockDim; + uint16_t argsCount; + uint16_t argsSize; + uint16_t reserved; + char *stubFunc; + uint8_t *smDesc; + uint8_t *args; + uint16_t *argsOffset; +} rtKernelTaskInfo_t; + +typedef struct tagAllKernelTaskInfo { + uint16_t blockDim; + uint16_t argsCount; + uint16_t argsSize; + uint16_t reserved; + void *devfunc; + void *handle; + uint8_t *smDesc; + uint8_t *args; + uint16_t *argsOffset; +} rtAllKernelTaskInfo_t; + +typedef struct tagKernelTaskInfoEx { + uint32_t flags; + uint32_t argsSize; + void *args; + uint32_t reserved[6]; +} rtKernelTaskInfoEx_t; + +typedef struct tagEventTaskInfo { + uint32_t eventID; + uint32_t reserved[9]; +} rtEventTaskInfo_t; + +typedef struct tagStreamSwitchTaskInfo { + int64_t value; + uint64_t pValuePtr; + uint32_t trueStreamID; + uint32_t dataType; + uint32_t reserved[4]; +} rtStreamSwitchTaskInfo_t; + +typedef struct tagStreamSwitchNTaskInfo { + uint64_t pValuePtr; + uint64_t pTrueStreamPtr; + uint32_t size; + uint32_t elementSize; + uint32_t dataType; + uint32_t reserved[3]; +} rtStreamSwitchNTaskInfo_t; + +typedef struct tagStreamActiveTaskInfo { + uint32_t activeStreamID; + uint32_t reserved[9]; +} rtStreamActiveTaskInfo_t; + +typedef struct tagSetTaskInfo { + uint16_t labelId; + uint32_t reserved[9]; +} rtLabelSetTaskInfo_t; + +typedef struct tagSwitchTaskInfo { + uint32_t value; + uint32_t reserved[9]; +} rtLabelSwitchTaskInfo_t; + +typedef struct tagLabelGotoTaskInfo { + uint16_t labelId; + uint32_t reserved[9]; +} rtLabelGotoTaskInfo_t; + +typedef struct tagProfilerTraceTaskInfo { + uint64_t profilerTraceId; + uint32_t notify : 8; + uint32_t reserved_ : 24; + uint32_t flags; + uint32_t reserved[6]; +} rtProfilerTrace_t; + +typedef struct tagrtMemcpyAsyncTaskInfo { + void *dst; + uint64_t destMax; + void *src; + uint64_t count; + uint32_t kind; + uint32_t reserved; +} rtMemcpyAsyncTaskInfo_t; + +typedef struct tagrtNotifyTaskInfo { + uint32_t notifyID; + uint32_t reserved[9]; +} rtNotifyTaskInfo_t; + +typedef struct tagrtReduceAsyncTaskInfo { + void *dst; + uint64_t destMax; + void *src; + uint64_t count; + uint32_t kind; + uint32_t type; +} rtReduceAsyncTaskInfo_t; + +typedef struct tagrtRdmaSendTaskInfo { + uint32_t index; + uint32_t wqe_index; + uint32_t reserved[8]; +} rtRdmaSendTaskInfo_t; + +typedef struct tagrtRdmaDbSendTaskInfo { + uint64_t dbInfo; + uint32_t dbIndex; + uint32_t reserved[7]; // offset 7 +} rtRdmaDbSendTaskInfo_t; + +typedef struct tagrtModelEndGraphTaskInfo { + uint32_t modelId; + uint32_t executorFlag; + uint32_t reserved[8]; +} rtModelEndGraphTaskInfo_t; + +typedef struct tagrtModelExitInfo { + uint32_t modelId; + uint32_t streamId; + uint32_t reserved[8]; +} rtModelExitTaskInfo_t; + + +typedef struct tagrtStreamLabelSwitchByIndexTask_t { + uint64_t indexPtr; + uint64_t labelInfoPtr; + uint32_t max; + uint8_t reserved[20]; +} rtStreamLabelSwitchByIndexTask_t; + +typedef struct tagrtStreamLabelGotoTask_t { + uint16_t labelId; + uint16_t modelId; + uint8_t reserved[36]; +} rtStreamLabelGotoTask_t; + +typedef struct tagTaskInfo { + uint32_t type; + uint32_t streamID; + union { + rtKernelTaskInfoEx_t kernelTaskEx; + rtKernelTaskInfo_t kernelTask; + rtAllKernelTaskInfo_t allKernelTask; + rtEventTaskInfo_t eventTask; + rtStreamSwitchTaskInfo_t streamSwitchTask; + rtStreamActiveTaskInfo_t streamActiveTask; + rtLabelSetTaskInfo_t 
labelSetTask; + rtLabelSwitchTaskInfo_t labelSwitchTask; + rtLabelGotoTaskInfo_t labelGotoTask; + rtProfilerTrace_t profilertraceTask; + rtMemcpyAsyncTaskInfo_t memcpyAsyncTask; + rtNotifyTaskInfo_t notifyTask; + rtReduceAsyncTaskInfo_t reduceAsyncTask; + rtRdmaSendTaskInfo_t rdmaSendTask; + rtRdmaDbSendTaskInfo_t rdmaDbSendTask; + rtModelEndGraphTaskInfo_t modelEndGraphTask; + rtModelExitTaskInfo_t modelExitTask; + rtStreamSwitchNTaskInfo_t streamSwitchNTask; + rtStreamLabelSwitchByIndexTask_t streamLabelSwitchIndexTask; + rtStreamLabelGotoTask_t streamLabelGotoTask; + uint32_t reserved[10]; + } u; +} rtTaskInfo_t; + +typedef struct tagLabelDevInfo_t { + uint16_t modelId; + uint16_t streamId; + uint16_t labelId; +}rtLabelDevInfo; + +typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo); + +/** + * @ingroup rt_model + * @brief set callback for generate model + * @param [in] callBack callback function + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetTaskGenCallback(rtTaskGenCallback callback); + +/** + * @ingroup rt_model + * @brief create model instance + * @param [out] model created model + * @param [in] flag reserved + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelCreate(rtModel_t *model, uint32_t flag); + +/** + * @ingroup rt_model + * @brief destroy model instance + * @param [in] model model to destroy + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelDestroy(rtModel_t model); + +/** + * @ingroup rt_model + * @brief bind model and stream instance + * @param [in] model binded model + * @param [in] stream binded stream + * @param [in] flag reserved + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelBindStream(rtModel_t model, rtStream_t stream, uint32_t flag); + +/** + * @ingroup rt_model + * @brief unbind model and stream instance + * @param [in] model unbinded model + * @param [in] stream unbinded stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelUnbindStream(rtModel_t model, rtStream_t stream); + +/** + * @ingroup rt_model + * @brief tell runtime Model has been Loaded + * @param [in] model model to execute + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtModelLoadComplete(rtModel_t model); + +/** + * @ingroup rt_model + * @brief execute model instance + * @param [in] model model to execute + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelExecute(rtModel_t model, rtStream_t stream, uint32_t flag); + +/** + * @ingroup rt_model + * @brief get model the last persist task id + * @param [in] model model to execute + * @param [out] taskid last task id of the model + * @param [out] streamid last steam id of the model + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskid, uint32_t *streamid); + +/** + * @ingroup rt_model + * @brief add a end graph task to stream + * @param [in] model model to execute + * @param [in] end graph stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtEndGraph(rtModel_t model, rtStream_t stream); + +/** + * @ingroup rt_model + * @brief add a end graph task with flag to stream 
+ * @param [in] model model to execute + * @param [in] end graph stream + * @param [in] flags AICPU datadump + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtEndGraphEx(rtModel_t model, rtStream_t stream, uint32_t flags); + +/** + * @ingroup rt_model + * @brief add a end graph task to stream + * @param [in] model model to execute + * @param [in] flags EXECUTOR_TS | EXECUTOR_AICPU + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelExecutorSet(rtModel_t model, uint8_t flags); + +/** + * @ingroup rt_model + * @brief abort model + * @param [in] model model to abort + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelAbort(rtModel_t model); + +/** + * @ingroup rt_model + * @brief end graph task to model default stream + * @param [in] model model to execute + * @param [in] end graph stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelExit(rtModel_t model, rtStream_t stream); + +/** + * @ingroup rt_model + * @brief bind queue + * @param [in] model model to bind + * @param [in] queueId queueId to bind + * @param [in] flag + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelBindQueue(rtModel_t model, uint32_t queueId, rtModelQueueFlag_t flag); + +/** + * @ingroup rt_model + * @brief get model id + * @param [in] model + * @param [out] modelId model id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtModelGetId(rtModel_t model, uint32_t *modelId); + +/* + * @ingroup rt_model + * @brief enable debug for dump overflow exception + * @param [in] addr: ddr address of kernel exception dumpped + * @param [in] model: model handle + * @param [in] flag: debug flag + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, + uint32_t *streamId, uint32_t *taskId); + +/* + * @ingroup rt_model + * @brief disable debug for dump overflow exception + * @param [in] model: model handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDebugUnRegister(rtModel_t model); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif + +#endif // __CCE_RUNTIME_MODEL_H__ diff --git a/third_party/fwkacllib/inc/inc/runtime/stream.h b/third_party/fwkacllib/inc/inc/runtime/stream.h new file mode 100644 index 00000000..6b9f80ae --- /dev/null +++ b/third_party/fwkacllib/inc/inc/runtime/stream.h @@ -0,0 +1,196 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef __CCE_RUNTIME_STREAM_H__ +#define __CCE_RUNTIME_STREAM_H__ + +#include "base.h" +#include "event.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +/** + * @ingroup stream_flags + * @brief stream op bit flags + */ +#define RT_STREAM_DEFAULT (0x00) +#define RT_STREAM_PERSISTENT (0x01) +#define RT_STREAM_FORCE_COPY (0x02) +#define RT_STREAM_HUGE (0x04) +#define RT_STREAM_AICPU (0x08) +#define RT_STREAM_FORBIDDEN_DEFAULT (0x10) +#define RT_STREAM_HEAD (0x20) +#define RT_STREAM_PRIMARY_DEFAULT (0x40) +#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80) + +/** + * @ingroup stream_type + * @brief stream type + */ +#define RT_NORMAL_STREAM (0x00) +#define RT_HUGE_STREAM (0x01) + +/** + * priority level default value when create a stream + */ +#define RT_STREAM_PRIORITY_DEFAULT (0) + +/** + * @ingroup dvrt_stream + * @brief create stream instance + * @param [in|out] stream created stream + * @param [in] priority stream priority + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStreamCreate(rtStream_t *stream, int32_t priority); + +/** + * @ingroup dvrt_stream + * @brief create stream instance + * @param [in|out] stream created stream + * @param [in] priority stream priority + * @param [in] flags stream op flags + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStreamCreateWithFlags(rtStream_t *stream, int32_t priority, uint32_t flags); + +/** + * @ingroup dvrt_stream + * @brief destroy stream instance. + * @param [in] stream the stream to destroy + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStreamDestroy(rtStream_t stream); + +/** + * @ingroup dvrt_stream + * @brief wait an recorded event for stream + * @param [in] stream the wait stream + * @param [in] event the event to wait + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event); + +/** + * @ingroup dvrt_stream + * @brief wait stream to be complete + * @param [in] stream stream to wait + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStreamSynchronize(rtStream_t stream); + +/** + * @ingroup dvrt_stream + * @brief queries an asynchronous stream for completion status + * @param [in] stream stream to query + * @return RT_ERROR_NONE for complete + * @return RT_ERROR_STREAM_NOT_COMPLETE for not complete + */ +RTS_API rtError_t rtStreamQuery(rtStream_t stream); + +/** + * @ingroup dvrt_stream + * @brief get stream id from a stream handle + * @param [in] stream stream hadle + * @param [in] streamId stream id + * @return RT_ERROR_NONE for complete + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetStreamId(rtStream_t stream, int32_t *streamId); + +/** + * @ingroup dvrt_stream + * @brief inquire max stream count and max task count per stream + * @param [in] streamType Stream Type + * @param [in] MaxStrCount Max stream count + * @param [in] MaxTaskCount max task count per stream + * @return RT_ERROR_NONE for complete + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetMaxStreamAndTask(uint32_t streamType, uint32_t *maxStrCount, uint32_t *maxTaskCount); + +/** + * @ingroup dvrt_stream + * @brief Name a stream + * @param [in] stream stream to be named + * @param [in] name identification name + * @return 
RT_ERROR_NONE for complete + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtNameStream(rtStream_t stream, const char *name); + +/** + * @ingroup dvrt_stream + * @brief switch to the corresponding stream according to the contents of the ptr + * @param [in] ptr Determine the address where the value of the true and false branches is located + * @param [in] condition switch condition + * @param [in] value switch value + * @param [in] trueStream Stream that needs to be activated when the value is non-zero + * @param [in] stream input stream to init task + * @return RT_ERROR_NONE for complete + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t trueStream, + rtStream_t stream); + +/** + * @brief execute extensible stream switch task + * @param [in] ptr pointer of value + * @param [in] condition judge condition + * @param [in] value_ptr pointer of target value + * @param [in] true_stream stream to be activated when value is not zero + * @param [in] stream stream id + * @param [in] dataType data type of target value + * @return RT_ERROR_NONE for complete + */ +RTS_API rtError_t rtStreamSwitchEx(void *ptr, rtCondition_t condition, void *valuePtr, rtStream_t trueStream, + rtStream_t stream, rtSwitchDataType_t dataType); + +/** + * @ingroup dvrt_stream + * @brief Active a stream + * @param [in] activeStream stream to be activated + * @param [in] stream input stream to init task + * @return RT_ERROR_NONE for complete + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stream); + +/** + * @brief execute extensible stream case switch task + * @param [in] ptr pointer of value + * @param [in] size pointer num of value + * @param [in] valuePtr pointer of target value, length = size * elementSize + * @param [in] trueStreamPtr streams to be activated + * @param [in] elementSize size of to be activated true streams + * @param [in] stream input stream to init task + * @param [in] dataType data type of target value + * @return RT_ERROR_NONE for complete + */ +RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr, + uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType); +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif + +#endif // __CCE_RUNTIME_STREAM_H__ diff --git a/third_party/fwkacllib/inc/inc/soft_dp/ExternalSoftDp.h b/third_party/fwkacllib/inc/inc/soft_dp/ExternalSoftDp.h new file mode 100644 index 00000000..bef5c05d --- /dev/null +++ b/third_party/fwkacllib/inc/inc/soft_dp/ExternalSoftDp.h @@ -0,0 +1,52 @@ +/** +* @file ExternalSoftDp.h +* +* Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ + +#ifndef EXTERNALSOFTDP_H +#define EXTERNALSOFTDP_H + +#include + +extern "C" { +struct SoftDpProcsessInfo { + uint8_t* inputBuffer; + uint32_t inputBufferSize; + + uint8_t* outputBuffer; + uint32_t outputBufferSize; + + uint32_t outputWidth; + uint32_t outputHeight; + + uint32_t reserved; +}; + +struct DpCropInfo { + uint32_t left; + uint32_t right; + uint32_t up; + uint32_t down; +}; + +/* + * @brief decode and resize interface + * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct + * @return success: return 0, fail: return error number + */ +uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo); + +/* + * @brief decode crop and resize interface + * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct + * @param [in] const DpCropInfo& cropInfo: crop struct + * @return success: return 0, fail: return error number + */ +uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo, const DpCropInfo& cropInfo); +} +#endif // EXTERNALSOFTDP_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/tdt/data_common.h b/third_party/fwkacllib/inc/inc/tdt/data_common.h new file mode 100644 index 00000000..7b1d631b --- /dev/null +++ b/third_party/fwkacllib/inc/inc/tdt/data_common.h @@ -0,0 +1,99 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HOST_INNER_INC_DATA_COMMON_H_ +#define HOST_INNER_INC_DATA_COMMON_H_ + +namespace tdt { +#ifndef TDT_DATA_TYPE +#define TDT_DATA_TYPE + +/** + * @ingroup Tdt data. + * + * Tdt data type. + */ +enum TdtDataType { + TDT_IMAGE_LABEL = 0, /**< Image label*/ + TDT_TFRECORD, /**< TF Record*/ + TDT_DATA_LABEL, /**< Data label*/ + TDT_END_OF_SEQUENCE, /**< End of Sequence*/ + TDT_TENSOR, /**< Tensor*/ + TDT_ABNORMAL, /**< ABNORMAL*/ + TDT_DATATYPE_MAX /**< Max*/ +}; +#endif + +/** + * @ingroup Tdt data. + * + * Tdt push data between host and device. + */ +struct TdtDataItem { + TdtDataType dataType_; /**< Input data type*/ + uint64_t label_; /**< Input data label*/ + uint64_t dataLen_; /**< Input data type length*/ + uint64_t realDataLen_; /**< Real Input data type length*/ + std::string tensorShape_; /**< Tensor shape*/ + std::string tensorType_; /**< Tensor type*/ + uint32_t cnt_; /**< Data count*/ + uint32_t currentCnt_; /**< Data current count*/ + uint64_t index_; /**< Data inde*/ + std::string tensorName_; /**< Tensor name*/ + uint64_t md5ValueHead_; /**< Data md5*/ + uint64_t md5ValueTail_; /**< Data md5*/ + std::shared_ptr dataPtr_; /**< Data pointer*/ + std::string headMD5_; /**< MD5 header, 8byte*/ + std::string tailMD5_; /**< MD5 tail, 8byte*/ +}; + +/** + * @ingroup Tdt data. + * + * Tdt push data for queuedataset ort mind-data. 
+ */ +struct DataItem { + TdtDataType dataType_; /**< Input data type*/ + std::string tensorName_; /**< Tensor name*/ + std::string tensorShape_; /**< Tensor shape*/ + std::string tensorType_; /**< Tensor type*/ + uint64_t dataLen_; /**< Input data type length*/ + std::shared_ptr dataPtr_; /**< Data pointer*/ +}; + +/** + * @ingroup Tsdclient. + * + * tsdclient func type; + */ +enum TsdCmdType { + TSDCLOSE = 0, + TSDOPEN = 1 +}; + +/** + * @ingroup Tsdclient. + * + * tsdclient func input value object. + */ +enum InputItem { + OPEN_DEVICEID = 0, + OPEN_RANKSIZE, + CLOSE_DEVICEID +}; + +} // namespace tdt +#endif // HOST_INNER_INC_DATA_COMMON_H_ diff --git a/third_party/fwkacllib/inc/inc/tdt/index_transform.h b/third_party/fwkacllib/inc/inc/tdt/index_transform.h new file mode 100644 index 00000000..a5af2c83 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/tdt/index_transform.h @@ -0,0 +1,29 @@ +/** +* @file index_transform.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2018-2019. All Rights Reserved. +* +* This program is used to get logical device id by phy device id. +*/ + +#ifndef INC_TDT_INDEX_TRANSFORM_H +#define INC_TDT_INDEX_TRANSFORM_H + +#include "stdint.h" +/** +* @ingroup IndexTransform +* @brief get logical device id by phy device id. +* +* @par Function get logical device id by phy device id. +* +* @param phyId [IN] physical device id +* @param logicalId [OUT] logical device id +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libruntime.so: Library to which the interface belongs. +*/ + +int32_t IndexTransform(const uint32_t phyId, uint32_t &logicId); +#endif diff --git a/third_party/fwkacllib/inc/inc/tdt/status.h b/third_party/fwkacllib/inc/inc/tdt/status.h new file mode 100644 index 00000000..d5050f35 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/tdt/status.h @@ -0,0 +1,763 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_TDT_STATUS_H_ +#define INC_TDT_STATUS_H_ + +#include + +#ifdef __cplusplus +#include +#include +#include +#include +#else +#include +#endif + +#ifdef __cplusplus +using TDT_StatusT = uint32_t; +#else +typedef uint32_t TDT_StatusT; +#endif + +#define LINUX 0 +#define WINDOWS 1 + +#ifndef TDT_LIB_EXPORT +#if(TARGET_SYSTEM_NAME == WINDOWS) +#define TDT_LIB_EXPORT __declspec(dllexport) +#else +#define TDT_LIB_EXPORT __attribute__((visibility("default"))) +#endif +#endif +/** + * @ingroup tdt status. 
+ * + * Tdt debug level + */ +enum { + TDT_DEBUG = 0, /**< Debug*/ + TDT_INFO = 1, /**< Info*/ + TDT_WARNING = 2, /**< Warning*/ + TDT_ERROR = 3, /**< Error*/ + TDT_FATAL = 4, /**< Fatal*/ + TDT_EVENT = 5, /**< Event*/ + TDT_OPLOG = 6, /**< Oplog*/ + TDT_TRACE = 7 /**< Trace*/ +}; + +enum { + TDT_OK_CODE = 0, + TDT_DEBUG_INFO_CODE, + TDT_INTERNAL_ERROR_CODE, + TDT_COMMON_WARNING_CODE, + TDT_PREFETCH_STOPED_CODE, + TDT_FILE_SIZE_TOO_LARGE_CODE, + TDT_FILE_INVALID_PATH_CODE, + TDT_MEMORY_EXHAUSTED_CODE, + TDT_INTERGER_REVERSED_CODE, + TDT_FILE_NOT_EXIST_CODE, + TDT_DEFAULT_CONFIG_FILE_NOT_EXIST_CODE, + TDT_INSTANCE_NOT_INITIALED_CODE, + TDT_INITIAL_FAILED_CODE, + TDT_INSTANCE_NOT_FOUND_CODE, + TDT_HDC_CREATE_SESSION_FAILED_CODE, + TDT_HDC_DESTROY_SESSION_FAILED_CODE, + TDT_HDC_SESSION_DO_NOT_EXIST_CODE, + TDT_PID_IS_EXIST_CODE, + TDT_HDC_SRV_INIT_ERROR_CODE, + TDT_HDC_SRV_CREATE_ERROR_CODE, + TDT_HDC_SRV_DESTROY_ERROR_CODE, + TDT_HDC_SRV_ACCEPT_ERROR_CODE, + TDT_HDC_SRV_CLOSED_ERROR_CODE, + TDT_HDC_INTERNAL_ERROR_CODE, + TDT_HDC_INFO_CODE, + TDT_HDC_SEND_ERROR_CODE, + TDT_MESSAGE_PARSE_ERROR_CODE, + TDT_HDC_SEG_SIZE_ERROR_CODE, + TDT_HDC_MESSAGE_NULL_CODE, + TDT_HDC_SEARFUNC_IS_NULL_CODE, + TDT_HDC_SENDMSG_FAILED_CODE, + TDT_HDC_SRV_CLOSE_CHILD_SESSION_ERROR_CODE, + TDT_HDC_SRV_CLOSE_SERVER_SESSION_ERROR_CODE, + TDT_HDC_SRV_HEART_BEAT_TIMEOUT_CODE, // 30 + TDT_HDC_DRV_ERROR_CODE, + TDT_HDC_SERVER_CLIENT_SOCKET_CLOSED_CODE, + TDT_TSD_START_FAIL_CODE, + TDT_TSD_CLEANPROC_FIRST_GETPID_FAILED_CODE, + TDT_TSD_CLEANPROC_KILL_PROCESS_FAILED_CODE, + TDT_TSD_CLEANPROC_SECOND_GETPID_FAILED_CODE, + TDT_TSD_CLEANPROC_FINAL_FAILED_CODE, + TDT_TSD_INIT_STATE_FAILED_CODE, + TDT_TSD_INIT_HDCSERVER_FAILED_CODE, + TDT_TSD_SEND_HEARTBEAT_FAILED_CODE, + TDT_TSD_CLEAN_RESOURCE_FAILED_CODE, + TDT_TSD_SEND_MSG_FAILED_CODE, + TDT_TSD_AICPU_SD_PROCESS_ABNORMAL_CODE, + TDT_TSD_CUSTOM_PROCESS_ABNORMAL_CODE, + TDT_PPC_DRIVER_INIT_FAIL_CODE, + TDT_PPC_SERVER_CLIENT_CREATE_FAIL_CODE, + TDT_PPC_SERVER_CLIENT_DESTORY_FAIL_CODE, + TDT_PPC_SERVER_CLOSE_CODE, + TDT_PPC_GET_SET_MSG_BUFFER_FAIL_CODE, + TDT_PPC_SESSION_CONNECT_FAIL_CODE, // 40 + TDT_PPC_SESSION_NOT_EXISTED_CODE, + TDT_PPC_SEND_RECEIVE_MSG_FAIL_CODE, + TDT_PPC_MSG_FREE_FAIL_CODE, + TDT_PPC_ALLOC_MSG_FAIL_CODE, + TDT_PPC_MSG_LEN_NOT_MATCH_CODE, + TDT_PPC_MSG_BUF_NULL_CODE, + TDT_PPC_CLIENT_INVALID_PARAM_CODE, + TDT_PPC_SERVER_INVALID_PARAM_CODE, + TDT_PPC_CLIENT_RECVDATA_CONTINUE_CODE, + TDT_PPC_SERVER_CLIENT_SOCKET_CLOSED_CODE, // 50 + TDT_PPC_RECV_MSG_ERROR_CODE, + TDT_PPC_SESSION_CLOSE_ERROR_CODE, + TDT_SHUFFLE_SHUFFLE_SIZE_ILLEGAL_CODE, + TDT_SHUFFLE_ONLINE_UNIQUE_SEED_ILLEGAL_CODE, + TDT_SHUFFLE_UNABLE_TO_CREATE_SHUFFLE_LIST_CODE, + TDT_SHUFFLE_ILLEGAL_SHUFFLE_TYPE_CODE, + TDT_PREFETCH_ILLEGAL_DATATYPE_CODE, + TDT_SUPERVISOR_UNKOWN_JOB_STATE_CODE, + TDT_MAP_BUFFER_ERROR_CODE, + TDT_ALLOC_BUFFER_FAILED_CODE, + TDT_FREE_HDC_BUFFER_FAILED_CODE, + TDT_DATA_SIZE_WRONG_CODE, + TDT_MEMORY_POOL_INITED_CODE, + TDT_SENDMSG_FAILED_CODE, + TDT_INVALID_VALUE_CODE, + TDT_NO_USEFUL_MEMORY_CODE, + TDT_MESSAGE_NULL_CODE, + TDT_MEMORY_POOL_STOPPED_CODE, + TDT_HDC_MEMORY_ADDR_NOT_ALIGN_CODE, + TDT_MEMORY_POOL_GET_NULL_CODE, + TDT_MEMORY_POOL_NOT_EXISTED_CODE, + TDT_RECOVER_DATA_FAILED_CODE, + TDT_MEMORY_STATUS_ERROR_CODE, + TDT_MEMORY_POOL_UPDATE_FAILED_CODE, + TDT_MEMORY_POOL_RESIZE_FAILED_CODE, + TDT_MEMORY_DESTROY_FAILED_CODE, + TDT_EXCEED_MAX_THREAD_CODE, + TDT_WARNING_SET_THREAD_NAME_FAILED_CODE, + TDT_WRONG_PRIORITY_CODE, + TDT_JOIN_TASK_ERROR_CODE, + TDT_NULL_FUNC_CODE, 
+ TDT_INIT_FAIL_CODE, + TDT_EXISTED_FUNC_CODE, + TDT_FILE_GET_FILE_STATE_FAIL_CODE, + TDT_FILE_OPEN_FILE_FAIL_CODE, + TDT_FILE_FILE_DESTROYED_CODE, + TDT_FILE_UNABLE_TO_GET_FILE_MEMORY_CODE, + TDT_PREFETCH_UNABLE_TO_GET_TDTDATAITEM_CODE, + TDT_HDCSERVER_DO_NOT_EXIST_CODE, + TDT_HDCSESSIONID_NOT_AVAILABLE_CODE, + TDT_SET_HDCSESSION_REFERENCE_FAILED_CODE, + TDT_HDC_RECV_MSG_ERROR_CODE, + TDT_HDC_SEND_MSG_ERROR_CODE, + TDT_FILE_CONTENT_EMPTY_CODE, + TDT_TDTSEVER_ACCEPT_FAILED_CODE, + TDT_CHANNEL_DO_NOT_EXIST_CODE, + TDT_NULL_POINTER_MSG_CODE, + TDT_TRAN_UNKNOWN_RSP_CODE, + TDT_TRAN_TIMEOUT_CODE, + TDT_TRAN_NOT_EXIST_CODE, + TDT_TRAN_ID_GEN_ERROR_CODE, + TDT_SEND_CHANNEL_FAILED_CODE, + TDT_SEND_CHANNEL_TIMEOUT_CODE, + TDT_QUEUE_STOPPED_CODE, + TDT_QUEUE_POP_FAILED_CODE, + TDT_QUEUE_PUSH_FAILED_CODE, + TDT_QUEUE_NOT_FIND_CODE, + TDT_QUEUE_CREATE_FAILED_CODE, + TDT_QUEUE_FULL_CODE, + TDT_QUEUE_EMPTY_CODE, + TDT_DATA_ENTO_CP_FAILED_CODE, + TDT_STOP_CP_QUEUE_FAILED_CODE, + TDT_RECV_MSG_NO_CHANNEL_INFO_ERROR_CODE, + TDT_CHANNEL_HAS_NO_SESSION_ERROR_CODE, + TDT_PREFETCH_SAMPLE_HAS_NO_LABEL_CODE, + TDT_HDC_CLIENT_INIT_ERROR_CODE, + TDT_HDC_CLIENT_CREATE_SESSION_ERROR_CODE, + TDT_HDC_CLIENT_DO_NOT_EXIST_CODE, + TDT_HDC_CLIENT_DESTROY_ERROR_CODE, + TDT_BIND_CPUCORE_FAILED_CODE, + TDT_HDC_CLIENT_CLOSED_CODE, + TDT_HDC_SRV_CLOSED_CODE, + TDT_HDC_SRV_TYPE_ERROR_CODE, + TDT_TSD_CLT_OPEN_FAILED_CODE, + TDT_TSD_CLT_CLOSE_FAILED_CODE, + TDT_TSD_CLT_UPDATE_PROFILING_FAILED_CODE, + TDT_TSD_CLT_INTERFACE_NOT_SUPPORT_CODE, + TDT_SUPERVISOR_ILLEGAL_HEARTBEAT_TIME_CODE, + TDT_SUPERVISOR_INOTIFY_READ_SIZE_ERROR_CODE, + TDT_SUPERVISOR_INOTIFY_INTERRUPT_CODE, + TDT_SUPERVISOR_INOTIFY_INIT_ERROR_CODE, + TDT_SUPERVISOR_CLOSE_INOTIFYFD_FAIL_CODE, + TDT_SUPERVISOR_INOTIFY_WATCH_ERROR_CODE, + TDT_TRANSFER_CANNOT_OPEN_CONFIGFILE_CODE, + TDT_TRANSFER_PARSE_FILE_FAILED_CODE, + TDT_TRANSFER_NO_CHANNEL_DATA_CODE, + TDT_PREFETCH_CREATE_FAILED_CODE, + TDT_TRANSFER_NO_PARAMETER_CODE, + TDT_TRANSFER_NO_PARAMETER_ARG_CODE, + TDT_FILE_TYPE_UNSUPPORT_CODE, + TDT_FILE_DIR_IS_NULL_CODE, + TDT_FILE_GET_DIR_TREE_ERROR_CODE, + TDT_FILE_CANNOT_OPEN_DIR_CODE, + TDT_PREFETCH_SAMPLE_CANNOT_BE_READ_CODE, + TDT_PREFETCH_DATA_QUEUE_IS_CLOSED_CODE, + TDT_PREFETCH_GET_SHUFFLE_RESULT_FAIL_CODE, + TDT_FILE_CANNOT_DFREE_FILE_MEMORY_CODE, + TDT_TRANSFER_CREATE_DELIVER_FAILED_CODE, + TDT_TRANSFER_TRAIN_DATA_DELIVER_IS_NULLPTR_CODE, + TDT_TRANSFER_EMPTY_GROUPNAME_IN_MULTI_GROUPS_CODE, + TDT_TRANSFER_DUPLICATE_GROUPNAME_CODE, + TDT_TRANSFER_DUPLICATE_DEVICE_CODE, + TDT_TRANSFER_FIND_DEVICE_FAIL_CODE, + TDT_SUPERVISOR_FAIL_TO_WRITE_PID_FILE_CODE, + TDT_SUPERVISOR_HEARTBEAT_FILE_NOT_INITED_CODE, + TDT_SUPERVISOR_JOB_COMMAND_FILE_NOT_INITED_CODE, + TDT_SUPERVISOR_JOB_STATE_FILE_NOT_INITED_CODE, + TDT_PREFETCH_LABEL_FILE_NOT_INITED_CODE, + TDT_PREFETCH_SAMPLE_FILE_DIR_NOT_INITED_CODE, + TDT_PREFETCH_NOT_INITED_CODE, + TDT_PREFETCH_SHUFFLER_NOT_CREATED_CODE, + TDT_SHUFFLE_NOT_INITED_CODE, + TDT_PREFETCH_SHUFFLED_ITEM_OUT_OF_FILE_LIST_CODE, + TDT_TRANSFER_INIT_FAILED_CODE, + TDT_TRANSFER_START_FAILED_CODE, + TDT_FOLDER_CANNOT_BE_CREATED_CODE, + TDT_CANNOT_GET_STAT_OF_FOLDER_CODE, + TDT_FOLDER_IS_FILE_CODE, + TDT_TRANSFER_CONFIG_FIEL_SYNTAX_ERROR_CODE, + TDT_CHECKSUM_ILLEGAL_MD5_PARAM_CODE, + TDT_CHECKSUM_MD5_INIT_FAILED_CODE, + TDT_CHECKSUM_MD5_UPDATE_FAILED_CODE, + TDT_CHECKSUM_MD5_FINAL_FAILED_CODE, + TDT_TRANSFER_DELIVER_IS_NONE_CODE, + TDT_SUPERVISOR_FAIL_TO_DEL_JOB_CMD_FILE_CODE, + TDT_TRANSFER_FAIL_TO_GET_ENV_VARIABLE_CODE, + 
TDT_MONITOR_INOTIFY_INIT_ERROR_CODE, + TDT_MONITOR_INOTIFY_WATCH_ERROR_CODE, + TDT_MONITOR_CLOSE_INOTIFYFD_FAIL_CODE, + TDT_MONITOR_INOTIFY_READ_SIZE_ERROR_CODE, + TDT_MONITOR_UNSUPPORT_CFGITEM_CODE, + TDT_MONITOR_FAIL_TO_SET_CFGITEM_CODE, + TDT_MONITOR_READ_FILE_FAIL_CODE, + TDT_MONITOR_CONFIG_FILE_FORMAT_ERROR_CODE, + TDT_MONITOR_STRCAT_FAILED_CODE, + TDT_MONITOR_CREATE_CONFIG_FILE_FAIL_CODE, + TDT_PREFETCH_FAIL_TO_GENERATE_MD5_CODE, + TDT_RECV_MSG_MD5_WRONG_CODE, + TDT_RECV_MSG_FAIL_TO_GENERATE_MD5_CODE, + TDT_RECV_MSG_SEQUENCE_ERROR_CODE, + TDT_SERVER_MEMORY_COPY_FAILED_CODE, + TDT_DEVICEID_ERROR_CODE, + TDT_MEMORY_DATA_TYPE_FACTORY_MAKE_SHARED_FAILED_CODE, + TDT_PREFETCH_FILELIST_NOT_EXIST_CODE, + TDT_PREFETCH_SAMPLE_FILE_NOT_FOUND_CODE, + TDT_PREFETCH_FILE_OPEN_FAIL_CODE, + TDT_PREFETCH_FILE_STAT_FAIL_CODE, + TDT_PREFETCH_FILE_MMAP_FAIL_CODE, + TDT_PREFETCH_FILE_UNMAP_FAIL_CODE, + TDT_PREFETCH_FILE_CLOSE_FAIL_CODE, + TDT_PREFETCH_FILE_PARSE_FAIL_CODE, + TDT_PREFETCH_CRC32_SIZE_FAIL_CODE, + TDT_PREFETCH_CRC32_DATA_FAIL_CODE, + TDT_PREFETCH_DATA_QUEUE_CLOSED_CODE, + TDT_PREFETCH_INITIALIZE_FAILED_CODE, + TDT_PREFETCH_MAP_INSERT_FAILED_CODE, + TDT_PREFETCH_INVALID_FILELIST_LINE_CODE, + TDT_FILE_STRINGSTREAM_TO_VALUE_FAILED_CODE, + TDT_LIST_ID_OFFSET_LENGTH_POSITIVE_INTEGER_FAILED_CODE, + TDT_SHUFFLE_ILLEGAL_SHUFFLE_PARAM_CODE, + TDT_FILE_SHUFFLER_CREATE_FAILED_CODE, + TDT_FILE_UPLOADER_CREATE_FAILED_CODE, + TDT_FILE_DOWNLOADER_CREATE_FAILED_CODE, + TDT_OBS_CONFIG_INFORMATION_FAIL_CODE, + TDT_OBS_CALLBACK_ARGUMENT_FAIL_CODE, + TDT_OBS_DOWNLOAD_CREATE_THREAD_FAILED_CODE, + TDT_OBS_DOWNLOAD_FILE_FAIL_CODE, + TDT_OBS_DOWNLOAD_INIT_FAIL_CODE, + TDT_OBS_DOWNLOAD_METADATA_FAIL_CODE, + TDT_OBS_LIST_BUCKET_OBJECTS_FAIL_CODE, + TDT_MEMORY_MEMCPY_FAILED_CODE, + TDT_MEMORY_MEMSET_FAILED_CODE, + TDT_MKDIR_CMD_FAILED_CODE, + TDT_CP_CMD_FAILED_CODE, + TDT_HOST_INIT_FAILED_CODE, + TDT_HOST_CHANNEL_NAME_EMPTY_CODE, + TDT_HOST_ALLOCATE_MEMORY_FAILED_CODE, + TDT_HOST_MEMORY_COPY_FAILED_CODE, + TDT_HOST_UNABLE_GET_TDTDATAELEM_CODE, + TDT_HOST_PUSH_NOT_INIT_CODE, + TDT_TUNING_DATA_TRANSFER_INIT_FAILED_CODE, + TDT_TUNING_DATA_RECEIVE_CHECK_PARA_ERROR_CODE, + TDT_TUNING_DATA_TRANSFER_PARAMETER_ERROR_CODE, + TDT_RECV_MSG_CHECKSUM_WRONG_ERROR_CODE, + TDT_SVM_INIT_FAILED_CODE, + TDT_SVM_FREE_PIN_FAILED_CODE, + TDT_SVM_FREE_SVM_FAILED_CODE, + TDT_SVM_ADD_BUFFER_MAP_FAILED_CODE, + TDT_STATUS_CODE_TOTAL +}; + +/** + * @ingroup Tdt status + * @brief Regiter error code + * @param moduleId [IN] Module ID + * @param logLevel [IN] Log level + * @param CODE_NAME [out] Error name + * @param codeDesc [IN] Error description + */ +#ifdef __cplusplus +#define TDT_DEF_ERROR_CODE(moduleId, logLevel, CODE_NAME, codeDesc) \ + constexpr TDT_StatusT CODE_NAME = ((0xFFFF & ((uint16_t)moduleId)) << 16) | \ + (0xF000 & (((uint16_t)logLevel) << 12)) | (0x0FFF & (CODE_NAME##_CODE)); \ + const tdt::ErrorNoRegisterar g_##CODE_NAME##_errorno(CODE_NAME, codeDesc); +#else +#define TDT_DEF_ERROR_CODE(moduleId, logLevel, CODE_NAME, codeDesc) \ + static const TDT_StatusT CODE_NAME = \ + ((0xFFFF & ((uint16_t)moduleId)) << 16) | (0xF000 & (((uint16_t)logLevel) << 12)) | (0x0FFF & CODE_NAME##_CODE); +#endif + +/** + * @ingroup Tdt status + * @brief Get error level according error name + * @param CODE_NAME [IN] Error code + * @param codeDesc [OUT] Error description + */ +#define TDT_GET_ERROR_LEVEL(CODE_NAME) ((CODE_NAME & 0x0000F000) >> 12) + +#ifdef __cplusplus +#define TDT_GET_ERROR_STR(CODE_NAME) 
(tdt::StatusFactory::GetInstance()->GetErrDesc(CODE_NAME)) +#endif + +// Register module id: 0xAABB, AA means system level number, BB means module level number +constexpr uint16_t MODID_TDT_CLIENT = 0x0101; // TDT_CLIENT module ID +constexpr uint16_t MODID_TSD_SERVER = 0x0102; // TSD_SERVER +constexpr uint16_t MODID_HDC = 0x0103; // HDC_SERVER +constexpr uint16_t MODID_TDT_SHUFFLE = 0x0104; // TDT shuffle module ID +constexpr uint16_t MODID_TDT_PREFETCH = 0x0105; // TDT prefetch module ID +constexpr uint16_t MODID_TDT_TRANSFER = 0x0106; // TDT TrainDataTransfer module ID +constexpr uint16_t MODID_TDT_SUPERVISOR = 0x0107; // TDT supervisor模块ID +constexpr uint16_t MODID_MEM_POOL = 0x0108; // MEMORY_POOL +constexpr uint16_t MODID_PPC = 0x0109; // TDT PPC +constexpr uint16_t MODID_TDT_FILE = 0x0110; // TDT file operation module ID +constexpr uint16_t MODID_HDC_SERVER = 0x0111; // HDC_SERVER module ID +constexpr uint16_t MODID_TDT_SERVER = 0x0112; // TDTServer module ID +constexpr uint16_t MODID_HDC_CLIENT = 0x0113; // HDC_CLIENT module ID +constexpr uint16_t MODID_TSD_CLIENT = 0x0114; // TSD_CLIENT module ID +constexpr uint16_t MODID_CHECKSUM = 0x0115; // Checksum module ID +constexpr uint16_t MODID_TDT_MONITOR = 0x0116; // TDT monitor module ID +constexpr uint16_t MODID_TDT_HOST = 0x0117; // GE adapts the TDT HOST module ID +constexpr uint16_t MODID_SVM = 0x0118; // SVM Driver module ID + +constexpr uint32_t TDT_API_MAX_SUB_VERSION = 100; +static const int32_t TDT_INVAILED_DEVICE_ID = 0xFFFFFFFF; + +typedef enum tdt_api_version { + TDT_API_VERSION_V1_00 = 100, + TDT_API_VERSION_V1_01 = 101, + TDT_API_VERSION_V2_00 = 200 +} TDT_API_VERSION; + +#ifdef __cplusplus +namespace tdt { +class StatusFactory { + public: + /** + * @ingroup hiaiengine + * @brief Get a pointer to StatusFactory + * @param [in]: + * @return StatusFactory pointer + */ + TDT_LIB_EXPORT static StatusFactory *GetInstance(); + + /** + * @ingroup hiaiengine + * @brief Registration error code + * @param [in]err error code + * @param [in]desc Description string of the error code + */ + TDT_LIB_EXPORT void RegisterErrorNo(const uint32_t err, const std::string &desc); + + /** + * @ingroup hiaiengine + * @brief Get error code description string + * @param [in]err error code + */ + std::string GetErrDesc(const uint32_t err); + + /** + * @ingroup hiaiengine + * @brief Static function: Get error code description string + * @param [in]err error code + * return : If there is a problem, return the empty string "" + */ + static std::string GetErrCodeDesc(uint32_t errCode); + + protected: + /** + * @ingroup hiaiengine + * @brief Constructor + * @param [in] void + */ + StatusFactory(); + + /** + * @ingroup hiaiengine + * @brief Destructor + * @param [in] void + */ + ~StatusFactory() {} + + StatusFactory(const StatusFactory &) = delete; + StatusFactory(StatusFactory &&) = delete; + StatusFactory &operator=(const StatusFactory &) = delete; + StatusFactory &operator=(StatusFactory &&) = delete; + + static std::mutex &GetMutex(); + + private: + std::mutex rwMutex_; + std::map errDesc_; +}; + +class ErrorNoRegisterar { + public: + /** + * @ingroup hiaiengine + * @brief Registration error code + * @param [in]err error code + * @param [in]desc Description of the registration error code + */ + ErrorNoRegisterar(const uint32_t &err, const std::string &desc) { + StatusFactory::GetInstance()->RegisterErrorNo(err, desc); + } + + ~ErrorNoRegisterar() {} + ErrorNoRegisterar(const ErrorNoRegisterar &) = delete; + ErrorNoRegisterar(ErrorNoRegisterar &&) = 
delete; + ErrorNoRegisterar &operator=(const ErrorNoRegisterar &) = delete; + ErrorNoRegisterar &operator=(ErrorNoRegisterar &&) = delete; +}; +} // namespace tdt +#endif + +// register error code +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_INFO, TDT_OK, "running ok"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_DEBUG, TDT_DEBUG_INFO, "debug info"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_INTERNAL_ERROR, "internal error"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_WARNING, TDT_COMMON_WARNING, "warnging"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_INFO, TDT_PREFETCH_STOPED, "stopped"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_FILE_NOT_EXIST, "File is not existed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_DEFAULT_CONFIG_FILE_NOT_EXIST, "Default config file not exist"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_FILE_SIZE_TOO_LARGE, "file size is too large"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_FILE_INVALID_PATH, "file path is invalid"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_MEMORY_EXHAUSTED, "memory exhausted error"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_INTERGER_REVERSED, "interger reached reverse"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_INSTANCE_NOT_INITIALED, + "call member function before instance initialed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_INITIAL_FAILED, "initial failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_INSTANCE_NOT_FOUND, "instance not found"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_HDC_CREATE_SESSION_FAILED, "create hdc session failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_HDC_DESTROY_SESSION_FAILED, "destory hdc session failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_HDC_SESSION_DO_NOT_EXIST, "hdc session id do not exist"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_PID_IS_EXIST, "tdtMain pid is exist"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_INIT_ERROR, "hdc server init error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_CREATE_ERROR, "hdc server create error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_DESTROY_ERROR, "hdc server destroy error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_ACCEPT_ERROR, "hdc server accept error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_CLOSED_ERROR, "hdc server closed error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_INTERNAL_ERROR, "hdc fail"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_DEVICEID_ERROR, "hdc device id error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_CLOSE_CHILD_SESSION_ERROR, "hdc server close child session error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SEARFUNC_IS_NULL, "serarfunc is null"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SENDMSG_FAILED, "hdc send msg failed"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_CLOSE_SERVER_SESSION_ERROR, + "hdc server close server session error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SRV_HEART_BEAT_TIMEOUT, "hdc server heart beat timeout"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_DRV_ERROR, "hiai drv return error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_INFO, TDT_HDC_INFO, "hdc info"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SEND_ERROR, "hdc send message failed"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_SEG_SIZE_ERROR, "hiai seg size error"); +TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_HDC_MESSAGE_NULL, "Message input is null"); 
+TDT_DEF_ERROR_CODE(MODID_HDC, TDT_ERROR, TDT_MESSAGE_PARSE_ERROR, "hdc message parse error"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDCSERVER_DO_NOT_EXIST, "hdc server do not exist"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDCSESSIONID_NOT_AVAILABLE, "hdc sessionid vector is empty"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_SET_HDCSESSION_REFERENCE_FAILED, + "hdc set hdc session reference failed"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_RECV_MSG_ERROR, "hdc recv message failed"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_SEND_MSG_ERROR, "hdc send message failed"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_SRV_TYPE_ERROR, "hdc service type is not supported"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_SERVER_CLIENT_SOCKET_CLOSED, + "hdc service or client socket closed"); + +/*********************TSDAEMON************************/ +// create TSDAEMON error level error +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_START_FAIL, "Tsdaemon start fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEANPROC_FIRST_GETPID_FAILED, "Tsdaemon first get pid fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEANPROC_KILL_PROCESS_FAILED, "Tsdaemon kill processfail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEANPROC_SECOND_GETPID_FAILED, "Tsdaemon second get pid fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEANPROC_FINAL_FAILED, "Tsdaemon clean process final fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_INIT_STATE_FAILED, "Tsdaemon init state fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_INIT_HDCSERVER_FAILED, "Tsdaemon init hdcserver fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_HEARTBEAT_FAILED, "Tsdaemon get pid fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEAN_RESOURCE_FAILED, "Tsdaemon clean resource fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_MSG_FAILED, "Tsdaemon send msg fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_AICPU_SD_PROCESS_ABNORMAL, "aicpu_sd process abnormal"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_INFO, TDT_TSD_CUSTOM_PROCESS_ABNORMAL, "custom_aicpu_sd process abnormal"); + +/********************* PPC ****************************/ +// create PPC error level error +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_DRIVER_INIT_FAIL, "Init PPC driver fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_CLIENT_CREATE_FAIL, "Create PPC server or PPC client fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_CLIENT_DESTORY_FAIL, "Destory PPC server or PPC client fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_CLOSE, "PPC server is closed"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_GET_SET_MSG_BUFFER_FAIL, "PPC get or set msg buffer fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SESSION_CONNECT_FAIL, "PPC connect is failed"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SESSION_NOT_EXISTED, "PPC session is not existed"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SEND_RECEIVE_MSG_FAIL, "PPC send or receive msg fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_MSG_FREE_FAIL, "PPC msg free fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_ALLOC_MSG_FAIL, "PPC alloc memory for msg fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_MSG_LEN_NOT_MATCH, "PPC message length not match"); 
+TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_MSG_BUF_NULL, "PPC message buffer is null"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_CLIENT_INVALID_PARAM, "PPC message client invalid param fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_INVALID_PARAM, "PPC message server invalid param fail"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_CLIENT_RECVDATA_CONTINUE, + "PPC message client receive not expected msg continue"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_CLIENT_SOCKET_CLOSED, + "PPC message server receive server or client socket closed msg"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_RECV_MSG_ERROR, "PPC receive msg failed"); +TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SESSION_CLOSE_ERROR, "PPC close session failed"); + +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_GET_FILE_STATE_FAIL, "can not get file state"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_OPEN_FILE_FAIL, "can not open file"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_CONTENT_EMPTY, "file content is empty"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_FILE_DESTROYED, "file is destroyed"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_UNABLE_TO_GET_FILE_MEMORY, "fail to get memory for file"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_TYPE_UNSUPPORT, "file type is not supported"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_DIR_IS_NULL, "pointer to dir is null"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_GET_DIR_TREE_ERROR, "can not get the tree of dir"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_CANNOT_OPEN_DIR, "dir cannot be opened"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_CANNOT_DFREE_FILE_MEMORY, "DFree memory of file failed"); + +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_SHUFFLE_SIZE_ILLEGAL, + "shuffle size is less or equal to 0"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_ONLINE_UNIQUE_SEED_ILLEGAL, + "online unique seed is equal to 0"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_UNABLE_TO_CREATE_SHUFFLE_LIST, + "unable to create shuffle list"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_ILLEGAL_SHUFFLE_TYPE, "illegal shuffle type"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_NOT_INITED, "shuffler has not been inited"); + +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SAMPLE_HAS_NO_LABEL, "the sample has no label"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SAMPLE_CANNOT_BE_READ, "the sample cannot be read"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_ILLEGAL_DATATYPE, "illegal data type"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_CREATE_FAILED, "creating prefetcher failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_UNABLE_TO_GET_TDTDATAITEM, "fail to get TDTDataItem"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_DATA_QUEUE_IS_CLOSED, "data queue is closed"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_GET_SHUFFLE_RESULT_FAIL, "fail to get shuffle result"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_LABEL_FILE_NOT_INITED, "label file has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SAMPLE_FILE_DIR_NOT_INITED, + "directory of sample files has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, 
TDT_PREFETCH_NOT_INITED, "prefetcher in deliver has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SHUFFLER_NOT_CREATED, + "shuffler in prefetcher has not been created"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SHUFFLED_ITEM_OUT_OF_FILE_LIST, + "shuffled item is out of file list"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FAIL_TO_GENERATE_MD5, "fail to generate md5 of data"); +TDT_DEF_ERROR_CODE(MODID_CHECKSUM, TDT_ERROR, TDT_CHECKSUM_ILLEGAL_MD5_PARAM, "params to generate md5 is illegal"); +TDT_DEF_ERROR_CODE(MODID_CHECKSUM, TDT_ERROR, TDT_CHECKSUM_MD5_INIT_FAILED, "md5_init failed"); +TDT_DEF_ERROR_CODE(MODID_CHECKSUM, TDT_ERROR, TDT_CHECKSUM_MD5_UPDATE_FAILED, "md5_update failed"); +TDT_DEF_ERROR_CODE(MODID_CHECKSUM, TDT_ERROR, TDT_CHECKSUM_MD5_FINAL_FAILED, "md5_final failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_CANNOT_OPEN_CONFIGFILE, "can not open config file"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_PARSE_FILE_FAILED, "parse file failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_NO_CHANNEL_DATA, + "no channel can be found in config file"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_NO_PARAMETER, "no parameter can be found"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_NO_PARAMETER_ARG, + "the argment is not --configfile or stop"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_CREATE_DELIVER_FAILED, + "fail to create train data deliver"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_TRAIN_DATA_DELIVER_IS_NULLPTR, + "train data deliver in the list is nullptr"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_INIT_FAILED, "train data deliver init failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_START_FAILED, "train data deliver start failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_CONFIG_FIEL_SYNTAX_ERROR, + "config file has syntax error"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_DELIVER_IS_NONE, "no deliver is existed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_MKDIR_CMD_FAILED, "mkdir cmd failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_CP_CMD_FAILED, "cp cmd failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_EMPTY_GROUPNAME_IN_MULTI_GROUPS, "empty group_name"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_DUPLICATE_GROUPNAME, + "the same group_name already exists"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_DUPLICATE_DEVICE, "the same device already exists"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_ERROR, TDT_TRANSFER_FIND_DEVICE_FAIL, "cannot find device"); + +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_WARNING, TDT_SUPERVISOR_INOTIFY_INTERRUPT, "inotify is interrupted"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_UNKOWN_JOB_STATE, "unknow job state"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_ILLEGAL_HEARTBEAT_TIME, "illegal heartbeat time"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_INOTIFY_READ_SIZE_ERROR, + "read size of inotify is error"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_INOTIFY_INIT_ERROR, + "Initialization of inotify failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_CLOSE_INOTIFYFD_FAIL, "Close inotifyFd failed"); 
+TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_INOTIFY_WATCH_ERROR, "Add watch of inotify failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_FAIL_TO_WRITE_PID_FILE, "fail to write pid file"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_HEARTBEAT_FILE_NOT_INITED, + "heart beat file has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_JOB_COMMAND_FILE_NOT_INITED, + "job command file has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_JOB_STATE_FILE_NOT_INITED, + "job state file has not been inited"); +TDT_DEF_ERROR_CODE(MODID_TDT_SUPERVISOR, TDT_ERROR, TDT_SUPERVISOR_FAIL_TO_DEL_JOB_CMD_FILE, + "fail to delete job command file"); +TDT_DEF_ERROR_CODE(MODID_TDT_TRANSFER, TDT_WARNING, TDT_TRANSFER_FAIL_TO_GET_ENV_VARIABLE, + "can not get environment variable"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_INOTIFY_INIT_ERROR, "Initialization of inotify failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_INOTIFY_WATCH_ERROR, "Add watch of inotify failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_CLOSE_INOTIFYFD_FAIL, "Close inotifyFd failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_WARNING, TDT_MONITOR_INOTIFY_READ_SIZE_ERROR, + "read size of inotify is not correct"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_WARNING, TDT_MONITOR_UNSUPPORT_CFGITEM, "unsupported config item"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_WARNING, TDT_MONITOR_FAIL_TO_SET_CFGITEM, "can not set local config item"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_READ_FILE_FAIL, "read file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_CONFIG_FILE_FORMAT_ERROR, + "config file is incorrectly formatted"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_STRCAT_FAILED, "strcat failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_MONITOR, TDT_ERROR, TDT_MONITOR_CREATE_CONFIG_FILE_FAIL, + "create ConfigFile pointer failed"); + +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MAP_BUFFER_ERROR, "host buffer map to device failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_ALLOC_BUFFER_FAILED, "memory pool alloc buffer failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_DATA_SIZE_WRONG, "Input datasize is wrong"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_FREE_HDC_BUFFER_FAILED, "memory pool free buffer failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_INVALID_VALUE, "invalid parameter"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_NO_USEFUL_MEMORY, "no usable memory in memory pool"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MESSAGE_NULL, "recv msg is null"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_SENDMSG_FAILED, "send msg failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_STOPPED, "mempool has stopped"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_HDC_MEMORY_ADDR_NOT_ALIGN, "buffer not aligned"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_INITED, "memory pool has inited"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_GET_NULL, "mempool not exist"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_NOT_EXISTED, "mempool not exist"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_RECOVER_DATA_FAILED, "Recover recv data failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_STATUS_ERROR, "Memory status error"); 
+TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_UPDATE_FAILED, "update memory pool status failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_POOL_RESIZE_FAILED, "resize memory pool status failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_EXCEED_MAX_THREAD, "thread size is too large"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_WARNING_SET_THREAD_NAME_FAILED, "rename thread failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_WRONG_PRIORITY, "priority is invalid"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_JOIN_TASK_ERROR, "join task failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_NULL_FUNC, "func is null"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_INIT_FAIL, "sear/dear init failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_WARNING, TDT_EXISTED_FUNC, "func has already existed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_DESTROY_FAILED, "mempool destroy failed"); +TDT_DEF_ERROR_CODE(MODID_MEM_POOL, TDT_ERROR, TDT_MEMORY_DATA_TYPE_FACTORY_MAKE_SHARED_FAILED, + "data type factory make shared failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TDTSEVER_ACCEPT_FAILED, "tdt server accept hdc session failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_CHANNEL_DO_NOT_EXIST, "channel do not exist"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_NULL_POINTER_MSG, "message is null"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TRAN_UNKNOWN_RSP, "transcation status error"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TRAN_TIMEOUT, "transcation time out"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TRAN_NOT_EXIST, "transcation requst id is not exist"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TRAN_ID_GEN_ERROR, "transcation generateid failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_SEND_CHANNEL_FAILED, "send channel info failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_SEND_CHANNEL_TIMEOUT, "send channel info time out"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_INFO, TDT_QUEUE_STOPPED, "queue has been stopped"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_INFO, TDT_QUEUE_POP_FAILED, "failed to pop data from queue"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_INFO, TDT_QUEUE_PUSH_FAILED, "failed to push data from queue"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_QUEUE_CREATE_FAILED, "queue create fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_QUEUE_NOT_FIND, "queue not find"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_QUEUE_FULL, "queue is full"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_QUEUE_EMPTY, "queue is empty"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_DATA_ENTO_CP_FAILED, "enqueue to computer process failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_STOP_CP_QUEUE_FAILED, "stop computer process queue failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_RECV_MSG_NO_CHANNEL_INFO_ERROR, "no channel in first msg"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_RECV_MSG_MD5_WRONG, "md5 of recv msg is wrong"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_RECV_MSG_CHECKSUM_WRONG_ERROR, "checksum of recv msg is wrong"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_RECV_MSG_FAIL_TO_GENERATE_MD5, "md5 of recv msg is wrong"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, TDT_RECV_MSG_SEQUENCE_ERROR, "sequence recv msg is wrong"); +TDT_DEF_ERROR_CODE(MODID_TDT_SERVER, TDT_ERROR, 
TDT_SERVER_MEMORY_COPY_FAILED, "memory copy failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_CHANNEL_HAS_NO_SESSION_ERROR, "channel has no session"); +TDT_DEF_ERROR_CODE(MODID_HDC_CLIENT, TDT_ERROR, TDT_HDC_CLIENT_INIT_ERROR, "hdc client init error"); +TDT_DEF_ERROR_CODE(MODID_HDC_CLIENT, TDT_ERROR, TDT_HDC_CLIENT_CREATE_SESSION_ERROR, "hdc client create error"); +TDT_DEF_ERROR_CODE(MODID_HDC_CLIENT, TDT_ERROR, TDT_HDC_CLIENT_DO_NOT_EXIST, "hdc client do not exist"); +TDT_DEF_ERROR_CODE(MODID_HDC_CLIENT, TDT_ERROR, TDT_HDC_CLIENT_DESTROY_ERROR, "hdc server destroy error"); +TDT_DEF_ERROR_CODE(MODID_HDC_CLIENT, TDT_ERROR, TDT_HDC_CLIENT_CLOSED, "hdc client has been closed"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_BIND_CPUCORE_FAILED, "thread function bind cpu core failed"); +TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_SRV_CLOSED, "hdc server has been closed"); +TDT_DEF_ERROR_CODE(MODID_TSD_CLIENT, TDT_ERROR, TDT_TSD_CLT_OPEN_FAILED, "tsd client open failed"); +TDT_DEF_ERROR_CODE(MODID_TSD_CLIENT, TDT_ERROR, TDT_TSD_CLT_CLOSE_FAILED, "tsd client close failed"); +TDT_DEF_ERROR_CODE(MODID_TSD_CLIENT, TDT_ERROR, TDT_TSD_CLT_UPDATE_PROFILING_FAILED, + "tsd client update profiling failed"); +TDT_DEF_ERROR_CODE(MODID_TSD_CLIENT, TDT_ERROR, TDT_TSD_CLT_INTERFACE_NOT_SUPPORT, "tsd client func not support"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILELIST_NOT_EXIST, "tdt filelist open failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_SAMPLE_FILE_NOT_FOUND, "tdt sample file is empty"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_OPEN_FAIL, "tdt open sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_STAT_FAIL, "tdt stat sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_MMAP_FAIL, "tdt mmap sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_UNMAP_FAIL, "tdt unmap sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_CLOSE_FAIL, "tdt close sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_FILE_PARSE_FAIL, "tdt parse sample file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_CRC32_SIZE_FAIL, "tdt crc32 of size mismatch"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_CRC32_DATA_FAIL, "tdt crc32 of data mismatch"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_DATA_QUEUE_CLOSED, "tdt prefetch data queue closed"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_MAP_INSERT_FAILED, "map insert fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_INITIALIZE_FAILED, "prefetch init fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_PREFETCH_INVALID_FILELIST_LINE, "invalid filelist line"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_STRINGSTREAM_TO_VALUE_FAILED, "string to value fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_LIST_ID_OFFSET_LENGTH_POSITIVE_INTEGER_FAILED, + "value positive integer fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_SHUFFLE_ILLEGAL_SHUFFLE_PARAM, "Illegal shuffle parameter"); +TDT_DEF_ERROR_CODE(MODID_TDT_SHUFFLE, TDT_ERROR, TDT_FILE_SHUFFLER_CREATE_FAILED, "Create file shuffler fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FILE_UPLOADER_CREATE_FAILED, "Create uploader fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, 
TDT_FILE_DOWNLOADER_CREATE_FAILED, "Create downloader fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FOLDER_CANNOT_BE_CREATED, "folder cannot been created"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_CANNOT_GET_STAT_OF_FOLDER, "cannot get stat of folder"); +TDT_DEF_ERROR_CODE(MODID_TDT_FILE, TDT_ERROR, TDT_FOLDER_IS_FILE, "folder is a file"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_CONFIG_INFORMATION_FAIL, "OBS configuration fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_CALLBACK_ARGUMENT_FAIL, "OBS callback argument fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_DOWNLOAD_CREATE_THREAD_FAILED, + "OBS download create thread fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_DOWNLOAD_FILE_FAIL, "OBS download file fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_DOWNLOAD_INIT_FAIL, "OBS download init fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_DOWNLOAD_METADATA_FAIL, "OBS download metadata fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_OBS_LIST_BUCKET_OBJECTS_FAIL, "OBS list bucket fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_MEMORY_MEMCPY_FAILED, "tdt securec memcpy fail"); +TDT_DEF_ERROR_CODE(MODID_TDT_PREFETCH, TDT_ERROR, TDT_MEMORY_MEMSET_FAILED, "tdt securec memset fail"); +// TDT HOST +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_ERROR, TDT_HOST_INIT_FAILED, "tdt host init failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_ERROR, TDT_HOST_CHANNEL_NAME_EMPTY, "channel name is empty"); +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_ERROR, TDT_HOST_ALLOCATE_MEMORY_FAILED, "allocate memory failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_ERROR, TDT_HOST_MEMORY_COPY_FAILED, "memory copy failed"); +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_WARNING, TDT_HOST_UNABLE_GET_TDTDATAELEM, "can not get data element"); +TDT_DEF_ERROR_CODE(MODID_TDT_HOST, TDT_WARNING, TDT_HOST_PUSH_NOT_INIT, "push data but not init"); + +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TUNING_DATA_TRANSFER_INIT_FAILED, + "failed to init the channel of tuning-data"); + +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TUNING_DATA_RECEIVE_CHECK_PARA_ERROR, "the index is error"); + +TDT_DEF_ERROR_CODE(MODID_TDT_CLIENT, TDT_ERROR, TDT_TUNING_DATA_TRANSFER_PARAMETER_ERROR, "the parameter is error"); +TDT_DEF_ERROR_CODE(MODID_SVM, TDT_ERROR, TDT_SVM_INIT_FAILED, "SVM driver init failed"); +TDT_DEF_ERROR_CODE(MODID_SVM, TDT_ERROR, TDT_SVM_FREE_PIN_FAILED, "SVM driver free host pin memory failed"); +TDT_DEF_ERROR_CODE(MODID_SVM, TDT_ERROR, TDT_SVM_FREE_SVM_FAILED, "SVM driver free device svm memory failed"); +TDT_DEF_ERROR_CODE(MODID_SVM, TDT_ERROR, TDT_SVM_ADD_BUFFER_MAP_FAILED, "add svm buffer info to map failed"); +#endif // INC_TDT_STATUS_H_ diff --git a/third_party/fwkacllib/inc/inc/tdt/tdt_host_interface.h b/third_party/fwkacllib/inc/inc/tdt/tdt_host_interface.h new file mode 100644 index 00000000..3e7d11ee --- /dev/null +++ b/third_party/fwkacllib/inc/inc/tdt/tdt_host_interface.h @@ -0,0 +1,210 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HOST_INNER_INC_TDT_HOST_INTERFACE_H_ +#define HOST_INNER_INC_TDT_HOST_INTERFACE_H_ + +#include +#include +#include +#include "tdt/data_common.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +namespace tdt { +/** +* @ingroup TdtHostInit +* @brief Initialize the interface, start and initialize various general thread, log and other services +* +* @par Function +* Initialize the interface, start and initialize various general thread, log and other services +* +* @param deviceId [IN] type #unsigned int. Physical device ID +* @retval #0 Success +* @retval #Not 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtHostInit(uint32_t deviceId); + +/** +* @ingroup TdtHostPushData +* @brief Blocking queue. When the queue is full, the Push interface will block. +* +* @par Function +* Blocking queue. When the queue is full, the Push interface will block. +* +* @param channelName [IN] type #String. queue channel name +* @param items [IN] type #vector DataItem is defined in data_common.h. input data +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'DataItem' defined +*/ +int32_t TdtHostPushData(const std::string &channelName, const std::vector &item, uint32_t deviceId = 0); + +/** +* @ingroup TdtHostDestroy +* @brief Notify TDT component to close related resources +* +* @par Function +* Notify TDT component to close related resources +* +* @param NA +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtHostDestroy(); + +/** +* @ingroup TdtHostPreparePopData +* @brief Prepare pop data from Tdt data storage queue +* +* @par Function +* Prepare pop data from Tdt data storage queue +* +* @param NA +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'DataItem' defined +*/ +int32_t TdtHostPreparePopData(); + +/** +* @ingroup TdtHostPopData +* @brief POP data from Tdt data storage queue +* +* @par Function +* POP data from Tdt data storage queue +* +* @param channelName [IN] type #String. queue channel name +* @param items [IN] type #vector DataItem is defined in data_common.h. input data +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. 
+* @li data_common.h: Header file where 'DataItem' defined +*/ +int32_t TdtHostPopData(const std::string &channelName, std::vector &item); + +/** +* @ingroup TdtHostStop +* @brief Activate the thread that reads data externally from Tdt and +* send end of sequence data so that the external thread can exit +* +* @par Function +* Activate the thread that reads data externally from Tdt and send +* end of sequence data so that the external thread can exit +* +* @param channelName [IN] type #String. queue channel name +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtHostStop(const std::string &channelName); + +/** +* @ingroup TdtInFeedInit +* @brief Initialize the interface, start and initialize various general thread, log and other services +* +* @par Function +* Initialize the interface, start and initialize various general thread, log and other services +* +* @param deviceId [IN] type #unsigned int. logic device ID +* @retval #0 Success +* @retval #Not 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtInFeedInit(uint32_t deviceId); + +/** +* @ingroup TdtOutFeedInit +* @brief Initialize the interface, start and initialize various general thread, log and other services +* +* @par Function +* Initialize the interface, start and initialize various general thread, log and other services +* +* @param deviceId [IN] type #unsigned int. logic device ID +* @retval #0 Success +* @retval #Not 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtOutFeedInit(uint32_t deviceId); + +/** +* @ingroup TdtInFeedDestroy +* @brief Notify TDT component to close related resources +* +* @par Function +* Notify TDT component to close related resources +* +* @param NA +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtInFeedDestroy(uint32_t deviceId); + +/** +* @ingroup TdtOutFeedDestroy +* @brief Notify TDT component to close related resources +* +* @par Function +* Notify TDT component to close related resources +* +* @param NA +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtOutFeedDestroy(); +} // namespace tdt +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HOST_INNER_INC_TDT_HOST_INTERFACE_H_ diff --git a/third_party/fwkacllib/inc/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/inc/tdt/tsd_client.h new file mode 100644 index 00000000..665c8b82 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/tdt/tsd_client.h @@ -0,0 +1,195 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H_
+#define TDT_HOST_INNER_INC_TSD_CLIENT_H_
+
+#include
+#include
+#include
+#include
+#include "tdt/status.h"
+#include "tdt/data_common.h"
+#include "toolchain/prof_callback.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+/**
+* @ingroup Open
+* @brief Used for the Framework process to communicate with the TSDDaemon process,
+* and notify TSD to complete the initialization of other processes
+*
+* @par Function
+* Used for the Framework process to communicate with the TSDDaemon process,
+* and notify TSD to complete the initialization of other processes
+*
+* @param logicDeviceId [IN] type #unsigned int. Logic device ID
+* @param rankSize [IN] type #unsigned int. The rankSize of the training.
+* The default value is 1. When rankSize is greater than 1,
+* HCCP will be pulled to perform set communication related operations.
+* @retval TDT_OK Success
+* @retval OtherValues Failure
+*
+* @par Dependency
+* @li libtsdclient.so: Library to which the interface belongs.
+* @li tsd_client.h: Header file where the interface declaration is located.
+* @li data_common.h: Header file where 'TDT_StatusT' defined
+*/
+TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize);
+
+/**
+* @ingroup Close
+* @brief notify TSDClient close resource
+*
+* @par Function
+* notify TSDClient close resource
+*
+* @param NA
+* @retval TDT_OK Success
+* @retval OtherValues Failure
+*
+* @par Dependency
+* @li libtsdclient.so: Library to which the interface belongs.
+* @li tsd_client.h: Header file where the interface declaration is located.
+* @li data_common.h: Header file where 'TDT_StatusT' defined
+*/
+TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId);
+
+/**
+* @ingroup UpdateProfilingMode
+* @brief notify TSDClient update profiling mode
+*
+* @par Function
+* notify TSDClient update profiling mode
+*
+* @param NA
+* @retval TDT_OK Success
+* @retval OtherValues Failure
+*
+* @par Dependency
+* @li libtsdclient.so: Library to which the interface belongs.
+* @li tsd_client.h: Header file where the interface declaration is located.
+* @li data_common.h: Header file where 'TDT_StatusT' defined
+*/
+TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag);
+
+/**
+* @ingroup TsdSetMsprofReporterCallback
+* @brief Set the aicpu profiling reporter callback for the inference scenario
+*
+* @par Function
+* Set the profiling reporter callback of the aicpu_sd process in offline mode
+*
+* @param callback [IN] type #MsprofReporterCallback. Callback function
+* @retval TDT_OK Success
+* @retval OtherValues Failure
+*
+* @par Dependency
+* @li libtsdclient.so: Library to which the interface belongs.
+* @li tsd_client.h: Header file where the interface declaration is located.
+* @li data_common.h: Header file where 'TDT_StatusT' defined
+* @li prof_callback.h: Header file where 'MsprofReporterCallback' defined
+*/
+TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback);
+
+/**
+* @ingroup CreateCmdParameterObj
+* @brief creat tsdclient func parameter obj.
+* +* @par Function +* creat tsdclient func parameter obj. +* +* @param type [IN] type tdt::TsdCmdType, tsd func type. +* @param cmdParameterObj [IN] type void *, func parameter obj. +* @retval TDT_OK Success +* @retval TDT_INTERFACE_NOT_SUPPORT +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. +* @li status.h: Header file where 'TDT_StatusT' defined +*/ +TDT_StatusT CreateCmdParameterObj(tdt::TsdCmdType type, void **cmdParameterObj); + +/** +* @ingroup SetCmdParameterObjAttribute +* @brief set cmdParameterObj input value. +* +* @par Function +* set cmdParameterObj input value. +* +* @param type [IN] type tdt::TsdCmdType, tsd func type. +* @param cmdParameterObj [IN] type void *, func parameter obj. +* @param itemType [IN] type tdt::InputItem, func input type. +* @param valuePtr [IN] type const void *, input value. +* @param valueLength [IN] type int, input value length. +* @retval TDT_OK Success +* @retval TDT_INTERFACE_NOT_SUPPORT +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. +* @li status.h: Header file where 'TDT_StatusT' defined +*/ +TDT_StatusT SetCmdParameterObjAttribute(tdt::TsdCmdType type, void *cmdParameterObj, tdt::InputItem itemType, const void *valuePtr, int valueLength); + +/** +* @ingroup GetCmdParameterObjAttribute +* @brief set cmdParameterObj input value. +* +* @par Function +* set cmdParameterObj input value. +* +* @param type [IN] type tdt::TsdCmdType, tsd func type. +* @param cmdParameterObj [IN] type void *, func parameter obj. +* @param itemType [IN] type tdt::InputItem, func input type. +* @param valuePtr [IN] type const void *, input value. +* @param valueLength [IN] type int, input value length. +* @retval TDT_OK Success +* @retval TDT_INTERFACE_NOT_SUPPORT +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. +* @li status.h: Header file where 'TDT_StatusT' defined +*/ +TDT_StatusT GetCmdParameterObjAttribute(tdt::TsdCmdType type, void *cmdParameterObj, tdt::InputItem itemType, void *valuePtr, int &valueLength); + +/** +* @ingroup TsdClientCmd +* @brief creat tsdclient func parameter obj. +* +* @par Function +* creat tsdclient func parameter obj. +* +* @param type [IN] type tdt::TsdCmdType, tsd func type. +* @param cmdParameterObj [IN] type void *, func parameter obj. +* @retval TDT_OK Success +* @retval TDT_INTERFACE_NOT_SUPPORT +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined. +* @li status.h: Header file where 'TDT_StatusT' defined +*/ +TDT_StatusT TsdClientCmd(tdt::TsdCmdType cmd, void *cmdParameterObj); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H_ diff --git a/third_party/fwkacllib/inc/inc/toolchain/adx_datadump_server.h b/third_party/fwkacllib/inc/inc/toolchain/adx_datadump_server.h new file mode 100644 index 00000000..67adecd9 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/toolchain/adx_datadump_server.h @@ -0,0 +1,42 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ADX_DATADUMP_SERVER_H +#define ADX_DATADUMP_SERVER_H +#ifdef __cplusplus +extern "C" { +#endif +/** + * @brief initialize server for normal datadump function. + * @return + * IDE_DAEMON_OK: datadump server init success + * IDE_DAEMON_ERROR: datadump server init failed + */ +int AdxDataDumpServerInit(); + +/** + * @brief uninitialize server for normal datadump function. + * @return + * IDE_DAEMON_OK: datadump server uninit success + * IDE_DAEMON_ERROR: datadump server uninit failed + */ +int AdxDataDumpServerUnInit(); + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/third_party/fwkacllib/inc/inc/toolchain/plog.h b/third_party/fwkacllib/inc/inc/toolchain/plog.h new file mode 100644 index 00000000..0d42e31d --- /dev/null +++ b/third_party/fwkacllib/inc/inc/toolchain/plog.h @@ -0,0 +1,59 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _PLOG_H_ +#define _PLOG_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifndef LINUX +#define LINUX 0 +#endif // LINUX + +#ifndef WIN +#define WIN 1 +#endif + +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE == LINUX) +#define DLL_EXPORT __attribute__((visibility("default"))) +#else +#define DLL_EXPORT _declspec(dllexport) +#endif + +/** + * @ingroup plog + * @brief DlogReportInitialize: init log in service process before all device setting. + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportInitialize(); + +/** + * @ingroup plog + * @brief DlogReportFinalize: release log resource in service process after all device reset. + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportFinalize(); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // D_PLOG_H_ diff --git a/third_party/fwkacllib/inc/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/inc/toolchain/prof_acl_api.h new file mode 100644 index 00000000..07b32149 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/toolchain/prof_acl_api.h @@ -0,0 +1,112 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MSPROFILER_API_PROF_ACL_API_H_ +#define MSPROFILER_API_PROF_ACL_API_H_ + +// DataTypeConfig +#define PROF_ACL_API 0x00000001 +#define PROF_TASK_TIME 0x00000002 +#define PROF_AICORE_METRICS 0x00000004 +#define PROF_AICPU_TRACE 0x00000008 +#define PROF_MODEL_EXECUTE 0x00000010 +#define PROF_RUNTIME_API 0x00000020 +#define PROF_RUNTIME_TRACE 0x00000040 +#define PROF_SCHEDULE_TIMELINE 0x00000080 +#define PROF_SCHEDULE_TRACE 0x00000100 +#define PROF_AIVECTORCORE_METRICS 0x00000200 +#define PROF_SUBTASK_TIME 0x00000400 + +#define PROF_TRAINING_TRACE 0x00000800 +#define PROF_HCCL_TRACE 0x00001000 + +#define PROF_TASK_TRACE 0x00001852 + +// system profilinig switch +#define PROF_CPU 0x00010000 +#define PROF_HARDWARE_MEMORY 0x00020000 +#define PROF_IO 0x00040000 +#define PROF_INTER_CONNECTION 0x00080000 +#define PROF_DVPP 0x00100000 +#define PROF_SYS_AICORE_SAMPLE 0x00200000 +#define PROF_AIVECTORCORE_SAMPLE 0x00400000 + +#define PROF_MODEL_LOAD 0x8000000000000000 + +// DataTypeConfig MASK +#define PROF_ACL_API_MASK 0x00000001 +#define PROF_TASK_TIME_MASK 0x00000002 +#define PROF_AICORE_METRICS_MASK 0x00000004 +#define PROF_AICPU_TRACE_MASK 0x00000008 +#define PROF_MODEL_EXECUTE_MASK 0x00000010 +#define PROF_RUNTIME_API_MASK 0x00000020 +#define PROF_RUNTIME_TRACE_MASK 0x00000040 +#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 +#define PROF_SCHEDULE_TRACE_MASK 0x00000100 +#define PROF_AIVECTORCORE_METRICS_MASK 0x00000200 +#define PROF_SUBTASK_TIME_MASK 0x00000400 + +#define PROF_TRAINING_TRACE_MASK 0x00000800 +#define PROF_HCCL_TRACE_MASK 0x00001000 + +// system profilinig mask +#define PROF_CPU_MASK 0x00010000 +#define PROF_HARDWARE_MEMORY_MASK 0x00020000 +#define PROF_IO_MASK 0x00040000 +#define PROF_INTER_CONNECTION_MASK 0x00080000 +#define PROF_DVPP_MASK 0x00100000 +#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 +#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 + +#define PROF_MODEL_LOAD_MASK 0x8000000000000000 + +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE != LINUX) +#define MSVP_PROF_API __declspec(dllexport) +#else +#define MSVP_PROF_API __attribute__((visibility("default"))) +#endif + +#include + +namespace Msprofiler { +namespace Api { +/** + * @name ProfGetOpExecutionTime + * @brief get op execution time of specific part of data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param index [IN] index of part(op) + * @return op execution time (us) + */ +MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); +} +} + +#ifdef __cplusplus +extern "C" { +#endif + +MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); + +#ifdef __cplusplus +} +#endif + +#endif // MSPROFILER_API_PROF_ACL_API_H_ diff --git a/third_party/fwkacllib/inc/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/inc/toolchain/prof_callback.h new file mode 100644 index 00000000..3fad74bc --- /dev/null +++ b/third_party/fwkacllib/inc/inc/toolchain/prof_callback.h @@ -0,0 +1,135 @@ +/** + * Copyright 2020-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @file prof_callback.h + * @brief declaraion of profiling callbacks + */ + +#ifndef MSPROFILER_PROF_CALLBACK_H_ +#define MSPROFILER_PROF_CALLBACK_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + + +#include "stddef.h" +#include "stdint.h" + +/** + * @name MsprofErrorCode + * @brief error code + */ +enum MsprofErrorCode { + MSPROF_ERROR_NONE = 0, + MSPROF_ERROR_MEM_NOT_ENOUGH, + MSPROF_ERROR_GET_ENV, + MSPROF_ERROR_CONFIG_INVALID, + MSPROF_ERROR_ACL_JSON_OFF, + MSPROF_ERROR, +}; + +#define MSPROF_ENGINE_MAX_TAG_LEN (31) + +/** + * @name ReporterData + * @brief struct of data to report + */ +struct ReporterData { + char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; // the sub-type of the module, data with different tag will be writen + int deviceId; // the index of device + size_t dataLen; // the length of send data + unsigned char *data; // the data content +}; + +/** + * @name MsprofReporterModuleId + * @brief module id of data to report + */ +enum MsprofReporterModuleId { + MSPROF_MODULE_DATA_PREPROCESS = 0, // DATA_PREPROCESS + MSPROF_MODULE_HCCL, // HCCL + MSPROF_MODULE_ACL, // AclModule + MSPROF_MODULE_FRAMEWORK, // Framework + MSPROF_MODULE_RUNTIME // runtime +}; + +/** + * @name MsprofReporterCallbackType + * @brief reporter callback request type + */ +enum MsprofReporterCallbackType { + MSPROF_REPORTER_REPORT = 0, // report data + MSPROF_REPORTER_INIT, // init reporter + MSPROF_REPORTER_UNINIT, // uninit reporter +}; + +/** + * @name MsprofReporterCallback + * @brief callback to start reporter/stop reporter/report date + * @param moduleId [IN] enum MsprofReporterModuleId + * @param type [IN] enum MsprofReporterCallbackType + * @param data [IN] callback data (nullptr on INTI/UNINIT) + * @param len [IN] callback data size (0 on INIT/UNINIT) + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len); + + +#define MSPROF_OPTIONS_DEF_LEN_MAX (2048) + +/** + * @name MsprofGeOptions + * @brief struct of MSPROF_CTRL_INIT_GE_OPTIONS + */ +struct MsprofGeOptions { + char jobId[MSPROF_OPTIONS_DEF_LEN_MAX]; + char options[MSPROF_OPTIONS_DEF_LEN_MAX]; +}; + +/** + * @name MsprofCtrlCallbackType + * @brief ctrl callback request type + */ +enum MsprofCtrlCallbackType { + MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env + MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json + MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options + MSPROF_CTRL_FINALIZE // stop profiling +}; + +/** + * @name MsprofCtrlCallback + * @brief callback to start/stop profiling + * @param type [IN] enum MsprofCtrlCallbackType + * @param data [IN] callback data + * @param len [IN] callback data size + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len); + +/** + * @name MsprofSetDeviceCallback + * @brief callback to notify set/reset device + * @param devId [IN] device id + * @param isOpenDevice [IN] true: set device, false: reset device + */ +typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice); 
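The two callback typedefs above only pin down signatures; for orientation, a minimal host-side reporter callback might look like the sketch below. This is an illustration added for this review, not part of the patch: the function name and the printing logic are assumptions, while the enum values and the ReporterData layout are taken from prof_callback.h as added above, and the registration entry point in the trailing comment is TsdSetMsprofReporterCallback from tsd_client.h earlier in this patch.

#include <cstdint>
#include <cstdio>
#include "toolchain/prof_callback.h"

// Illustrative reporter callback: dispatch on the request type defined in
// MsprofReporterCallbackType and treat the payload as ReporterData on REPORT.
static int32_t DemoReporterCallback(uint32_t moduleId, uint32_t type, void *data, uint32_t len) {
    switch (type) {
        case MSPROF_REPORTER_INIT:    // data is nullptr and len is 0 on INIT/UNINIT
        case MSPROF_REPORTER_UNINIT:
            return MSPROF_ERROR_NONE;
        case MSPROF_REPORTER_REPORT: {
            if (data == nullptr || len < sizeof(ReporterData)) {
                return MSPROF_ERROR_CONFIG_INVALID;
            }
            const ReporterData *report = static_cast<const ReporterData *>(data);
            std::printf("module %u tag %s device %d bytes %zu\n",
                        moduleId, report->tag, report->deviceId, report->dataLen);
            return MSPROF_ERROR_NONE;
        }
        default:
            return MSPROF_ERROR;
    }
}

// Registration would go through the interface declared in tsd_client.h (assumed usage):
//   TDT_StatusT ret = TsdSetMsprofReporterCallback(DemoReporterCallback);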
+ +#ifdef __cplusplus +} +#endif + +#endif // MSPROFILER_PROF_CALLBACK_H_ diff --git a/third_party/fwkacllib/inc/inc/toolchain/prof_engine.h b/third_party/fwkacllib/inc/inc/toolchain/prof_engine.h new file mode 100644 index 00000000..0e757dcf --- /dev/null +++ b/third_party/fwkacllib/inc/inc/toolchain/prof_engine.h @@ -0,0 +1,207 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MSPROF_ENGINE_PROF_ENGINE_H_ +#define MSPROF_ENGINE_PROF_ENGINE_H_ +#define MSVP_PROF_API __attribute__((visibility("default"))) + +#include +#include +#include "prof_reporter.h" + +/** + * @file prof_engine.h + * @defgroup ModuleJobConfig the ModuleJobConfig group + * This is the ModuleJobConfig group + */ +namespace Msprof { +namespace Engine { +/** + * @ingroup ModuleJobConfig + * @brief struct ModuleJobConfig + * record config info + */ +struct ModuleJobConfig { + std::map switches; /**< key is the config name, value is the config value(on or off) */ +}; + +/** + * @defgroup PluginIntf the pluginInf group + * This is the pluginInf group + */ + +/** + * @ingroup PluginIntf + * @brief class PluginIntf + */ +class MSVP_PROF_API PluginIntf { + public: + virtual ~PluginIntf() {} + + public: + /** + * @ingroup PluginIntf + * @name : Init + * @brief : API of user plugin, libmsporf call this API to send a Reporter to user plugin + * @par description : + * API of user plugin, libmsporf call this API to send a Reporter to user plugin. 
+ * @param reporter [IN] const Reporter* the Reporter from libmsprof + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see UnInit + */ + virtual int Init(const Reporter *reporter) = 0; + + /** + * @ingroup PluginIntf + * @name : OnNewConfig + * @brief : API of user plugin, libmsprof call this API to send config info to user plugin \n + If the user plugin needn't config, no need to redefine this function + * @param config [IN] const ModuleJobConfig * the config from libmsprof + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see Init | UnInit + */ + virtual int OnNewConfig(const ModuleJobConfig *config) { return 0; } + + /** + * @ingroup PluginIntf + * @name : UnInit + * @brief : API of user plugin, libmsprof call this API to notify plugin stop to send data + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see Init + */ + virtual int UnInit() = 0; +}; + +/** + * @defgroup EngineIntf the EngineIntf group + * This is the EngineIntf group + */ + +/** + * @ingroup EngineIntf + * @brief class EngineIntf + */ +class MSVP_PROF_API EngineIntf { + public: + virtual ~EngineIntf() {} + + public: + /** + * @ingroup EngineIntf + * @name : CreatePlugin + * @brief : API of user engine, libmsporf call this API to get a plugin + * @retval PluginIntf * The pointer of the new plugin + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see ReleasePlugin + */ + virtual PluginIntf *CreatePlugin() = 0; + + /** + * @ingroup EngineIntf + * @name : ReleasePlugin + * @brief : API of user engine, libmsprof call this API to release a plugin + * @param plugin [IN] PluginIntf * the plugin to release + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see CreatePlugin + */ + virtual int ReleasePlugin(PluginIntf *plugin) = 0; +}; + +/** + * @defgroup EngineMgr the EngineMgr group + * This is the EngineMgr group + */ + +/** + * @ingroup EngineMgr + * @name : RegisterEngine + * @brief : API of libmsprof, register an engine with a name + * @param module [IN] const std::string the name of plugin + * @param engine [IN] const EngineIntf* the plugin + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + */ +MSVP_PROF_API int RegisterEngine(const std::string &module, const EngineIntf *engine); + +/** + * @ingroup EngineMgr + * @name : Init + * @brief : API of libmsprof, init an engine with a name + * @param module [IN] const std::string the name of plugin + * @param module [IN] const EngineIntf* the plugin + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_engine.h + * @since c60 + * @see UnInit + */ +MSVP_PROF_API int Init(const std::string &module, const EngineIntf *engine); + +/** + * @ingroup EngineMgr + * @name : Init + * @brief : API of libmsprof, uninit an engine with a name + * @param module [IN] const std::string the name of plugin + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li 
prof_engine.h + * @since c60 + * @see Init + */ +MSVP_PROF_API int UnInit(const std::string &module); +} // namespace Engine +} // namespace Msprof + +#endif // MSPROF_ENGINE_PROF_ENGINE_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/toolchain/prof_mgr_core.h b/third_party/fwkacllib/inc/inc/toolchain/prof_mgr_core.h new file mode 100644 index 00000000..f8cb1b22 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/toolchain/prof_mgr_core.h @@ -0,0 +1,93 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MSPROF_ENGINE_PROF_MGR_CORE_H_ +#define MSPROF_ENGINE_PROF_MGR_CORE_H_ +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE != LINUX) +#define MSVP_PROF_API __declspec(dllexport) +#else +#define MSVP_PROF_API __attribute__((visibility("default"))) +#endif + + +#include +#include + +/** + * @file prof_mgr_core.h + * @brief : struct ProfMgrCfg + */ +struct ProfMgrCfg { + std::string startCfg; /**< start cfg. json format */ +}; + +/** + * @name : ProfMgrConf + * @brief : struct ProfMgrConf for example [{"ai_core_events":"0xa"}].the vector size means Number of iterations + */ +struct ProfMgrConf { + std::vector conf; /**< for op trace.Ge call this api to get each iteration profiling cfg.json format.*/ +}; + +/** + * @name : ProfMgrStartUP + * @brief : start Profiling task + * @param cfg [IN]ProfMgrCfg cfg : config of start_up profiling + * @retval void * (success) + * @retval nullptr (failed) + * + * @par depend: + * @li libmsprof + * @li prof_mgr_core.h + * @since c60 + * @see ProfMgrStop + */ +MSVP_PROF_API void *ProfMgrStartUp(const ProfMgrCfg *cfg); + +/** + * @name : ProfMgrStop + * @brief : stop Profiling task + * @param handle [in] void * handle return by ProfMgrStartUP + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_mgr_core.h + * @since c60 + * @see ProfMgrStartUp + */ +MSVP_PROF_API int ProfMgrStop(void *handle); + +/** + * @name : ProfMgrGetConf + * @brief : get profiler events conf + * @param conf [OUT]ProfMgrConf * return by ProfMgrGetConf + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * @par depend: + * @li libmsprof + * @li prof_mgr_core.h + * @since c60 + * @see ProfMgrStartUp + */ +MSVP_PROF_API int ProfMgrGetConf(const std::string &aicoreMetricsType, ProfMgrConf *conf); + +#endif // MSPROF_ENGINE_PROF_MGR_CORE_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/inc/toolchain/prof_reporter.h new file mode 100644 index 00000000..ff91351b --- /dev/null +++ b/third_party/fwkacllib/inc/inc/toolchain/prof_reporter.h @@ -0,0 +1,85 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MSPROF_ENGINE_PROF_REPORTER_H_ +#define MSPROF_ENGINE_PROF_REPORTER_H_ +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE != LINUX) +#define MSVP_PROF_API __declspec(dllexport) +#else +#define MSVP_PROF_API __attribute__((visibility("default"))) +#endif + +#include "prof_callback.h" + +/** + * @file prof_reporter.h + * @defgroup reporter the reporter group + * This is the reporter group + */ +namespace Msprof { +namespace Engine { +/** + * @ingroup reporter + * @brief class Reporter + * the Reporter class .used to send data to profiling + */ +class MSVP_PROF_API Reporter { + public: + virtual ~Reporter() {} + + public: + /** + * @ingroup reporter + * @name : Report + * @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n + The data will be firstly appended to cache, if the cache is full, data will be ignored + * @param data [IN] const ReporterData * the data send to libmsporf + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_reporter.h + * @since c60 + * @see Flush + */ + virtual int Report(const ReporterData *data) = 0; + + /** + * @ingroup reporter + * @name : Flush + * @brief : API of libmsprof, notify libmsprof send data over, it's a blocking function \n + The all datas of cache will be write to file or send to host + * @retval PROFILING_SUCCESS 0 (success) + * @retval PROFILING_FAILED -1 (failed) + * + * @par depend: + * @li libmsprof + * @li prof_reporter.h + * @since c60 + * @see ProfMgrStop + */ + virtual int Flush() = 0; +}; + +} // namespace Engine +} // namespace Msprof + +#endif // MSPROF_ENGINE_PROF_REPORTER_H_ diff --git a/third_party/fwkacllib/inc/inc/toolchain/slog.h b/third_party/fwkacllib/inc/inc/toolchain/slog.h new file mode 100644 index 00000000..7c4f7be2 --- /dev/null +++ b/third_party/fwkacllib/inc/inc/toolchain/slog.h @@ -0,0 +1,510 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef D_SYSLOG_H_ +#define D_SYSLOG_H_ + +#ifdef __cplusplus +#ifndef LOG_CPP +extern "C" { +#endif +#endif // __cplusplus + +#ifndef LINUX +#define LINUX 0 +#endif // LINUX + +#ifndef WIN +#define WIN 1 +#endif + +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE == LINUX) +#define DLL_EXPORT __attribute__((visibility("default"))) +#else +#define DLL_EXPORT _declspec(dllexport) +#endif + +/** + * @ingroup slog + * + * debug level id + */ +#define DLOG_DEBUG 0 + +/** + * @ingroup slog + * + * info level id + */ +#define DLOG_INFO 1 + +/** + * @ingroup slog + * + * warning level id + */ +#define DLOG_WARN 2 + +/** + * @ingroup slog + * + * error level id + */ +#define DLOG_ERROR 3 + +/** + * @ingroup slog + * + * don't print log + */ +#define DLOG_NULL 4 + +/** + * @ingroup slog + * + * trace log print level id + */ +#define DLOG_TRACE 5 + +/** + * @ingroup slog + * + * oplog log print level id + */ +#define DLOG_OPLOG 6 + +/** + * @ingroup slog + * + * event log print level id + */ +#define DLOG_EVENT 0x10 + +/** + * @ingroup slog + * + * max log length + */ +#define MSG_LENGTH 1024 +#define DEBUG_LOG_MASK (0x00010000) +#define SECURITY_LOG_MASK (0x00100000) +#define RUN_LOG_MASK (0x01000000) +#define OPERATION_LOG_MASK (0x10000000) +#define RESERVERD_LENGTH 52 + +typedef struct tagDCODE { + const char *cName; + int cVal; +} DCODE; + +typedef struct tagKV { + char *kname; + char *value; +} KeyValue; + +typedef enum { + APPLICATION = 0, + SYSTEM +} ProcessType; + +typedef struct { + ProcessType type; + unsigned int pid; + unsigned int deviceId; + char reserved[RESERVERD_LENGTH]; +} LogAttr; + +/** + * @ingroup slog + * + * module id + */ +enum { + SLOG, /**< Slog */ + IDEDD, /**< IDE daemon device */ + IDEDH, /**< IDE daemon host */ + HCCL, /**< HCCL */ + FMK, /**< Framework */ + HIAIENGINE, /**< Matrix */ + DVPP, /**< DVPP */ + RUNTIME, /**< Runtime */ + CCE, /**< CCE */ +#if (OS_TYPE == LINUX) + HDC, /**< HDC */ +#else + HDCL, +#endif // OS_TYPE + DRV, /**< Driver */ + MDCFUSION, /**< Mdc fusion */ + MDCLOCATION, /**< Mdc location */ + MDCPERCEPTION, /**< Mdc perception */ + MDCFSM, + MDCCOMMON, + MDCMONITOR, + MDCBSWP, /**< MDC base software platform */ + MDCDEFAULT, /**< MDC undefine */ + MDCSC, /**< MDC spatial cognition */ + MDCPNC, + MLL, + DEVMM, /**< Dlog memory managent */ + KERNEL, /**< Kernel */ + LIBMEDIA, /**< Libmedia */ + CCECPU, /**< ai cpu */ + ASCENDDK, /**< AscendDK */ + ROS, /**< ROS */ + HCCP, + ROCE, + TEFUSION, + PROFILING, /**< Profiling */ + DP, /**< Data Preprocess */ + APP, /**< User Application */ + TS, /**< TS module */ + TSDUMP, /**< TSDUMP module */ + AICPU, /**< AICPU module */ + LP, /**< LP module */ + TDT, + FE, + MD, + MB, + ME, + IMU, + IMP, + GE, /**< Fmk */ + MDCFUSA, + CAMERA, + ASCENDCL, + TEEOS, + ISP, + SIS, + HSM, + DSS, + PROCMGR, // Process Manager, Base Platform + BBOX, + AIVECTOR, + TBE, + FV, + MDCMAP, + TUNE, + INVLID_MOUDLE_ID +}; + +/** + * @ingroup slog + * @brief External log interface, which called by modules + */ +DLL_EXPORT void dlog_init(void); + +/** + * @ingroup slog + * @brief dlog_getlevel: get module loglevel and enableEvent + * + * @param [in]moduleId: moudule id(see slog.h, eg: CCE), others: invalid + * @param [out]enableEvent: 1: enable; 0: disable + * @return: module level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + */ +DLL_EXPORT int dlog_getlevel(int moduleId, int *enableEvent); + +/** + * @ingroup slog + * @brief dlog_setlevel: set module loglevel and enableEvent + * + * @param 
[in]moduleId: moudule id(see slog.h, eg: CCE), -1: all modules, others: invalid + * @param [in]level: log level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + * @param [in]enableEvent: 1: enable; 0: disable, others:invalid + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); + +/** + * @ingroup slog + * @brief CheckLogLevel: check module level enable or not + * users no need to call it because all dlog interface(include inner interface) has already called + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG + * @return: 1:enable, 0:disable + */ +DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); + +/** + * @ingroup slog + * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttr(LogAttr logAttr); + +/** + * @ingroup slog + * @brief dlog_error: print error log + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]fmt: log content + */ +#define dlog_error(moduleId, fmt, ...) \ + do { \ + DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } while (0) + +/** + * @ingroup slog + * @brief dlog_warn: print warning log + * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]fmt: log content + */ +#define dlog_warn(moduleId, fmt, ...) \ + do { \ + if(CheckLogLevel(moduleId, DLOG_WARN) == 1) { \ + DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief dlog_info: print info log + * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]fmt: log content + */ +#define dlog_info(moduleId, fmt, ...) \ + do { \ + if(CheckLogLevel(moduleId, DLOG_INFO) == 1) { \ + DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief dlog_debug: print debug log + * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]fmt: log content + */ +#define dlog_debug(moduleId, fmt, ...) \ + do { \ + if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \ + DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief dlog_event: print event log + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]fmt: log content + */ +#define dlog_event(moduleId, fmt, ...) \ + do { \ + DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } while (0) + +/** + * @ingroup slog + * @brief Dlog: print log, need caller to specify level + * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define Dlog(moduleId, level, fmt, ...) 
+/**
+ * @ingroup slog
+ * @brief Dlog: print log, the caller needs to specify the level
+ * CheckLogLevel is called in advance to optimize performance, because formatting the fmt arguments takes time.
+ *
+ * @param [in]moduleId: module id, eg: CCE
+ * @param [in]level (0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
+ * @param [in]fmt: log content
+ */
+#define Dlog(moduleId, level, fmt, ...) \
+  do { \
+    if (CheckLogLevel(moduleId, level) == 1) { \
+      DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+    } \
+  } while (0)
+
+/**
+ * @ingroup slog
+ * @brief DlogSub: print log, the caller needs to specify the level and submodule
+ * CheckLogLevel is called in advance to optimize performance, because formatting the fmt arguments takes time.
+ *
+ * @param [in]moduleId: module id, eg: CCE
+ * @param [in]submodule: eg: engine
+ * @param [in]level (0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
+ * @param [in]fmt: log content
+ */
+#define DlogSub(moduleId, submodule, level, fmt, ...) \
+  do { \
+    if (CheckLogLevel(moduleId, level) == 1) { \
+      DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \
+    } \
+  } while (0)
+
+/**
+ * @ingroup slog
+ * @brief DlogWithKV: print log, the caller needs to specify the level and other parameters
+ * CheckLogLevel is called in advance to optimize performance, because formatting the fmt arguments takes time.
+ *
+ * @param [in]moduleId: module id, eg: CCE
+ * @param [in]level (0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
+ * @param [in]pstKVArray: key-value array
+ * @param [in]kvNum: number of key-value elements in the array
+ * @param [in]fmt: log content
+ */
+#define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...) \
+  do { \
+    if (CheckLogLevel(moduleId, level) == 1) { \
+      DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+    } \
+  } while (0)
+
+/**
+ * @ingroup slog
+ * @brief DlogFlush: flush log buffer to file
+ */
+DLL_EXPORT void DlogFlush(void);
+
+/**
+ * @ingroup slog
+ * @brief Internal log interfaces, other modules are not allowed to call them
+ */
+void DlogErrorInner(int moduleId, const char *fmt, ...);
+void DlogWarnInner(int moduleId, const char *fmt, ...);
+void DlogInfoInner(int moduleId, const char *fmt, ...);
+void DlogDebugInner(int moduleId, const char *fmt, ...);
+void DlogEventInner(int moduleId, const char *fmt, ...);
+void DlogInner(int moduleId, int level, const char *fmt, ...);
+void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);
+
+#ifdef __cplusplus
+#ifndef LOG_CPP
+}
+#endif // LOG_CPP
+#endif // __cplusplus
+
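A minimal sketch of DlogWithKV. Because KeyValue holds non-const char pointers, the key and value strings live in writable buffers here; the module, key name and message are illustrative, and the include path is assumed:

    #include <cstdio>
    #include "toolchain/slog.h"   // assumed include path

    void ReportStep(int step, double lossValue) {
      char key[] = "step";
      char val[32];
      (void)std::snprintf(val, sizeof(val), "%d", step);
      KeyValue kv[] = {{key, val}};
      // Attaches one key-value pair to the log record.
      DlogWithKV(APP, DLOG_INFO, kv, 1, "loss = %f", lossValue);
    }
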
+#ifdef LOG_CPP
+#ifdef __cplusplus
+extern "C" {
+#endif
+/**
+ * @ingroup slog
+ * @brief DlogGetlevelForC: get module loglevel and enableEvent
+ *
+ * @param [in]moduleId: module id (see slog.h, eg: CCE), others: invalid
+ * @param [out]enableEvent: 1: enable; 0: disable
+ * @return: module level (0: debug, 1: info, 2: warning, 3: error, 4: null output)
+ */
+DLL_EXPORT int DlogGetlevelForC(int moduleId, int *enableEvent);
+
+/**
+ * @ingroup slog
+ * @brief DlogSetlevelForC: set module loglevel and enableEvent
+ *
+ * @param [in]moduleId: module id (see slog.h, eg: CCE), -1: all modules, others: invalid
+ * @param [in]level: log level (0: debug, 1: info, 2: warning, 3: error, 4: null output)
+ * @param [in]enableEvent: 1: enable; 0: disable, others: invalid
+ * @return: 0: SUCCEED, others: FAILED
+ */
+DLL_EXPORT int DlogSetlevelForC(int moduleId, int level, int enableEvent);
+
+/**
+ * @ingroup slog
+ * @brief CheckLogLevelForC: check whether a module log level is enabled
+ * Callers do not need to invoke it directly, because every dlog interface (including the inner interfaces) already calls it.
+ *
+ * @param [in]moduleId: module id, eg: CCE
+ * @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG
+ * @return: 1: enable, 0: disable
+ */
+DLL_EXPORT int CheckLogLevelForC(int moduleId, int logLevel);
+
+/**
+ * @ingroup slog
+ * @brief DlogSetAttrForC: set log attributes; default pid is 0, default device id is 0, default process type is APPLICATION
+ * @param [in]logAttr: attr info, including pid (must be larger than 0), process type and device id (chip ID)
+ * @return: 0: SUCCEED, others: FAILED
+ */
+DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
+
+/**
+ * @ingroup slog
+ * @brief DlogForC: print log, the caller needs to specify the level
+ * CheckLogLevelForC is called in advance to optimize performance, because formatting the fmt arguments takes time.
+ *
+ * @param [in]moduleId: module id, eg: CCE
+ * @param [in]level (0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
+ * @param [in]fmt: log content
+ */
+#define DlogForC(moduleId, level, fmt, ...) \
+  do { \
+    if (CheckLogLevelForC(moduleId, level) == 1) { \
+      DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+    } \
+  } while (0)
+
+/**
+ * @ingroup slog
+ * @brief DlogSubForC: print log, the caller needs to specify the level and submodule
+ * CheckLogLevelForC is called in advance to optimize performance, because formatting the fmt arguments takes time.
+ *
+ * @param [in]moduleId: module id, eg: CCE
+ * @param [in]submodule: eg: engine
+ * @param [in]level (0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
+ * @param [in]fmt: log content
+ */
+#define DlogSubForC(moduleId, submodule, level, fmt, ...) \
+  do { \
+    if (CheckLogLevelForC(moduleId, level) == 1) { \
+      DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \
+    } \
+  } while (0)
+
+/**
+ * @ingroup slog
+ * @brief DlogWithKVForC: print log, the caller needs to specify the level and other parameters
+ * CheckLogLevelForC is called in advance to optimize performance, because formatting the fmt arguments takes time.
+ *
+ * @param [in]moduleId: module id, eg: CCE
+ * @param [in]level (0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
+ * @param [in]pstKVArray: key-value array
+ * @param [in]kvNum: number of key-value elements in the array
+ * @param [in]fmt: log content
+ */
+#define DlogWithKVForC(moduleId, level, pstKVArray, kvNum, fmt, ...) \
+  do { \
+    if (CheckLogLevelForC(moduleId, level) == 1) { \
+      DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+    } \
+  } while (0)
+
+/**
+ * @ingroup slog
+ * @brief DlogFlushForC: flush log buffer to file
+ */
+DLL_EXPORT void DlogFlushForC(void);
+
+/**
+ * @ingroup slog
+ * @brief Internal log interfaces, other modules are not allowed to call them
+ */
+void DlogInnerForC(int moduleId, int level, const char *fmt, ...);
+void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // LOG_CPP
+#endif // D_SYSLOG_H_
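One more usage sketch for the attribute and flush interfaces declared above: it binds the calling process to the log system and flushes buffered records on shutdown. The use of getpid() (Linux-only), the wrapper function names, and the include path are assumptions of the example:

    #include <unistd.h>           // getpid(), Linux-only assumption
    #include "toolchain/slog.h"   // assumed include path

    void InitLogging(unsigned int deviceId) {
      LogAttr attr = {};
      attr.type = APPLICATION;
      attr.pid = static_cast<unsigned int>(getpid());   // must be greater than 0
      attr.deviceId = deviceId;
      if (DlogSetAttr(attr) != 0) {
        dlog_warn(APP, "DlogSetAttr failed, keeping default log attributes");
      }
    }

    void ShutdownLogging() {
      DlogFlush();   // flush the in-memory log buffer to file
    }
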
diff --git a/third_party/fwkacllib/inc/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/inc/toolchain/tuning_tool/tune_api.h
new file mode 100644
index 00000000..e436dafd
--- /dev/null
+++ b/third_party/fwkacllib/inc/inc/toolchain/tuning_tool/tune_api.h
@@ -0,0 +1,137 @@
+/**
+ * @file tune_api.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n
+ * Description: header file of the mstune tuning API
+ */
+/** @defgroup mstune mstune tuning API */
+#ifndef TUNE_API_H
+#define TUNE_API_H
+#include <vector>
+#include <map>
+#include <memory>
+#include "graph/graph.h"
+#include "ge/ge_api.h"
+
+/**
+ * @ingroup mstune
+ *
+ * mstune status
+ */
+enum MsTuneStatus {
+  MSTUNE_SUCCESS,  /** tune success */
+  MSTUNE_FAILED,   /** tune failed */
+};
+
+// Option keys for the training option sets
+const std::string MSTUNE_SELF_KEY = "mstune";
+const std::string MSTUNE_GEINIT_KEY = "initialize";
+const std::string MSTUNE_GESESS_KEY = "session";
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct RunnerInitConfig {
+  // online only
+  std::string profPath;
+  std::string parserPath;
+  // ncs only
+  std::vector<uint32_t> devList;
+};
+
+struct RunnerOpInfo {
+  std::string opName;
+  uint64_t opCostTime;
+  uint64_t aicoreCostTime;
+  // gradient_split only
+  std::string modelName;
+  std::string opType;
+  std::vector<uint64_t> start;
+  std::vector<uint64_t> end;
+};
+
+struct RunnerModelInfo {
+  uint64_t totalCostTime;
+};
+
+struct RunnerRunResult {
+  std::vector<RunnerModelInfo> modelInfo;
+  std::vector<RunnerOpInfo> opInfo;
+};
+
+struct RunnerResult {
+  uint64_t totalCostTime;
+  std::map<std::string, uint64_t> opCostTime;
+  std::map<std::string, uint64_t> aicoreCostTime;
+};
+
+struct RunnerDataBuf {
+  void *ptr = nullptr;
+  size_t size = 0;
+};
+
+struct AOEBufferData {
+  std::shared_ptr<uint8_t> data = nullptr;
+  uint64_t length;
+};
+
+struct RunnerConfig {
+  bool isProf;
+  uint32_t loop;
+  // offline only
+  std::vector<RunnerDataBuf> input;
+  std::vector<RunnerDataBuf> output;
+  std::string modelPath;
+  RunnerDataBuf modelData;
+  // online only
+  uint32_t devId;
+  std::vector<std::vector<ge::Tensor>> inputs;
+  std::vector<ge::Graph> dependGraph;  // run graph (for training)
+};
+#ifdef __cplusplus
+}
+#endif
+
+/**
+ * @ingroup mstune
+ * @par Description: command-line tuning
+ *
+ * @attention None
+ * @param option [IN] tuning options
+ * @param msg [OUT] message returned when tuning fails
+ * @retval #MSTUNE_SUCCESS execution succeeded
+ * @retval #MSTUNE_FAILED execution failed
+ * @par Dependencies:
+ * @li tune_api.cpp: the development package this interface belongs to.
+ * @li tune_api.h: the header file in which this interface is declared.
+ * @see None
+ * @since
+ */
+MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg);
+
+/**
+ * @ingroup mstune
+ * @par Description: gradient tuning
+ *
+ * @attention None
+ * @param tuningGraph [IN] graph to be tuned
+ * @param dependGraph [IN] graphs the tuning graph depends on
+ * @param session [IN] GE session
+ * @param option [IN] option set, containing both tuning options and GE options
+ * @retval #MSTUNE_SUCCESS execution succeeded
+ * @retval #MSTUNE_FAILED execution failed
+ * @par Dependencies:
+ * @li tune_api.cpp: the development package this interface belongs to.
+ * @li tune_api.h: the header file in which this interface is declared.
+ * @see None
+ * @since
+ */
+extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
+    ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
+
+#endif
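A minimal sketch of the command-line tuning entry point MsTuning. The option key shown is hypothetical, since the valid key set is defined by the tuning tool rather than by this header, and the example relies on the std::map<std::string, std::string> signature reconstructed above as well as an assumed include path:

    #include <iostream>
    #include <map>
    #include <string>
    #include "tuning_tool/tune_api.h"   // assumed include path

    int main() {
      std::map<std::string, std::string> option = {
        {"work_path", "./tune_output"}   // hypothetical option key and value
      };
      std::string msg;
      const MsTuneStatus ret = MsTuning(option, msg);
      if (ret != MSTUNE_SUCCESS) {
        std::cerr << "tuning failed: " << msg << std::endl;
      }
      return ret == MSTUNE_SUCCESS ? 0 : 1;
    }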