Browse Source

Pre Merge pull request !1078 from shenwei41/sw_release

pull/1078/MERGE
shenwei41 Gitee 5 years ago
parent
commit
6d3003c676
100 changed files with 45071 additions and 622 deletions
  1. +4
    -4
      inc/external/acl/acl.h
  2. +73
    -0
      inc/external/acl/acl/acl.h
  3. +617
    -0
      inc/external/acl/acl/acl_base.h
  4. +1210
    -0
      inc/external/acl/acl/acl_mdl.h
  5. +549
    -0
      inc/external/acl/acl/acl_op.h
  6. +115
    -0
      inc/external/acl/acl/acl_op_compiler.h
  7. +296
    -0
      inc/external/acl/acl/acl_prof.h
  8. +950
    -0
      inc/external/acl/acl/acl_rt.h
  9. +283
    -0
      inc/external/acl/acl/acl_tdt.h
  10. +75
    -0
      inc/external/acl/acl/error_codes/ge_error_codes.h
  11. +102
    -0
      inc/external/acl/acl/error_codes/rt_error_codes.h
  12. +431
    -0
      inc/external/acl/acl/ops/acl_cblas.h
  13. +2493
    -0
      inc/external/acl/acl/ops/acl_dvpp.h
  14. +351
    -0
      inc/external/acl/acl/ops/acl_fv.h
  15. +46
    -39
      inc/external/acl/acl_base.h
  16. +190
    -166
      inc/external/acl/acl_mdl.h
  17. +82
    -37
      inc/external/acl/acl_op.h
  18. +28
    -19
      inc/external/acl/acl_op_compiler.h
  19. +21
    -21
      inc/external/acl/acl_prof.h
  20. +79
    -61
      inc/external/acl/acl_rt.h
  21. +19
    -12
      inc/external/acl/acl_tdt.h
  22. +14
    -0
      inc/external/acl/error_codes/ge_error_codes.h
  23. +68
    -67
      inc/external/acl/error_codes/rt_error_codes.h
  24. +138
    -41
      inc/external/acl/ops/acl_cblas.h
  25. +222
    -147
      inc/external/acl/ops/acl_dvpp.h
  26. +6
    -8
      inc/external/acl/ops/acl_fv.h
  27. +133
    -0
      inc/external/hccl/hccl/hccl.h
  28. +101
    -0
      inc/external/hccl/hccl/hccl_types.h
  29. +102
    -0
      inc/external/runtime/runtime/rt_error_codes.h
  30. +60
    -0
      third_party/fwkacllib/inc/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
  31. +37
    -0
      third_party/fwkacllib/inc/inc/aicpu/common/aicpu_task_struct.h
  32. +62
    -0
      third_party/fwkacllib/inc/inc/cce/aicpu_engine.h
  33. +56
    -0
      third_party/fwkacllib/inc/inc/cce/aicpu_engine_struct.h
  34. +31
    -0
      third_party/fwkacllib/inc/inc/cce/blas_struct.h
  35. +101
    -0
      third_party/fwkacllib/inc/inc/cce/cce.h
  36. +152
    -0
      third_party/fwkacllib/inc/inc/cce/cce_def.hpp
  37. +82
    -0
      third_party/fwkacllib/inc/inc/cce/common/attr_list.hpp
  38. +95
    -0
      third_party/fwkacllib/inc/inc/cce/common/catch.hpp
  39. +36
    -0
      third_party/fwkacllib/inc/inc/cce/compiler_stub.h
  40. +60
    -0
      third_party/fwkacllib/inc/inc/cce/customize.h
  41. +23
    -0
      third_party/fwkacllib/inc/inc/cce/dnn.h
  42. +676
    -0
      third_party/fwkacllib/inc/inc/cce/dnn_base.h
  43. +994
    -0
      third_party/fwkacllib/inc/inc/cce/dnn_base_def.hpp
  44. +4838
    -0
      third_party/fwkacllib/inc/inc/cce/dnn_op.h
  45. +23
    -0
      third_party/fwkacllib/inc/inc/cce/dnn_struct.hpp
  46. +894
    -0
      third_party/fwkacllib/inc/inc/cce/dnn_struct_base.hpp
  47. +130
    -0
      third_party/fwkacllib/inc/inc/cce/fwk_adpt_struct.h
  48. +56
    -0
      third_party/fwkacllib/inc/inc/cce/l2fusion_struct.hpp
  49. +65
    -0
      third_party/fwkacllib/inc/inc/cce/optimizer/fusion_engine.h
  50. +54
    -0
      third_party/fwkacllib/inc/inc/cce/taskdown_api.h
  51. +107
    -0
      third_party/fwkacllib/inc/inc/cce/taskdown_common.hpp
  52. +129
    -0
      third_party/fwkacllib/inc/inc/hccl/base.h
  53. +179
    -0
      third_party/fwkacllib/inc/inc/hccl/hcom.h
  54. +142
    -0
      third_party/fwkacllib/inc/inc/mmpa/mmpa_api.h
  55. +561
    -0
      third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_linux.h
  56. +98
    -0
      third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_linux.h
  57. +83
    -0
      third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_win.h
  58. +566
    -0
      third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_win.h
  59. +78
    -0
      third_party/fwkacllib/inc/inc/ops/aipp.h
  60. +80
    -0
      third_party/fwkacllib/inc/inc/ops/all_ops.h
  61. +1231
    -0
      third_party/fwkacllib/inc/inc/ops/array_ops.h
  62. +162
    -0
      third_party/fwkacllib/inc/inc/ops/audio_ops.h
  63. +166
    -0
      third_party/fwkacllib/inc/inc/ops/batch_ops.h
  64. +59
    -0
      third_party/fwkacllib/inc/inc/ops/bitwise_ops.h
  65. +64
    -0
      third_party/fwkacllib/inc/inc/ops/boosted_trees_ops.h
  66. +415
    -0
      third_party/fwkacllib/inc/inc/ops/candidate_sampling_ops.h
  67. +59
    -0
      third_party/fwkacllib/inc/inc/ops/condtake_ops.h
  68. +407
    -0
      third_party/fwkacllib/inc/inc/ops/control_flow_ops.h
  69. +142
    -0
      third_party/fwkacllib/inc/inc/ops/ctc_ops.h
  70. +2344
    -0
      third_party/fwkacllib/inc/inc/ops/data_flow_ops.h
  71. +3735
    -0
      third_party/fwkacllib/inc/inc/ops/elewise_calculation_ops.h
  72. +333
    -0
      third_party/fwkacllib/inc/inc/ops/functional_ops.h
  73. +103
    -0
      third_party/fwkacllib/inc/inc/ops/get_data_ops.h
  74. +284
    -0
      third_party/fwkacllib/inc/inc/ops/hcom_ops.h
  75. +81
    -0
      third_party/fwkacllib/inc/inc/ops/hvd_ops.h
  76. +1539
    -0
      third_party/fwkacllib/inc/inc/ops/image_ops.h
  77. +84
    -0
      third_party/fwkacllib/inc/inc/ops/internal_ops.h
  78. +443
    -0
      third_party/fwkacllib/inc/inc/ops/linalg_ops.h
  79. +230
    -0
      third_party/fwkacllib/inc/inc/ops/list_ops.h
  80. +116
    -0
      third_party/fwkacllib/inc/inc/ops/logging_ops.h
  81. +308
    -0
      third_party/fwkacllib/inc/inc/ops/lookup_ops.h
  82. +957
    -0
      third_party/fwkacllib/inc/inc/ops/math_ops.h
  83. +1048
    -0
      third_party/fwkacllib/inc/inc/ops/matrix_calculation_ops.h
  84. +485
    -0
      third_party/fwkacllib/inc/inc/ops/nn_batch_norm_ops.h
  85. +1711
    -0
      third_party/fwkacllib/inc/inc/ops/nn_calculation_ops.h
  86. +1654
    -0
      third_party/fwkacllib/inc/inc/ops/nn_detect_ops.h
  87. +1279
    -0
      third_party/fwkacllib/inc/inc/ops/nn_norm_ops.h
  88. +53
    -0
      third_party/fwkacllib/inc/inc/ops/nn_ops.h
  89. +1608
    -0
      third_party/fwkacllib/inc/inc/ops/nn_pooling_ops.h
  90. +2598
    -0
      third_party/fwkacllib/inc/inc/ops/nn_training_ops.h
  91. +41
    -0
      third_party/fwkacllib/inc/inc/ops/no_op.h
  92. +889
    -0
      third_party/fwkacllib/inc/inc/ops/nonlinear_fuc_ops.h
  93. +122
    -0
      third_party/fwkacllib/inc/inc/ops/npu_loss_scale_ops.h
  94. +27
    -0
      third_party/fwkacllib/inc/inc/ops/outfeed_ops.h
  95. +445
    -0
      third_party/fwkacllib/inc/inc/ops/pad_ops.h
  96. +56
    -0
      third_party/fwkacllib/inc/inc/ops/parsing_ops.h
  97. +224
    -0
      third_party/fwkacllib/inc/inc/ops/quantize_ops.h
  98. +65
    -0
      third_party/fwkacllib/inc/inc/ops/ragged_array_ops.h
  99. +98
    -0
      third_party/fwkacllib/inc/inc/ops/ragged_conversion_ops.h
  100. +60
    -0
      third_party/fwkacllib/inc/inc/ops/ragged_math_ops.h

+ 4
- 4
inc/external/acl/acl.h View File

@@ -26,9 +26,9 @@ extern "C" {
#endif

// Current version is 1.0.0
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0

/**
* @ingroup AscendCL
@@ -70,4 +70,4 @@ ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *min
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_H_
#endif // INC_EXTERNAL_ACL_ACL_H_

+ 73
- 0
inc/external/acl/acl/acl.h View File

@@ -0,0 +1,73 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_ACL_H_
#define INC_EXTERNAL_ACL_ACL_H_

#include "acl_rt.h"
#include "acl_op.h"
#include "acl_mdl.h"

#ifdef __cplusplus
extern "C" {
#endif

// Current version is 1.0.0, written as MAJOR.MINOR.PATCH using the three macros below.
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0

/**
* @ingroup AscendCL
* @brief acl initialize
*
* @par Restriction
* The aclInit interface can be called only once in a process.
* @param configPath [IN] the config path; it can be NULL
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath);

/**
* @ingroup AscendCL
* @brief acl finalize
*
* @par Restriction
* Need to call aclFinalize before the process exits.
* After calling aclFinalize, the services cannot continue to be used normally.
* @note Declared with (void) so that C translation units see a real prototype
* taking no arguments; an empty parameter list leaves the arguments unchecked in C.
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclFinalize(void);

/**
* @ingroup AscendCL
* @brief query ACL interface version
*
* @param majorVersion [OUT] ACL interface major version
* @param minorVersion [OUT] ACL interface minor version
* @param patchVersion [OUT] ACL interface patch version
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_H_

+ 617
- 0
inc/external/acl/acl/acl_base.h View File

@@ -0,0 +1,617 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_ACL_BASE_H_
#define INC_EXTERNAL_ACL_ACL_BASE_H_

#include <stdint.h>
#include <stddef.h>
#include "error_codes/rt_error_codes.h"
#include "error_codes/ge_error_codes.h"

#ifdef __cplusplus
extern "C" {
#endif

// Marker placed in front of every public ACL function declaration.
// It expands to an export/visibility attribute only when the build defines
// FUNC_VISIBILITY; otherwise it expands to nothing.
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
// Use the documented __declspec keyword: the single-underscore _declspec synonym
// is not accepted when MSVC language extensions are disabled (/Za), and the
// deprecation macros in this header already use __declspec.
#define ACL_FUNC_VISIBILITY __declspec(dllexport)
#else
#define ACL_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define ACL_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define ACL_FUNC_VISIBILITY
#endif
#endif

// Deprecation markers. ACL_DEPRECATED tags a declaration as deprecated;
// ACL_DEPRECATED_MESSAGE(message) does the same and attaches a message string.
// When neither __GNUC__ nor _MSC_VER is defined, both macros expand to nothing.
#ifdef __GNUC__
#define ACL_DEPRECATED __attribute__((deprecated))
#define ACL_DEPRECATED_MESSAGE(message) __attribute__((deprecated(message)))
#elif defined(_MSC_VER)
#define ACL_DEPRECATED __declspec(deprecated)
#define ACL_DEPRECATED_MESSAGE(message) __declspec(deprecated(message))
#else
#define ACL_DEPRECATED
#define ACL_DEPRECATED_MESSAGE(message)
#endif

// Stream, event and context handles are untyped (void *) pointers.
typedef void *aclrtStream;
typedef void *aclrtEvent;
typedef void *aclrtContext;
// Status code type; values are the ACL_SUCCESS / ACL_ERROR_* constants declared in this header.
typedef int aclError;
// 16-bit storage for an aclFloat16 value; convert with aclFloat16ToFloat / aclFloatToFloat16.
typedef uint16_t aclFloat16;
// Only forward-declared here, so these structs are used through pointers.
typedef struct aclDataBuffer aclDataBuffer;
typedef struct aclTensorDesc aclTensorDesc;

// Success. ACL_ERROR_NONE and ACL_SUCCESS are two names for the same value (0).
static const int ACL_ERROR_NONE = 0;
static const int ACL_SUCCESS = 0;

// Codes in the 1xxxxx range.
// NOTE(review): judging by the names these report invalid arguments, files, models,
// operators and call-sequence problems -- confirm against the AscendCL documentation.
static const int ACL_ERROR_INVALID_PARAM = 100000;
static const int ACL_ERROR_UNINITIALIZE = 100001;
static const int ACL_ERROR_REPEAT_INITIALIZE = 100002;
static const int ACL_ERROR_INVALID_FILE = 100003;
static const int ACL_ERROR_WRITE_FILE = 100004;
static const int ACL_ERROR_INVALID_FILE_SIZE = 100005;
static const int ACL_ERROR_PARSE_FILE = 100006;
static const int ACL_ERROR_FILE_MISSING_ATTR = 100007;
static const int ACL_ERROR_FILE_ATTR_INVALID = 100008;
static const int ACL_ERROR_INVALID_DUMP_CONFIG = 100009;
static const int ACL_ERROR_INVALID_PROFILING_CONFIG = 100010;
static const int ACL_ERROR_INVALID_MODEL_ID = 100011;
static const int ACL_ERROR_DESERIALIZE_MODEL = 100012;
static const int ACL_ERROR_PARSE_MODEL = 100013;
static const int ACL_ERROR_READ_MODEL_FAILURE = 100014;
static const int ACL_ERROR_MODEL_SIZE_INVALID = 100015;
static const int ACL_ERROR_MODEL_MISSING_ATTR = 100016;
static const int ACL_ERROR_MODEL_INPUT_NOT_MATCH = 100017;
static const int ACL_ERROR_MODEL_OUTPUT_NOT_MATCH = 100018;
static const int ACL_ERROR_MODEL_NOT_DYNAMIC = 100019;
static const int ACL_ERROR_OP_TYPE_NOT_MATCH = 100020;
static const int ACL_ERROR_OP_INPUT_NOT_MATCH = 100021;
static const int ACL_ERROR_OP_OUTPUT_NOT_MATCH = 100022;
static const int ACL_ERROR_OP_ATTR_NOT_MATCH = 100023;
static const int ACL_ERROR_OP_NOT_FOUND = 100024;
static const int ACL_ERROR_OP_LOAD_FAILED = 100025;
static const int ACL_ERROR_UNSUPPORTED_DATA_TYPE = 100026;
static const int ACL_ERROR_FORMAT_NOT_MATCH = 100027;
static const int ACL_ERROR_BIN_SELECTOR_NOT_REGISTERED = 100028;
static const int ACL_ERROR_KERNEL_NOT_FOUND = 100029;
static const int ACL_ERROR_BIN_SELECTOR_ALREADY_REGISTERED = 100030;
static const int ACL_ERROR_KERNEL_ALREADY_REGISTERED = 100031;
static const int ACL_ERROR_INVALID_QUEUE_ID = 100032;
static const int ACL_ERROR_REPEAT_SUBSCRIBE = 100033;
static const int ACL_ERROR_STREAM_NOT_SUBSCRIBE = 100034;
static const int ACL_ERROR_THREAD_NOT_SUBSCRIBE = 100035;
static const int ACL_ERROR_WAIT_CALLBACK_TIMEOUT = 100036;
static const int ACL_ERROR_REPEAT_FINALIZE = 100037;
static const int ACL_ERROR_NOT_STATIC_AIPP = 100038;
static const int ACL_ERROR_COMPILING_STUB_MODE = 100039;
static const int ACL_ERROR_GROUP_NOT_SET = 100040;
static const int ACL_ERROR_GROUP_NOT_CREATE = 100041;
static const int ACL_ERROR_PROF_ALREADY_RUN = 100042;
static const int ACL_ERROR_PROF_NOT_RUN = 100043;
static const int ACL_ERROR_DUMP_ALREADY_RUN = 100044;
static const int ACL_ERROR_DUMP_NOT_RUN = 100045;
// The numbering jumps from 100045 to 148046 here; keep the literal values as they are.
static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046;
static const int ACL_ERROR_PROF_API_CONFLICT = 148047;
static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048;
static const int ACL_ERROR_INVALID_OPP_PATH = 148049;

// Codes in the 2xxxxx range.
// NOTE(review): by name, resource and capability errors -- confirm.
static const int ACL_ERROR_BAD_ALLOC = 200000;
static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
static const int ACL_ERROR_INVALID_DEVICE = 200002;
static const int ACL_ERROR_MEMORY_ADDRESS_UNALIGNED = 200003;
static const int ACL_ERROR_RESOURCE_NOT_MATCH = 200004;
static const int ACL_ERROR_INVALID_RESOURCE_HANDLE = 200005;
static const int ACL_ERROR_FEATURE_UNSUPPORTED = 200006;
static const int ACL_ERROR_PROF_MODULES_UNSUPPORTED = 200007;

// The only code in the 3xxxxx range.
static const int ACL_ERROR_STORAGE_OVER_LIMIT = 300000;

// Codes in the 5xxxxx range.
// NOTE(review): by name, internal failures of ACL and of the GE, RT, DRV and
// profiling components -- confirm.
static const int ACL_ERROR_INTERNAL_ERROR = 500000;
static const int ACL_ERROR_FAILURE = 500001;
static const int ACL_ERROR_GE_FAILURE = 500002;
static const int ACL_ERROR_RT_FAILURE = 500003;
static const int ACL_ERROR_DRV_FAILURE = 500004;
static const int ACL_ERROR_PROFILING_FAILURE = 500005;

// Number of values in one row of the dimsRange array taken by aclSetTensorShapeRange.
#define ACL_TENSOR_SHAPE_RANGE_NUM 2
// Value returned by aclGetTensorDescNumDims when the tensor dim is -2 (unknown rank).
// The literal is the 64-bit unsigned representation of -2.
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE

// Data types that a tensor description can carry.
// Values are assigned explicitly; 5 is not used by any enumerator.
typedef enum {
ACL_DT_UNDEFINED = -1,
ACL_FLOAT = 0,
ACL_FLOAT16 = 1,
ACL_INT8 = 2,
ACL_INT32 = 3,
ACL_UINT8 = 4,
ACL_INT16 = 6,
ACL_UINT16 = 7,
ACL_UINT32 = 8,
ACL_INT64 = 9,
ACL_UINT64 = 10,
ACL_DOUBLE = 11,
ACL_BOOL = 12,
ACL_STRING = 13,
} aclDataType;

// Tensor formats. Values are assigned explicitly and are not contiguous.
// ACL_FRACTAL_Z_3D is the only enumerator without the ACL_FORMAT_ prefix;
// the name is part of the public interface, so it is kept as is.
typedef enum {
ACL_FORMAT_UNDEFINED = -1,
ACL_FORMAT_NCHW = 0,
ACL_FORMAT_NHWC = 1,
ACL_FORMAT_ND = 2,
ACL_FORMAT_NC1HWC0 = 3,
ACL_FORMAT_FRACTAL_Z = 4,
ACL_FORMAT_NC1HWC0_C04 = 12,
ACL_FORMAT_NDHWC = 27,
ACL_FORMAT_FRACTAL_NZ = 29,
ACL_FORMAT_NCDHW = 30,
ACL_FORMAT_NDC1HWC0 = 32,
ACL_FRACTAL_Z_3D = 33
} aclFormat;

// Log levels passed to aclAppLog / ACL_APP_LOG, numbered 0 (ACL_DEBUG) to 3 (ACL_ERROR).
typedef enum {
ACL_DEBUG = 0,
ACL_INFO = 1,
ACL_WARNING = 2,
ACL_ERROR = 3,
} aclLogLevel;

/**
* @ingroup AscendCL
* @brief Converts data of type aclFloat16 to data of type float
*
* @param value [IN] Data to be converted
*
* @retval Transformed data
*/
ACL_FUNC_VISIBILITY float aclFloat16ToFloat(aclFloat16 value);

/**
* @ingroup AscendCL
* @brief Converts data of type float to data of type aclFloat16
*
* @param value [IN] Data to be converted
*
* @retval Transformed data
*/
ACL_FUNC_VISIBILITY aclFloat16 aclFloatToFloat16(float value);

/**
* @ingroup AscendCL
* @brief create data of aclDataBuffer
*
* @param data [IN] pointer to data
* @li Need to be managed by the user,
* call aclrtMalloc interface to apply for memory,
* call aclrtFree interface to release memory
*
* @param size [IN] size of data in bytes
*
* @retval pointer to created instance. nullptr if run out of memory
*
* @see aclrtMalloc | aclrtFree
*/
ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size);

/**
* @ingroup AscendCL
* @brief destroy data of aclDataBuffer
*
* @par Function
* Only the aclDataBuffer type data is destroyed here.
* The memory of the data passed in when the aclCreateDataBuffer interface
* was called to create the aclDataBuffer must be released by the user.
*
* @param dataBuffer [IN] pointer to the aclDataBuffer
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclCreateDataBuffer
*/
ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer);

/**
* @ingroup AscendCL
* @brief update new data of aclDataBuffer
*
* @param dataBuffer [OUT] pointer to aclDataBuffer
* @li The old data needs to be released by the user, otherwise a memory leak may occur:
* call aclGetDataBufferAddr interface to get old data address,
* call aclrtFree interface to release memory
*
* @param data [IN] pointer to new data
* @li Need to be managed by the user,
* call aclrtMalloc interface to apply for memory,
* call aclrtFree interface to release memory
*
* @param size [IN] size of data in bytes
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr
*/
ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size);

/**
* @ingroup AscendCL
* @brief get data address from aclDataBuffer
*
* @param dataBuffer [IN] pointer to the data of aclDataBuffer
*
* @retval data address
*/
ACL_FUNC_VISIBILITY void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer);

/**
* @ingroup AscendCL
* @brief get data size of aclDataBuffer
*
* @deprecated Use aclGetDataBufferSizeV2, which returns the size as size_t instead of uint32_t.
*
* @param dataBuffer [IN] pointer to the data of aclDataBuffer
*
* @retval data size
*/
ACL_DEPRECATED_MESSAGE("aclGetDataBufferSize is deprecated, use aclGetDataBufferSizeV2 instead")
ACL_FUNC_VISIBILITY uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer);

/**
* @ingroup AscendCL
* @brief get data size of aclDataBuffer to replace aclGetDataBufferSize
*
* @param dataBuffer [IN] pointer to the data of aclDataBuffer
*
* @retval data size
*/
ACL_FUNC_VISIBILITY size_t aclGetDataBufferSizeV2(const aclDataBuffer *dataBuffer);

/**
* @ingroup AscendCL
* @brief get size of aclDataType
*
* @param dataType [IN] the aclDataType whose size is to be returned
*
* @retval size of the aclDataType
*/
ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);

// interfaces of tensor desc
/**
* @ingroup AscendCL
* @brief create data aclTensorDesc
*
* @param dataType [IN] Data types described by tensor
* @param numDims [IN] the number of dimensions of the shape
* @param dims [IN] the size of the specified dimension
* @param format [IN] tensor format
*
* @retval aclTensorDesc pointer.
* @retval nullptr if param is invalid or run out of memory
*/
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType,
int numDims,
const int64_t *dims,
aclFormat format);

/**
* @ingroup AscendCL
* @brief destroy data aclTensorDesc
*
* @param desc [IN] pointer to the data of aclTensorDesc to destroy
*/
ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief set tensor shape range for aclTensorDesc
*
* @param desc [OUT] pointer to the data of aclTensorDesc
* @param dimsCount [IN] the number of dimensions of the shape
* @param dimsRange [IN] the range of dimensions of the shape;
* each row of the array holds ACL_TENSOR_SHAPE_RANGE_NUM values
* (presumably one row per dimension with its lower and upper bound -- TODO confirm)
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc,
size_t dimsCount,
int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);

/**
* @ingroup AscendCL
* @brief get data type specified by the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
*
* @retval data type specified by the tensor description.
* @retval ACL_DT_UNDEFINED if description is null
*/
ACL_FUNC_VISIBILITY aclDataType aclGetTensorDescType(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief get data format specified by the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
*
* @retval data format specified by the tensor description.
* @retval ACL_FORMAT_UNDEFINED if description is null
*/
ACL_FUNC_VISIBILITY aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief get tensor size specified by the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
*
* @retval data size specified by the tensor description.
* @retval 0 if description is null
*/
ACL_FUNC_VISIBILITY size_t aclGetTensorDescSize(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief get element count specified by the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
*
* @retval element count specified by the tensor description.
* @retval 0 if description is null
*/
ACL_FUNC_VISIBILITY size_t aclGetTensorDescElementCount(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief get number of dims specified by the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
*
* @retval number of dims specified by the tensor description.
* @retval 0 if description is null
* @retval ACL_UNKNOWN_RANK if the tensor dim is -2
*/
ACL_FUNC_VISIBILITY size_t aclGetTensorDescNumDims(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief Get the size of the specified dim in the tensor description
*
* @deprecated Use aclGetTensorDescDimV2, which returns an aclError status and
* writes the dim size to an output parameter.
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @param index [IN] index of dims, start from 0.
*
* @retval dim specified by the tensor description and index.
* @retval -1 if description or index is invalid
*/
ACL_DEPRECATED_MESSAGE("aclGetTensorDescDim is deprecated, use aclGetTensorDescDimV2 instead")
ACL_FUNC_VISIBILITY int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index);

/**
* @ingroup AscendCL
* @brief Get the size of the specified dim in the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @param index [IN] index of dims, start from 0.
* @param dimSize [OUT] size of the specified dim.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, size_t index, int64_t *dimSize);

/**
* @ingroup AscendCL
* @brief Get the range of the specified dim in the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @param index [IN] index of dims, start from 0.
* @param dimRangeNum [IN] number of dimRange.
* @param dimRange [OUT] range of the specified dim.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc,
size_t index,
size_t dimRangeNum,
int64_t *dimRange);

/**
* @ingroup AscendCL
* @brief set tensor description name
*
* @param desc [OUT] pointer to the instance of aclTensorDesc
* @param name [IN] tensor description name
*/
ACL_FUNC_VISIBILITY void aclSetTensorDescName(aclTensorDesc *desc, const char *name);

/**
* @ingroup AscendCL
* @brief get tensor description name
*
* @param desc [IN] pointer to the instance of aclTensorDesc
*
* @retval tensor description name.
* @retval empty string if description is null
*/
ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief Convert the format in the source aclTensorDesc according to
* the specified dstFormat to generate a new target aclTensorDesc.
* The format in the source aclTensorDesc remains unchanged.
*
* @param srcDesc [IN] pointer to the source tensor desc
* @param dstFormat [IN] destination format
* @param dstDesc [OUT] pointer to the pointer to the destination tensor desc
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
aclTensorDesc **dstDesc);

/**
* @ingroup AscendCL
* @brief Set the storage format specified by the tensor description
*
* @deprecated Use aclSetTensorFormat instead.
*
* @param desc [OUT] pointer to the instance of aclTensorDesc
* @param format [IN] the storage format
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_DEPRECATED_MESSAGE("aclSetTensorStorageFormat is deprecated, use aclSetTensorFormat instead")
ACL_FUNC_VISIBILITY aclError aclSetTensorStorageFormat(aclTensorDesc *desc, aclFormat format);

/**
* @ingroup AscendCL
* @brief Set the storage shape specified by the tensor description
*
* @deprecated Use aclSetTensorShape instead.
*
* @param desc [OUT] pointer to the instance of aclTensorDesc
* @param numDims [IN] the number of dimensions of the shape
* @param dims [IN] the size of the specified dimension
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_DEPRECATED_MESSAGE("aclSetTensorStorageShape is deprecated, use aclSetTensorShape instead")
ACL_FUNC_VISIBILITY aclError aclSetTensorStorageShape(aclTensorDesc *desc, int numDims, const int64_t *dims);

/**
* @ingroup AscendCL
* @brief Set the format specified by the tensor description
*
* @param desc [OUT] pointer to the instance of aclTensorDesc
* @param format [IN] the format to set
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorFormat(aclTensorDesc *desc, aclFormat format);

/**
* @ingroup AscendCL
* @brief Set the shape specified by the tensor description
*
* @param desc [OUT] pointer to the instance of aclTensorDesc
* @param numDims [IN] the number of dimensions of the shape
* @param dims [IN] the size of the specified dimension
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorShape(aclTensorDesc *desc, int numDims, const int64_t *dims);

/**
* @ingroup AscendCL
* @brief Set the original format specified by the tensor description
*
* @param desc [OUT] pointer to the instance of aclTensorDesc
* @param format [IN] the original format to set
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorOriginFormat(aclTensorDesc *desc, aclFormat format);

/**
* @ingroup AscendCL
* @brief Set the original shape specified by the tensor description
*
* @param desc [OUT] pointer to the instance of aclTensorDesc
* @param numDims [IN] the number of dimensions of the shape
* @param dims [IN] the size of the specified dimension
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int numDims, const int64_t *dims);

/**
* @ingroup AscendCL
* @brief get op description info
*
* @param desc [IN] pointer to tensor description
* @param index [IN] index of tensor
*
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);

/**
* @ingroup AscendCL
* @brief get address of tensor
*
* @param desc [IN] pointer to tensor description
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief Set the dynamic input name specified by the tensor description
*
* @param desc [OUT] pointer to the instance of aclTensorDesc
* @param dynamicInputName [IN] pointer to the dynamic input name
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName);

/**
* @ingroup AscendCL
* @brief Set const data specified by the tensor description
*
* @param desc [OUT] pointer to the instance of aclTensorDesc
* @param dataBuffer [IN] pointer to the const databuffer
* @param length [IN] the length of const databuffer
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length);

/**
* @ingroup AscendCL
* @brief an interface for users to output APP logs
*
* @param logLevel [IN] the level of current log
* @param func [IN] the function where the log is located
* @param file [IN] the file where the log is located
* @param line [IN] Number of source lines where the log is located
* @param fmt [IN] the format of current log
* @param ... [IN] the value of current log
*/
ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
const char *fmt, ...);

// Convenience wrapper around aclAppLog: passes the calling function, source file and
// line via __FUNCTION__, __FILE__ and __LINE__ so callers only supply level and message.
// ##__VA_ARGS__ is a compiler extension that drops the trailing comma when no
// arguments follow fmt.
#define ACL_APP_LOG(level, fmt, ...) \
aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_BASE_H_

+ 1210
- 0
inc/external/acl/acl/acl_mdl.h
File diff suppressed because it is too large
View File


+ 549
- 0
inc/external/acl/acl/acl_op.h View File

@@ -0,0 +1,549 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_ACL_OP_H_
#define INC_EXTERNAL_ACL_ACL_OP_H_

#include "acl_base.h"
#include "acl_rt.h"

#ifdef __cplusplus
extern "C" {
#endif

// Only forward-declared here, so these structs are used through pointers.
typedef struct aclopHandle aclopHandle;
typedef struct aclopAttr aclopAttr;
typedef struct aclopKernelDesc aclopKernelDesc;

// Callback type that receives a data pointer and its length.
// NOTE(review): by name, used to release data -- confirm where it is registered.
typedef void (*aclDataDeallocator)(void *data, size_t length);

// Compile flag value 1. NOTE(review): by name, selects the bin selector -- confirm against its users.
static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;

typedef enum aclEngineType {
ACL_ENGINE_SYS,
ACL_ENGINE_AICORE,
ACL_ENGINE_VECTOR,
} aclopEngineType;

/**
* @ingroup AscendCL
* @brief Set base directory that contains single op models
*
* @par Restriction
* The aclopSetModelDir interface can be called only once in a process.
* @param modelDir [IN] path of the directory
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetModelDir(const char *modelDir);

/**
* @ingroup AscendCL
* @brief load single op models from memory
*
* @par Restriction
* The aclopLoad interface can be called more than one times in a process.
* @param model [IN] address of single op models
* @param modelSize [IN] size of single op models
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopLoad(const void *model, size_t modelSize);

/**
* @ingroup AscendCL
* @brief create data of type aclopAttr
*
* The parameter list is spelled (void) so that C callers get a real
* prototype; an empty () in C leaves the arguments unchecked.
* For C++ callers the two spellings are equivalent.
*
* @retval pointer to created instance.
* @retval nullptr if run out of memory
*
* @see aclopDestroyAttr
*/
ACL_FUNC_VISIBILITY aclopAttr *aclopCreateAttr(void);

/**
* @ingroup AscendCL
* @brief destroy data of type aclopAttr
*
* @param attr [IN] pointer to the instance of aclopAttr
*/
ACL_FUNC_VISIBILITY void aclopDestroyAttr(const aclopAttr *attr);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is bool
*
* @param attr [OUT] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param attrValue [IN] attribute value
* false if attrValue is 0, true otherwise.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrBool(aclopAttr *attr, const char *attrName, uint8_t attrValue);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is int64_t
*
* @param attr [OUT] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param attrValue [IN] attribute value
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrInt(aclopAttr *attr, const char *attrName, int64_t attrValue);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is float
*
* @param attr [OUT] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param attrValue [IN] attribute value
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attrName, float attrValue);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is string
*
* @param attr [OUT] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param attrValue [IN] attribute value
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is list of bools
*
* @param attr [OUT] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param numValues [IN] number of values
* @param values [IN] pointer to values. each value is false if it is 0, true otherwise.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
const uint8_t *values);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is list of ints
*
* @param attr [OUT] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param numValues [IN] number of values
* @param values [IN] pointer to values
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
const int64_t *values);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is list of floats
*
* @param attr [OUT] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param numValues [IN] number of values
* @param values [IN] pointer to values
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
const float *values);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is list of strings
*
* @param attr [OUT] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param numValues [IN] number of values
* @param values [IN] pointer to values
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
const char **values);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is list of list of ints
*
* @param attr [OUT] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param numLists [IN] number of lists
* @param numValues [IN] pointer to number of values of each list
* @param values [IN] pointer to values
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr,
const char *attrName,
int numLists,
const int *numValues,
const int64_t *const values[]);

/**
* @ingroup AscendCL
* @brief Load and execute the specified operator asynchronously
*
* @par Restriction
* @li The input and output organization of each operator is different,
* and the application needs to organize the operator strictly
* according to the operator input and output parameters when calling.
* @li When the user calls aclopExecute,
* the ACL finds the corresponding task according to the optype,
* the description of the input tensor,
* the description of the output tensor, and attr, and issues the execution.
*
* @param opType [IN] type of op
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param inputs [IN] pointer to array of input buffers
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param outputs [OUT] pointer to array of output buffers
* @param attr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
* @param stream [IN] stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead")
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
const aclDataBuffer *const inputs[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
aclDataBuffer *const outputs[],
const aclopAttr *attr,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Load and execute the specified operator
* The difference with aclopExecute is that aclopExecuteV2 will refresh outputDesc
*
* @par Restriction
* @li The input and output organization of each operator is different,
* and the application needs to organize the operator strictly
* according to the operator input and output parameters when calling.
* @li When the user calls aclopExecuteV2,
* the ACL finds the corresponding task according to the optype,
* the description of the input tensor,
* the description of the output tensor, and attr, and issues the execution.
*
* @param opType [IN] type of op
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param inputs [IN] pointer to array of input buffers
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN|OUT] pointer to array of output tensor descriptions
* @param outputs [OUT] pointer to array of output buffers
* @param attr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
* @param stream [IN] stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType,
int numInputs,
aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[],
int numOutputs,
aclTensorDesc *outputDesc[],
aclDataBuffer *outputs[],
aclopAttr *attr,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create an instance of aclopHandle.
*
* @param opType [IN] type of op
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param opAttr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
* @param handle [OUT] pointer to the pointer to the handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *opAttr,
aclopHandle **handle);

/**
* @ingroup AscendCL
* @brief destroy aclopHandle instance
*
* @param handle [IN] pointer to the instance of aclopHandle
*/
ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);

/**
* @ingroup AscendCL
* @brief execute an op with the handle.
* can save op model matching cost compared with aclopExecute
*
* @param handle [IN] pointer to the instance of aclopHandle.
* The aclopCreateHandle interface has been called
* in advance to create aclopHandle type data.
* @param numInputs [IN] number of inputs
* @param inputs [IN] pointer to array of input buffers.
* The aclCreateDataBuffer interface has been called
* in advance to create aclDataBuffer type data.
* @param numOutputs [IN] number of outputs
* @param outputs [OUT] pointer to array of output buffers
* @param stream [IN] stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclopCreateHandle | aclCreateDataBuffer
*/
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle,
int numInputs,
const aclDataBuffer *const inputs[],
int numOutputs,
aclDataBuffer *const outputs[],
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief cast data type
*
* @param srcDesc [IN] source tensor desc
* @param srcBuffer [IN] source tensor buffer
* @param dstDesc [IN] destination tensor desc
* @param dstBuffer [OUT] destination tensor buffer
* @param truncate [IN] do not truncate if value is 0, truncate otherwise
* @param stream [IN] stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc,
const aclDataBuffer *srcBuffer,
const aclTensorDesc *dstDesc,
aclDataBuffer *dstBuffer,
uint8_t truncate,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create a handle for casting datatype
*
* @param srcDesc [IN] source tensor desc
* @param dstDesc [IN] destination tensor desc
* @param truncate [IN] do not truncate if value is 0, truncate otherwise
* @param handle [OUT] pointer to the pointer to the handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc,
aclTensorDesc *dstDesc,
uint8_t truncate,
aclopHandle **handle);


/**
* @ingroup AscendCL
* @brief create kernel
*
* @param opType [IN] op type
* @param kernelId [IN] kernel id
* @param kernelName [IN] kernel name
* @param binData [IN] kernel bin data
* @param binSize [IN] kernel bin size
* @param enginetype [IN] engine type
* @param deallocator [IN] callback function for deallocating bin data,
* null if bin data to be deallocated by caller
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclopCompile
*/
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType,
const char *kernelId,
const char *kernelName,
void *binData,
int binSize,
aclopEngineType enginetype,
aclDataDeallocator deallocator);


/**
* @ingroup AscendCL
* @brief signature of an op compile function,
* registered per op type through aclopRegisterCompileFunc
*
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param opAttr [IN] pointer to instance of aclopAttr
* @param aclopKernelDesc [IN] pointer to instance of aclopKernelDesc
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclopRegisterCompileFunc | aclopUnregisterCompileFunc
*/
typedef aclError (*aclopCompileFunc)(int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *opAttr,
aclopKernelDesc *aclopKernelDesc);

/**
* @ingroup AscendCL
* @brief register compile function
*
* @param opType [IN] op type
* @param func [IN] compile function
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclopUnregisterCompileFunc
*/
ACL_FUNC_VISIBILITY aclError aclopRegisterCompileFunc(const char *opType, aclopCompileFunc func);

/**
* @ingroup AscendCL
* @brief unregister compile function
*
* @param opType [IN] op type
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);

/**
* @ingroup AscendCL
* @brief set kernel args
*
* @param kernelDesc [IN] pointer to instance of aclopKernelDesc
* @param kernelId [IN] kernel id
* @param blockDim [IN] block dim
* @param args [IN] args
* @param argSize [IN] size in bytes of args
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc,
const char *kernelId,
uint32_t blockDim,
const void *args,
uint32_t argSize);

/**
* @ingroup AscendCL
* @brief set workspace sizes
*
* @param kernelDesc [IN] pointer to instance of aclopKernelDesc
* @param numWorkspaces [IN] number of workspaces
* @param workspaceSizes [IN] pointer to array of sizes of workspaces
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kernelDesc, int numWorkspaces,
size_t *workspaceSizes);

/**
* @ingroup AscendCL
* @brief compile op with dynamic shape
*
* @param opType [IN] op type
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param attr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *attr);

/**
* @ingroup AscendCL
* @brief inferShape the specified operator synchronously
*
* @param opType [IN] type of op
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param inputs [IN] pointer to array of input buffers
* @param numOutputs [IN] number of outputs
* @param outputDesc [OUT] pointer to array of output tensor descriptions
* @param attr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType,
int numInputs,
aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[],
int numOutputs,
aclTensorDesc *outputDesc[],
aclopAttr *attr);


#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_H_

+ 115
- 0
inc/external/acl/acl/acl_op_compiler.h View File

@@ -0,0 +1,115 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
#define INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_

#include "acl_base.h"
#include "acl_op.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef enum aclCompileType {
ACL_COMPILE_SYS,
ACL_COMPILE_UNREGISTERED
} aclopCompileType;

typedef enum {
ACL_PRECISION_MODE,
ACL_AICORE_NUM,
ACL_AUTO_TUNE_MODE,
ACL_OP_SELECT_IMPL_MODE,
ACL_OPTYPELIST_FOR_IMPLMODE,
ACL_OP_DEBUG_LEVEL,
ACL_DEBUG_DIR,
ACL_OP_COMPILER_CACHE_MODE,
ACL_OP_COMPILER_CACHE_DIR
} aclCompileOpt;

/**
* @ingroup AscendCL
* @brief compile op
*
* @param opType [IN] op type
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param attr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
* @param engineType [IN] engine type
* @param compileFlag [IN] compile flag
* @param opPath [IN] path of op
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *attr,
aclopEngineType engineType,
aclopCompileType compileFlag,
const char *opPath);

/**
* @ingroup AscendCL
* @brief compile and execute op
*
* @param opType [IN] op type
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param inputs [IN] pointer to array of input buffers
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param outputs [OUT] pointer to array of outputs buffers
* @param attr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
* @param engineType [IN] engine type
* @param compileFlag [IN] compile flag
* @param opPath [IN] path of op
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType,
int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag,
const char *opPath, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief set compile option
*
* @param opt [IN] compile option
* @param value [IN] pointer for the option value
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_

+ 296
- 0
inc/external/acl/acl/acl_prof.h View File

@@ -0,0 +1,296 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_PROF_H_
#define INC_EXTERNAL_ACL_PROF_H_

#include "acl_base.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Profiling data type switches; each value is a distinct single bit. */
/* NOTE(review): presumably OR-ed together into dataTypeConfig of aclprofCreateConfig; confirm. */
#define ACL_PROF_ACL_API 0x0001
#define ACL_PROF_TASK_TIME 0x0002
#define ACL_PROF_AICORE_METRICS 0x0004
#define ACL_PROF_AICPU 0x0008

/* NOTE(review): presumably buffer sizes (including the terminating '\0') for the op name */
/* and op type strings returned by aclprofGetOpName / aclprofGetOpType; confirm. */
#define ACL_PROF_MAX_OP_NAME_LEN 257
#define ACL_PROF_MAX_OP_TYPE_LEN 65

typedef enum {
ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
ACL_AICORE_PIPE_UTILIZATION = 1,
ACL_AICORE_MEMORY_BANDWIDTH = 2,
ACL_AICORE_L0B_AND_WIDTH = 3,
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
ACL_AICORE_NONE = 0xFF
} aclprofAicoreMetrics;

typedef struct aclprofConfig aclprofConfig;
typedef struct aclprofStopConfig aclprofStopConfig;
typedef struct aclprofAicoreEvents aclprofAicoreEvents;
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig;

/**
* @ingroup AscendCL
* @brief profiling initialize
*
* @param profilerResultPath [IN] path of profiling result
* @param length [IN] length of profilerResultPath
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofFinalize
*/
ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length);

/**
* @ingroup AscendCL
* @brief profiling finalize
*
* The parameter list is spelled (void) so that C callers get a real
* prototype; an empty () in C leaves the arguments unchecked.
* For C++ callers the two spellings are equivalent.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofInit
*/
ACL_FUNC_VISIBILITY aclError aclprofFinalize(void);

/**
* @ingroup AscendCL
* @brief Start profiling modules by profilerConfig
*
* @param profilerConfig [IN] config of profiling
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofStop
*/
ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig);

/**
* @ingroup AscendCL
* @brief Create data of type aclprofConfig
*
* @param deviceIdList [IN] list of device id
* @param deviceNums [IN] number of devices
* @param aicoreMetrics [IN] type of aicore metrics
* @param aicoreEvents [IN] pointer to aicore events, only support NULL now
* @param dataTypeConfig [IN] config modules need profiling
*
* @retval the aclprofConfig pointer
*
* @see aclprofDestroyConfig
*/
ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);

/**
* @ingroup AscendCL
* @brief Destroy data of type aclprofConfig
*
* @param profilerConfig [IN] config of profiling
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofCreateConfig
*/
ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig);

/**
* @ingroup AscendCL
* @brief stop profiling modules by profilerConfig
*
* @param profilerConfig [IN] pointer to stop config of profiling
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofStart
*/
ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig);

/**
* @ingroup AscendCL
* @brief subscribe profiling data of model
*
* @param modelId [IN] the model id subscribed
* @param profSubscribeConfig [IN] pointer to config of model subscribe
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofModelUnSubscribe
*/
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId,
const aclprofSubscribeConfig *profSubscribeConfig);

/**
* @ingroup AscendCL
* @brief unsubscribe profiling data of model
*
* @param modelId [IN] the model id unsubscribed
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofModelSubscribe
*/
ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId);

/**
* @ingroup AscendCL
* @brief create subscribe config
*
* @param timeInfoSwitch [IN] switch whether get time info from model
* @param aicoreMetrics [IN] aicore metrics
* @param fd [IN] pointer to write pipe
*
* @retval the aclprofSubscribeConfig pointer
*
* @see aclprofDestroySubscribeConfig
*/
ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch,
aclprofAicoreMetrics aicoreMetrics, void *fd);

/**
* @ingroup AscendCL
* @brief destroy subscribe config
*
* @param profSubscribeConfig [IN] subscribe config
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclprofCreateSubscribeConfig
*/
ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig);

/**
* @ingroup AscendCL
* @brief get size of op desc
*
* @param opDescSize [OUT] size of op desc
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize);

/**
* @ingroup AscendCL
* @brief get op number from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param opNumber [OUT] op number of subscription data
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber);

/**
* @ingroup AscendCL
* @brief get op type from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
* @param opType [OUT] obtained op type string
* @param opTypeLen [IN] obtained length of op type string
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index,
char *opType, size_t opTypeLen);

/**
* @ingroup AscendCL
* @brief get op name from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
* @param opName [OUT] obtained op name string
* @param opNameLen [IN] obtained length of op name string
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index,
char *opName, size_t opNameLen);

/**
* @ingroup AscendCL
* @brief get start time of specified op from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
*
* @retval start time(us) of specified op with timestamp
* @retval 0 for failed
*/
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index);

/**
* @ingroup AscendCL
* @brief get end time of specified op from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
*
* @retval end time(us) of specified op with timestamp
* @retval 0 for failed
*/
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index);

/**
* @ingroup AscendCL
* @brief get execution time of specified op from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
*
* @retval execution time(us) of specified op with timestamp
* @retval 0 for failed
*/
ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index);

/**
* @ingroup AscendCL
* @brief get model id from subscription data
*
* @param opInfo [IN] pointer to subscription data
* @param opInfoLen [IN] memory size of subscription data
* @param index [IN] index of op array in opInfo
* (NOTE(review): previously undocumented; described as for the
* sibling aclprofGetOp* getters, to be confirmed)
*
* @retval model id of subscription data
* @retval 0 for failed
*/
ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_PROF_H_

+ 950
- 0
inc/external/acl/acl/acl_rt.h View File

@@ -0,0 +1,950 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_ACL_RT_H_
#define INC_EXTERNAL_ACL_ACL_RT_H_

#include <stdint.h>
#include <stddef.h>
#include "acl_base.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
* @ingroup AscendCL
* @brief run mode, written to the runMode output of aclrtGetRunMode
*/
typedef enum aclrtRunMode {
    ACL_DEVICE = 0,
    ACL_HOST = 1,
} aclrtRunMode;

/**
* @ingroup AscendCL
* @brief ts id, passed to aclrtSetTsDevice to set the scheduling TS
*/
typedef enum aclrtTsId {
ACL_TS_ID_AICORE = 0,
ACL_TS_ID_AIVECTOR = 1,
ACL_TS_ID_RESERVED = 2,
} aclrtTsId;

/**
* @ingroup AscendCL
* @brief event status, written to the status output of aclrtQueryEvent
*/
typedef enum aclrtEventStatus {
ACL_EVENT_STATUS_COMPLETE = 0,
ACL_EVENT_STATUS_NOT_READY = 1,
ACL_EVENT_STATUS_RESERVED = 2,
} aclrtEventStatus;

/**
* @ingroup AscendCL
* @brief callback block type, passed as blockType to aclrtLaunchCallback
*/
typedef enum aclrtCallbackBlockType {
    ACL_CALLBACK_NO_BLOCK = 0,
    ACL_CALLBACK_BLOCK = 1,
} aclrtCallbackBlockType;

/**
* @ingroup AscendCL
* @brief memcpy type, passed as kind to aclrtMemcpy and aclrtMemcpyAsync
*/
typedef enum aclrtMemcpyKind {
    ACL_MEMCPY_HOST_TO_HOST = 0,
    ACL_MEMCPY_HOST_TO_DEVICE = 1,
    ACL_MEMCPY_DEVICE_TO_HOST = 2,
    ACL_MEMCPY_DEVICE_TO_DEVICE = 3,
} aclrtMemcpyKind;

/**
* @ingroup AscendCL
* @brief memory alloc policy, passed as policy to aclrtMalloc and aclrtMallocCached
*/
typedef enum aclrtMemMallocPolicy {
    ACL_MEM_MALLOC_HUGE_FIRST = 0,
    ACL_MEM_MALLOC_HUGE_ONLY = 1,
    ACL_MEM_MALLOC_NORMAL_ONLY = 2,
    ACL_MEM_MALLOC_HUGE_FIRST_P2P = 3,
    ACL_MEM_MALLOC_HUGE_ONLY_P2P = 4,
    ACL_MEM_MALLOC_NORMAL_ONLY_P2P = 5,
} aclrtMemMallocPolicy;

/**
* @ingroup AscendCL
* @brief memory attribute, passed as attr to aclrtGetMemInfo
*/
typedef enum aclrtMemAttr {
    ACL_DDR_MEM = 0,
    ACL_HBM_MEM = 1,
    ACL_DDR_MEM_HUGE = 2,
    ACL_DDR_MEM_NORMAL = 3,
    ACL_HBM_MEM_HUGE = 4,
    ACL_HBM_MEM_NORMAL = 5,
    ACL_DDR_MEM_P2P_HUGE = 6,
    ACL_DDR_MEM_P2P_NORMAL = 7,
    ACL_HBM_MEM_P2P_HUGE = 8,
    ACL_HBM_MEM_P2P_NORMAL = 9,
} aclrtMemAttr;

/**
* @ingroup AscendCL
* @brief group attribute, passed as attr to aclrtGetGroupInfoDetail
*/
typedef enum aclrtGroupAttr {
    ACL_GROUP_AICORE_INT = 0,
    ACL_GROUP_AIV_INT = 1,
    ACL_GROUP_AIC_INT = 2,
    ACL_GROUP_SDMANUM_INT = 3,
    ACL_GROUP_ASQNUM_INT = 4
} aclrtGroupAttr;

/* Group information handle; struct tagRtGroupInfo is not defined in this header,
 * so callers only ever hold a pointer to it (see aclrtCreateGroupInfo). */
typedef struct tagRtGroupInfo aclrtGroupInfo;

/* Exception information handle; struct rtExceptionInfo is not defined in this header.
 * Its fields are read through the aclrtGet*FromExceptionInfo accessors. */
typedef struct rtExceptionInfo aclrtExceptionInfo;

/* Callback added to a Stream's task queue by aclrtLaunchCallback; receives the userData given there. */
typedef void (*aclrtCallback)(void *userData);

/* Callback registered with aclrtSetExceptionInfoCallback to handle exception information. */
typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo);

/**
* @ingroup AscendCL
* @brief Set a callback function to handle exception information
*
* @param callback [IN] callback function to handle exception information
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback);

/**
* @ingroup AscendCL
* @brief Get task id from exception information
*
* @param info [IN] pointer of exception information
*
* @retval The task id from exception information
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief Get stream id from exception information
*
* @param info [IN] pointer of exception information
*
* @retval The stream id from exception information
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief Get thread id from exception information
*
* @param info [IN] pointer of exception information
*
* @retval The thread id of fail task
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief Get device id from exception information
*
* @param info [IN] pointer of exception information
*
* @retval The device id of fail task
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief The thread that handles the callback function on the Stream
*
* @param threadId [IN] thread ID
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Add a callback function to be executed on the host
* to the task queue of the Stream
*
* @param fn [IN] Specify the callback function to be added
* The function prototype of the callback function is:
* typedef void (*aclrtCallback)(void *userData);
* @param userData [IN] User data to be passed to the callback function
* @param blockType [IN] callback block type
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief After waiting for a specified time, trigger callback processing
*
* @par Function
* The thread processing callback specified by
* the aclrtSubscribeReport interface
*
* @param timeout [IN] timeout value
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSubscribeReport
*/
ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout);

/**
* @ingroup AscendCL
* @brief Cancel thread registration,
* the callback function on the specified Stream
* is no longer processed by the specified thread
*
* @param threadId [IN] thread ID
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create context and associates it with the calling thread
*
* @par Function
* The following use cases are supported:
* @li If you don't call the aclrtCreateContext interface
* to explicitly create the context,
* the system will use the default context, which is implicitly created
* when the aclrtSetDevice interface is called.
* @li If multiple contexts are created in a process
* (there is no limit on the number of contexts),
* the current thread can only use one of them at the same time.
* It is recommended to explicitly specify the context of the current thread
* through the aclrtSetCurrentContext interface to increase
* the maintainability of the program.
*
* @param context [OUT] point to the created context
* @param deviceId [IN] device to create context on
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSetDevice | aclrtSetCurrentContext
*/
ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId);

/**
* @ingroup AscendCL
* @brief destroy context instance
*
* @par Function
* Can only destroy context created through aclrtCreateContext interface
*
* @param context [IN] the context to destroy
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateContext
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context);

/**
* @ingroup AscendCL
* @brief set the context of the thread
*
* @par Function
* The following scenarios are supported:
* @li If the aclrtCreateContext interface is called in a thread to explicitly
* create a Context (for example: ctx1), the thread's Context can be specified
* without calling the aclrtSetCurrentContext interface.
* The system uses ctx1 as the context of thread1 by default.
* @li If the aclrtCreateContext interface is not explicitly created,
* the system uses the default context as the context of the thread.
* At this time, the aclrtDestroyContext interface cannot be used to release
* the default context.
* @li If the aclrtSetCurrentContext interface is called multiple times to
* set the thread's Context, the last one prevails.
*
* @par Restriction
* @li If the device corresponding to the context set for the thread
* has been reset, you cannot set the context as the context of the thread,
* otherwise a business exception will result.
* @li It is recommended to use the context created in a thread.
* If the aclrtCreateContext interface is called in thread A to create a context,
* and the context is used in thread B,
* the user must guarantee the execution order of tasks in the same stream
* under the same context in two threads.
*
* @param context [IN] the current context of the thread
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateContext | aclrtDestroyContext
*/
ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context);

/**
* @ingroup AscendCL
* @brief get the context of the thread
*
* @par Function
* If the user calls the aclrtSetCurrentContext interface
* multiple times to set the context of the current thread,
* then the last set context is obtained
*
* @param context [OUT] the current context of the thread
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSetCurrentContext
*/
ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context);

/**
* @ingroup AscendCL
* @brief Specify the device to use for the operation
* implicitly create the default context and the default stream
*
* @par Function
* The following use cases are supported:
* @li Device can be specified in the process or thread.
* If you call the aclrtSetDevice interface multiple
* times to specify the same device,
* you only need to call the aclrtResetDevice interface to reset the device.
* @li The same device can be specified for operation
* in different processes or threads.
* @li Device is specified in a process,
* and multiple threads in the process can share this device to explicitly
* create a Context (aclrtCreateContext interface).
* @li In multi-device scenarios, you can switch to other devices
* through the aclrtSetDevice interface in the process.
*
* @param deviceId [IN] the device id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtResetDevice | aclrtCreateContext
*/
ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId);

/**
* @ingroup AscendCL
* @brief Reset the current operating Device and free resources on the device,
* including the default context, the default stream,
* and all streams created under the default context,
* and synchronizes the interface.
* If the task under the default context or stream has not been completed,
* the system will wait for the task to complete before releasing it.
*
* @par Restriction
* @li The Context, Stream, and Event that are explicitly created
* on the device to be reset. Before resetting,
* it is recommended to follow the following interface calling sequence,
* otherwise business abnormalities may be caused.
* @li Interface calling sequence:
* call aclrtDestroyEvent interface to release Event or
* call aclrtDestroyStream interface to release explicitly created Stream->
* call aclrtDestroyContext to release explicitly created Context->
* call aclrtResetDevice interface
*
* @param deviceId [IN] the device id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId);

/**
* @ingroup AscendCL
* @brief get target device of current thread
*
* @param deviceId [OUT] the device id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId);

/**
* @ingroup AscendCL
* @brief get target side
*
* @param runMode [OUT] the run mode
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode);

/**
* @ingroup AscendCL
* @brief Wait for compute device to finish
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void);

/**
* @ingroup AscendCL
* @brief Set Scheduling TS
*
* @param tsId [IN] the ts id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId);

/**
* @ingroup AscendCL
* @brief get total device number.
*
* @param count [OUT] the device number
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count);

/**
* @ingroup AscendCL
* @brief create event instance
*
* @param event [OUT] created event
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event);

/**
* @ingroup AscendCL
* @brief destroy event instance
*
* @par Function
* Only events created through the aclrtCreateEvent interface can be
* destroyed, synchronous interfaces. When destroying an event,
* the user must ensure that the tasks involved in the aclrtSynchronizeEvent
* interface or the aclrtStreamWaitEvent interface are completed before
* they are destroyed.
*
* @param event [IN] event to destroy
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event);

/**
* @ingroup AscendCL
* @brief Record an Event in the Stream
*
* @param event [IN] event to record
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Reset an event
*
* @par Function
* Users need to make sure to wait for the tasks in the Stream
* to complete before resetting the Event
*
* @param event [IN] event to reset
* @param stream [IN] stream handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Queries an event's status
*
* @param event [IN] event to query
* @param status [OUT] event status
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status);

/**
* @ingroup AscendCL
* @brief Block Host Running, wait event to be complete
*
* @param event [IN] event to wait
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event);

/**
* @ingroup AscendCL
* @brief computes the elapsed time between events.
*
* @param ms [OUT] time between start and end in ms
* @param start [IN] starting event
* @param end [IN] ending event
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end);

/**
* @ingroup AscendCL
* @brief alloc memory on device
*
* @par Function
* alloc for size linear memory on device
* and return a pointer to allocated memory by *devPtr
*
* @par Restriction
* @li The memory requested by the aclrtMalloc interface needs to be released
* through the aclrtFree interface.
* @li Before calling the media data processing interface,
* if you need to apply memory on the device to store input or output data,
* you need to call acldvppMalloc to apply for memory.
*
* @param devPtr [OUT] pointer to pointer to allocated memory on device
* @param size [IN] alloc memory size
* @param policy [IN] memory alloc policy
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtFree | acldvppMalloc | aclrtMallocCached
*/
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr,
size_t size,
aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
* @brief allocate memory on device with cache
*
* @par Function
* alloc for size linear memory on device
* and return a pointer to allocated memory by *devPtr
*
* @par Restriction
* @li The memory requested by the aclrtMallocCached interface needs to be released
* through the aclrtFree interface.
*
* @param devPtr [OUT] pointer to pointer to allocated memory on device
* @param size [IN] alloc memory size
* @param policy [IN] memory alloc policy
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtFree | aclrtMalloc
*/
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr,
size_t size,
aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
* @brief flush cache data to ddr
*
* @param devPtr [IN] the pointer that flush data to ddr
* @param size [IN] flush size
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size);

/**
* @ingroup AscendCL
* @brief invalidate cache data
*
* @param devPtr [IN] pointer to invalidate cache data
* @param size [IN] invalidate size
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size);

/**
* @ingroup AscendCL
* @brief free device memory
*
* @par Function
* can only free memory allocated through the aclrtMalloc interface
*
* @param devPtr [IN] Pointer to memory to be freed
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtMalloc
*/
ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr);

/**
* @ingroup AscendCL
* @brief alloc memory on host
*
* @par Restriction
* @li The requested memory cannot be used in the Device
* and needs to be explicitly copied to the Device.
* @li The memory requested by the aclrtMallocHost interface
* needs to be released through the aclrtFreeHost interface.
*
* @param hostPtr [OUT] pointer to pointer to allocated memory on the host
* @param size [IN] alloc memory size
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtFreeHost
*/
ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size);

/**
* @ingroup AscendCL
* @brief free host memory
*
* @par Function
* can only free memory allocated through the aclrtMallocHost interface
*
* @param hostPtr [IN] free memory pointer
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtMallocHost
*/
ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr);

/**
* @ingroup AscendCL
* @brief synchronous memory replication between host and device
*
* @param dst [IN] destination address pointer
* @param destMax [IN] Max length of the destination address memory
* @param src [IN] source address pointer
* @param count [IN] the length of byte to copy
* @param kind [IN] memcpy type
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst,
size_t destMax,
const void *src,
size_t count,
aclrtMemcpyKind kind);

/**
* @ingroup AscendCL
* @brief Initialize memory and set contents of memory to specified value
*
* @par Function
* The memory to be initialized is on the Host or device side,
* and the system determines whether
* it is host or device according to the address
*
* @param devPtr [IN] Starting address of memory
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] Set value
* @param count [IN] The length of memory
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count);

/**
* @ingroup AscendCL
* @brief Asynchronous memory replication between Host and Device
*
* @par Function
* After calling this interface,
* be sure to call the aclrtSynchronizeStream interface to ensure that
* the task of memory replication has been completed
*
* @par Restriction
* @li For on-chip Device-to-Device memory copy,
* both the source and destination addresses must be 64-byte aligned
*
* @param dst [IN] destination address pointer
* @param destMax [IN] Max length of destination address memory
* @param src [IN] source address pointer
* @param count [IN] the number of byte to copy
* @param kind [IN] memcpy type
* @param stream [IN] asynchronized task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst,
size_t destMax,
const void *src,
size_t count,
aclrtMemcpyKind kind,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Asynchronous initialize memory
* and set contents of memory to specified value async
*
* @par Function
* The memory to be initialized is on the Host or device side,
* and the system determines whether
* it is host or device according to the address
*
* @param devPtr [IN] destination address pointer
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] set value
* @param count [IN] the number of byte to set
* @param stream [IN] asynchronized task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr,
size_t maxCount,
int32_t value,
size_t count,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create stream instance
*
* @param stream [OUT] the created stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream);

/**
* @ingroup AscendCL
* @brief destroy stream instance
*
* @par Function
* Can only destroy streams created through the aclrtCreateStream interface
*
* @par Restriction
* Before calling the aclrtDestroyStream interface to destroy
* the specified Stream, you need to call the aclrtSynchronizeStream interface
* to ensure that the tasks in the Stream have been completed.
*
* @param stream [IN] the stream to destroy
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateStream | aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream);

/**
* @ingroup AscendCL
* @brief block the host until all tasks
* in the specified stream have completed
*
* @param stream [IN] the stream to wait
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Blocks the operation of the specified Stream until
* the specified Event is completed.
* Support for multiple streams waiting for the same event.
*
* @param stream [IN] the wait stream. If using the default Stream, set NULL
* @param event [IN] the event to wait
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event);

/**
* @ingroup AscendCL
* @brief set group
*
* @par Function
* set the task to the corresponding group
*
* @param groupId [IN] group id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail
*/
ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId);

/**
* @ingroup AscendCL
* @brief get the number of group
*
* @par Function
* get the number of group. if the number of group is zero,
* it means that group is not supported or group is not created.
*
* @param count [OUT] the number of group
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
*/
ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count);

/**
* @ingroup AscendCL
* @brief create group information
*
* @par Function
* Takes no arguments. The parameter list is spelled (void) because in C
* an empty list () leaves the parameters unspecified, so the compiler
* would not reject a call that passes arguments.
*
* @retval null for failed.
* @retval OtherValues success.
*
* @see aclrtDestroyGroupInfo
*/
ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo(void);

/**
* @ingroup AscendCL
* @brief destroy group information
*
* @param groupInfo [IN] pointer to group information
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateGroupInfo
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo);

/**
* @ingroup AscendCL
* @brief get all group information
*
* @param groupInfo [OUT] pointer to group information
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtGetGroupCount
*/
ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo);

/**
* @ingroup AscendCL
* @brief get detail information of group
*
* @param groupInfo [IN] pointer to group information
* @param groupId [IN] group index value
* @param attr [IN] group attribute
* @param attrValue [OUT] pointer to attribute value
* @param valueLen [IN] length of attribute value
* @param paramRetSize [OUT] pointer to real length of attribute value
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo
*/
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo,
int32_t groupId,
aclrtGroupAttr attr,
void *attrValue,
size_t valueLen,
size_t *paramRetSize);

/**
* @ingroup AscendCL
* @brief checking whether current device and peer device support the p2p feature
*
* @param canAccessPeer [OUT] pointer to save the checking result
* @param deviceId [IN] current device id
* @param peerDeviceId [IN] peer device id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess
*/
ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId);

/**
* @ingroup AscendCL
* @brief enable the peer device to support the p2p feature
*
* @param peerDeviceId [IN] the peer device id
* @param flags [IN] reserved field, now it must be zero
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess
*/
ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags);

/**
* @ingroup AscendCL
* @brief disable the peer device to support the p2p function
*
* @param peerDeviceId [IN] the peer device id
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess
*/
ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId);

/**
* @ingroup AscendCL
* @brief Obtain the free memory and total memory of specified attribute.
* the specified memory include normal memory and huge memory.
*
* @param attr [IN] the memory attribute of specified device
* @param free [OUT] the free memory of specified device
* @param total [OUT] the total memory of specified device.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_RT_H_


+ 283
- 0
inc/external/acl/acl/acl_tdt.h View File

@@ -0,0 +1,283 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_ACL_TDT_H_
#define INC_EXTERNAL_ACL_ACL_TDT_H_

#include "acl/acl_base.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
* @ingroup AscendCL
* @brief tensor type of a tdt data item
*
* Declared as a typedef because acltdtGetTensorTypeFromItem and
* acltdtCreateDataItem use the bare name acltdtTensorType; in C an enum tag
* cannot be used without the enum keyword unless it is also a typedef name.
*/
typedef enum acltdtTensorType {
    ACL_TENSOR_DATA_UNDEFINED = -1,
    ACL_TENSOR_DATA_TENSOR = 0,
    ACL_TENSOR_DATA_END_OF_SEQUENCE = 1,
    ACL_TENSOR_DATA_ABNORMAL = 2
} acltdtTensorType;

/* Handle types of the tdt interfaces. The structs are only forward-declared in this
 * header, so callers hold pointers obtained from the acltdtCreate* and acltdtGet* functions. */
typedef struct acltdtDataItem acltdtDataItem;
typedef struct acltdtDataset acltdtDataset;
typedef struct acltdtChannelHandle acltdtChannelHandle;

/**
* @ingroup AscendCL
* @brief Get tensor type from item
*
* @param dataItem [IN] pointer to the data item
*
* @retval Tensor type.
* @retval ACL_TENSOR_DATA_UNDEFINED if dataItem is null
*/
ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get data type from item
*
* @param dataItem [IN] pointer to the data item
*
* @retval Data type.
* @retval ACL_DT_UNDEFINED if dataItem is null
*/
ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get data address from item
*
* @param dataItem [IN] pointer to data item
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get data size from item
*
* @param dataItem [IN] pointer to data item
*
* @retval 0 for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get dim's number from item
*
* @param dataItem [IN] pointer to data item
*
* @retval 0 for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get dims from item
*
* @param dataItem [IN] the struct of data item
* @param dims [IN|OUT] pointer to the dims of dataItem
* @param dimNum [IN] the size of the dims
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum);

/**
* @ingroup AscendCL
* @brief Create the struct of data item
*
* @param tdtType [IN] Tdt tensor type
* @param dims [IN] pointer of tdtDataItem's dims
* @param dimNum [IN] Dim number
* @param dataType [IN] Data type
* @param data [IN] Data pointer
* @param size [IN] Data size
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtDestroyDataItem
*/
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType,
const int64_t *dims,
size_t dimNum,
aclDataType dataType,
void *data,
size_t size);

/**
* @ingroup AscendCL
* @brief Destroy the struct of data item
*
* @param dataItem [IN] pointer to the data item
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateDataItem
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Create the tdt dataset
*
* @par Function
* Takes no arguments. The parameter list is spelled (void) because in C
* an empty list () leaves the parameters unspecified, so the compiler
* would not reject a call that passes arguments.
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtDestroyDataset
*/
ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset(void);

/**
* @ingroup AscendCL
* @brief Destroy the tdt dataset
*
* @param dataset [IN] pointer to the dataset
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateDataset
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset);

/**
* @ingroup AscendCL
* @brief Get the data item
*
* @param dataset [IN] pointer to the dataset
* @param index [IN] index of the dataset
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtAddDataItem
*/
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index);

/**
* @ingroup AscendCL
* @brief Add the data item
*
* @param dataset [OUT] pointer to the dataset
* @param dataItem [IN] pointer to the data item
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtGetDataItem
*/
ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem);

/**
* @ingroup AscendCL
* @brief Get the size of dataset
*
* @param dataset [IN] pointer to the dataset
*
* @retval 0 for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset);

/**
* @ingroup AscendCL
* @brief Stop the channel
*
* @param handle [IN] pointer to the channel handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateChannel | acltdtDestroyChannel
*/
ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle);

/**
* @ingroup AscendCL
* @brief Create the channel
*
* @param deviceId [IN] the device id
* @param name [IN] the channel's name
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtStopChannel | acltdtDestroyChannel
*/
ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name);

/**
* @ingroup AscendCL
* @brief Destroy the channel
*
* @param handle [IN] pointer to the channel handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateChannel | acltdtStopChannel
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle);

/**
* @ingroup AscendCL
* @brief Send tensor to device
*
* @param handle [IN] pointer to the channel handle
* @param dataset [IN] pointer to the dataset
* @param timeout [IN] to be reserved, now it must be -1
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtReceiveTensor
*/
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
const acltdtDataset *dataset,
int32_t timeout);

/**
* @ingroup AscendCL
* @brief Receive tensor from device
*
* @param handle [IN] pointer to the channel handle
* @param dataset [OUT] pointer to the dataset
* @param timeout [IN] to be reserved, now it must be -1
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtSendTensor
*/
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle,
acltdtDataset *dataset,
int32_t timeout);

#ifdef __cplusplus
}
#endif

#endif //INC_EXTERNAL_ACL_ACL_TDT_H_


+ 75
- 0
inc/external/acl/acl/error_codes/ge_error_codes.h View File

@@ -0,0 +1,75 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_
#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_

/* Symbol-visibility helper: expands to an export attribute only when the
 * build defines FUNC_VISIBILITY, otherwise to nothing. */
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include <stddef.h>
#include <stdint.h> /* uint32_t is declared here, not in <stddef.h> */

#ifdef __cplusplus
extern "C" {
#endif
/* GE error codes. Each constant has internal linkage (static const), so every
 * translation unit that includes this header gets its own copy. */

/* 145xxx codes */
static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000;
static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007;
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009;
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011;
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012;
static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013;
static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014;
static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015;
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016;
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017;
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018;
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019;
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID = 145020;
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID = 145021;
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID = 145022;

/* 245xxx codes */
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000;
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001;

/* 545xxx codes */
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000;
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004;
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005;
static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006;
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007;
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008;
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009;

#ifdef __cplusplus
} // extern "C"
#endif
#endif // INC_EXTERNAL_GE_GE_ERROR_CODES_H_

+ 102
- 0
inc/external/acl/acl/error_codes/rt_error_codes.h View File

@@ -0,0 +1,102 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation; the guard name is kept unchanged here because other copies of
// this header may rely on the same guard - confirm before renaming.
#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
#define __INC_EXTERNEL_RT_ERROR_CODES_H__

#include <stddef.h>
#include <stdint.h> // int32_t is declared here, not in <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

// Runtime error codes. Each constant has internal linkage (static const), so
// every translation unit that includes this header gets its own copy.

static const int32_t ACL_RT_SUCCESS = 0; // success

// 107xxx codes
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type

// 207xxx codes
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource

// 507xxx codes
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error

#ifdef __cplusplus
} // extern "C"
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__

+ 431
- 0
inc/external/acl/acl/ops/acl_cblas.h View File

@@ -0,0 +1,431 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_
#define INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_

#include "acl/acl.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Transpose type of a matrix operand; passed as transA / transB / transC
 * to the aclblas* routines declared in this header.
 *
 * NOTE(review): presumably N = not transposed, T = transposed, and
 * NZ / NZ_T are the same two cases for data in NZ layout - confirm
 * against the AscendCL documentation.
 */
enum aclTransType {
    ACL_TRANS_N = 0,
    ACL_TRANS_T = 1,
    ACL_TRANS_NZ = 2,
    ACL_TRANS_NZ_T = 3
};
typedef enum aclTransType aclTransType;

/**
 * Computation type; passed as the `type` argument to the aclblas* routines
 * declared in this header to select high- or low-precision computation.
 */
enum aclComputeType {
    ACL_COMPUTE_HIGH_PRECISION = 0,
    ACL_COMPUTE_LOW_PRECISION = 1
};
typedef enum aclComputeType aclComputeType;

/**
* @ingroup AscendCL
* @brief perform the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param alpha [IN] pointer to scalar used for multiplication.
* of same type as dataTypeY
* @param a [IN] pointer to matrix A
* @param lda [IN] leading dimension used to store the matrix A
* @param dataTypeA [IN] datatype of matrix A
* @param x [IN] pointer to vector x
* @param incx [IN] stride between consecutive elements of vector x
* @param dataTypeX [IN] datatype of vector x
* @param beta [IN] pointer to scalar used for multiplication.
* of same type as dataTypeY. If beta == 0,
* then y does not have to be a valid input
* @param y [IN|OUT] pointer to vector y
* @param incy [IN] stride between consecutive elements of vector y
* @param dataTypeY [IN] datatype of vector y
* @param type [IN] computation type
* @param stream [IN] stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n,
const void *alpha, const void *a, int lda, aclDataType dataTypeA,
const void *x, int incx, aclDataType dataTypeX,
const void *beta, void *y, int incy, aclDataType dataTypeY,
aclComputeType type, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param dataTypeA [IN] datatype of matrix A
* @param dataTypeX [IN] datatype of vector x
* @param dataTypeY [IN] datatype of vector y
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA,
int m,
int n,
aclDataType dataTypeA,
aclDataType dataTypeX,
aclDataType dataTypeY,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
* @brief perform the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param alpha [IN] pointer to scalar used for multiplication
* @param a [IN] pointer to matrix A
* @param lda [IN] leading dimension used to store the matrix A
* @param x [IN] pointer to vector x
* @param incx [IN] stride between consecutive elements of vector x
* @param beta [IN] pointer to scalar used for multiplication.
* If beta value == 0,
* then y does not have to be a valid input
* @param y [IN|OUT] pointer to vector y
* @param incy [IN] stride between consecutive elements of vector y
* @param type [IN] computation type
* @param stream [IN] stream
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA,
int m,
int n,
const aclFloat16 *alpha,
const aclFloat16 *a,
int lda,
const aclFloat16 *x,
int incx,
const aclFloat16 *beta,
aclFloat16 *y,
int incy,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA,
int m,
int n,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
* @brief perform the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param alpha [IN] pointer to scalar used for multiplication
* @param a [IN] pointer to matrix A
* @param lda [IN] leading dimension used to store the matrix A
* @param x [IN] pointer to vector x
* @param incx [IN] stride between consecutive elements of vector x
* @param beta [IN] pointer to scalar used for multiplication.
* If beta value == 0,
* then y does not have to be a valid input
* @param y [IN|OUT] pointer to vector y
* @param incy [IN] stride between consecutive elements of vector y
* @param type [IN] computation type
* @param stream [IN] stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA,
int m,
int n,
const int32_t *alpha,
const int8_t *a,
int lda,
const int8_t *x,
int incx,
const int32_t *beta,
int32_t *y,
int incy,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA,
int m,
int n,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
* @brief perform the matrix-matrix multiplication
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param alpha [IN] pointer to scalar used for multiplication. of same type as dataTypeC
* @param matrixA [IN] pointer to matrix A
* @param lda [IN] leading dimension array used to store matrix A
* @param dataTypeA [IN] datatype of matrix A
* @param matrixB [IN] pointer to matrix B
* @param ldb [IN] leading dimension array used to store matrix B
* @param dataTypeB [IN] datatype of matrix B
* @param beta [IN] pointer to scalar used for multiplication.
* of same type as dataTypeC If beta == 0,
* then matrixC does not have to be a valid input
* @param matrixC [IN|OUT] pointer to matrix C
* @param ldc [IN] leading dimension array used to store matrix C
* @param dataTypeC [IN] datatype of matrix C
* @param type [IN] computation type
* @param stream [IN] stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const void *alpha,
const void *matrixA,
int lda,
aclDataType dataTypeA,
const void *matrixB,
int ldb,
aclDataType dataTypeB,
const void *beta,
void *matrixC,
int ldc,
aclDataType dataTypeC,
aclComputeType type,
aclrtStream stream);


/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-matrix multiplication
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param dataTypeA [IN] datatype of matrix A
* @param dataTypeB [IN] datatype of matrix B
* @param dataTypeC [IN] datatype of matrix C
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclDataType dataTypeA,
aclDataType dataTypeB,
aclDataType dataTypeC,
aclComputeType type,
aclopHandle **handle);


/**
* @ingroup AscendCL
* @brief perform the matrix-matrix multiplication
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param alpha [IN] pointer to scalar used for multiplication
* @param matrixA [IN] pointer to matrix A
* @param lda [IN] leading dimension used to store the matrix A
* @param matrixB [IN] pointer to matrix B
* @param ldb [IN] leading dimension used to store the matrix B
* @param beta [IN] pointer to scalar used for multiplication.
* If beta value == 0,
* then matrixC does not have to be a valid input
* @param matrixC [IN|OUT] pointer to matrix C
* @param ldc [IN] leading dimension used to store the matrix C
* @param type [IN] computation type
* @param stream [IN] stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const aclFloat16 *alpha,
const aclFloat16 *matrixA,
int lda,
const aclFloat16 *matrixB,
int ldb,
const aclFloat16 *beta,
aclFloat16 *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-matrix multiplication
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
* @brief perform the matrix-matrix multiplication
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param alpha [IN] pointer to scalar used for multiplication
* @param matrixA [IN] pointer to matrix A
* @param lda [IN] leading dimension used to store the matrix A
* @param matrixB [IN] pointer to matrix B
* @param ldb [IN] leading dimension used to store the matrix B
* @param beta [IN] pointer to scalar used for multiplication.
* If beta value == 0,
* then matrixC does not have to be a valid input
* @param matrixC [IN|OUT] pointer to matrix C
* @param ldc [IN] leading dimension used to store the matrix C
* @param type [IN] computation type
* @param stream [IN] stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const int32_t *alpha,
const int8_t *matrixA,
int lda,
const int8_t *matrixB,
int ldb,
const int32_t *beta,
int32_t *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);


/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-matrix multiplication
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
aclopHandle **handle);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_

+ 2493
- 0
inc/external/acl/acl/ops/acl_dvpp.h
File diff suppressed because it is too large
View File


+ 351
- 0
inc/external/acl/acl/ops/acl_fv.h View File

@@ -0,0 +1,351 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
#define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_

#include "acl/acl.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct aclfvInitPara aclfvInitPara;
typedef struct aclfvFeatureInfo aclfvFeatureInfo;
typedef struct aclfvRepoRange aclfvRepoRange;
typedef struct aclfvQueryTable aclfvQueryTable;
typedef struct aclfvSearchInput aclfvSearchInput;
typedef struct aclfvSearchResult aclfvSearchResult;

// search operation type
// Declared with a typedef so the bare name `aclfvSearchType` used by the
// prototypes in this header is a valid type name in C as well as in C++.
typedef enum aclfvSearchType {
    SEARCH_1_N, // 1:N operation type
    SEARCH_N_M  // N:M operation type
} aclfvSearchType;

/**
* @ingroup AscendCL
* @brief Create fv init param.
*
* @param fsNum [IN] The feature num
*
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY aclfvInitPara *aclfvCreateInitPara(uint64_t fsNum);

/**
* @ingroup AscendCL
* @brief Destroy fv init param.
*
* @par Function
* Can only destroy fv init param information created
* through aclfvCreateInitPara interface.
*
* @param initPara [IN] fv init param.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateInitPara
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroyInitPara(aclfvInitPara *initPara);

/**
* @ingroup AscendCL
* @brief set value for maxTopNumFor1N which in fv init param.
*
* @param initPara [IN|OUT] fv init param.
* @param maxTopNumFor1N [IN] maxTopNumFor1N value for init param.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclfvSet1NTopNum(aclfvInitPara *initPara, uint32_t maxTopNumFor1N);

/**
* @ingroup AscendCL
* @brief set value for maxTopNumForNM which in fv init param.
*
* @param initPara [IN|OUT] fv init param.
* @param maxTopNumForNM [IN] maxTopNumForNM value for init param.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t maxTopNumForNM);

/**
* @ingroup AscendCL
* @brief Create fv feature info.
*
* @param id0 [IN] The first level library id0
* @param id1 [IN] Secondary library id1
* @param offset [IN] The offset of the first feature in the library
* @param featureLen [IN] Single feature length
* @param featureCount [IN] Single feature count
* @param featureData [IN] Feature value list
* @param featureDataLen [IN] Feature value list length
*
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset,
uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen);

/**
* @ingroup AscendCL
* @brief Destroy fv feature info.
*
* @par Function
* Can only destroy fv feature info information created
* through aclfvCreateFeatureInfo interface.
*
* @param featureInfo [IN] fv feature info.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateFeatureInfo
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroyFeatureInfo(aclfvFeatureInfo *featureInfo);

/**
* @ingroup AscendCL
* @brief Create fv repo range.
*
* @param id0Min [IN] id0 start value
* @param id0Max [IN] id0 max
* @param id1Min [IN] id1 start value
* @param id1Max [IN] id1 max
*
* @retval null for failed. OtherValues success
*/
ACL_FUNC_VISIBILITY aclfvRepoRange *aclfvCreateRepoRange(uint32_t id0Min, uint32_t id0Max, uint32_t id1Min,
uint32_t id1Max);

/**
* @ingroup AscendCL
* @brief Destroy fv repo range.
*
* @par Function
* Can only destroy fv repo range information created
* through aclfvCreateRepoRange interface.
*
* @param repoRange [IN] fv repo range.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateRepoRange
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroyRepoRange(aclfvRepoRange *repoRange);

/**
* @ingroup AscendCL
* @brief Create query table.
*
* @param queryCnt [IN] Number of tables, the maximum number is 6
* @param tableLen [IN] Single table length, table length is 32KB
* @param tableData [IN] Feature value list
* @param tableDataLen [IN] The length of memory requested by the tableData pointer
*
* @retval null for failed. OtherValues success
*/
ACL_FUNC_VISIBILITY aclfvQueryTable *aclfvCreateQueryTable(uint32_t queryCnt, uint32_t tableLen, uint8_t *tableData,
uint32_t tableDataLen);

/**
* @ingroup AscendCL
* @brief Destroy query table.
*
* @par Function
* Can only destroy query table information created
* through aclfvCreateQueryTable interface.
*
* @param queryTable [IN] query table.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateQueryTable
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroyQueryTable(aclfvQueryTable *queryTable);

/**
* @ingroup AscendCL
* @brief Create search input.
*
* @param queryTable [IN] query table
* @param repoRange [IN] query repo range
* @param topk [IN] query topk
*
* @retval null for failed. OtherValues success
*/
ACL_FUNC_VISIBILITY aclfvSearchInput *aclfvCreateSearchInput(aclfvQueryTable *queryTable, aclfvRepoRange *repoRange,
uint32_t topk);

/**
* @ingroup AscendCL
* @brief Destroy search input.
*
* @par Function
* Can only destroy search input information created
* through aclfvCreateSearchInput interface.
*
* @param searchInput [IN] search input.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateSearchInput
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInput);

/**
* @ingroup AscendCL
* @brief Create search result.
*
* @param queryCnt [IN] Retrieve the number of features
* @param resultNum [IN] The number of search results for each feature, the number is queryCnt
* @param resultNumDataLen [IN] resultNum memory length
* @param id0 [IN] Level 1 library id0
* @param id1 [IN] Secondary library id1
* @param resultOffset [IN] The offset of the bottom library corresponding
* to each feature retrieval result, total length topK * queryCnt
* @param resultDistance [IN] Distance, total length topK * queryCnt
* @param dataLen [IN] The memory size requested by
* id0\id1\resultOffset\resultDistance
*
* @retval null for failed. OtherValues success
*/
ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum,
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance,
uint32_t dataLen);

/**
* @ingroup AscendCL
* @brief Destroy search result.
*
* @par Function
* Can only destroy search result information created
* through aclfvCreateSearchResult interface.
*
* @param searchResult [IN] search result.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateSearchResult
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroySearchResult(aclfvSearchResult *searchResult);

/**
* @ingroup AscendCL
* @brief fv IP initialize.
*
* @param initPara [IN] fv init param.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvInit(aclfvInitPara *initPara);

/**
* @ingroup AscendCL
* @brief release fv resources.
*
* @par Function
* Can only release fv resources created
* through aclfvInit interface.
*
* Takes no parameters. Declared with (void) so that C compilers treat the
* declaration as a full prototype rather than an unspecified parameter list.
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*
* @see aclfvInit
*/
ACL_FUNC_VISIBILITY aclError aclfvRelease(void);

/**
* @ingroup AscendCL
* @brief fv repo add.
*
* @param type [IN] repo add type
* @param featureInfo [IN] add feature information
* @param stream [IN] stream of task execute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief fv repo del.
*
* @param type [IN] repo delete type
* @param repoRange [IN] repo range information
* @param stream [IN] stream of task execute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief fv accurate del.
*
* @param featureInfo [IN] accurate delete feature information
* @param stream [IN] stream of task execute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief fv accurate modify.
*
* @param featureInfo [IN] accurate modify feature information
* @param stream [IN] stream of task execute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief fv search.
*
* @param type [IN] search type
* @param searchInput [IN] search input
* @param searchRst [OUT] search result
* @param stream [IN] stream of task execute
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput *searchInput,
aclfvSearchResult *searchRst, aclrtStream stream);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_

+ 46
- 39
inc/external/acl/acl_base.h View File

@@ -111,6 +111,7 @@ static const int ACL_ERROR_DUMP_NOT_RUN = 100045;
static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046;
static const int ACL_ERROR_PROF_API_CONFLICT = 148047;
static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048;
static const int ACL_ERROR_INVALID_OPP_PATH = 148049;

static const int ACL_ERROR_BAD_ALLOC = 200000;
static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
@@ -134,42 +135,42 @@ static const int ACL_ERROR_PROFILING_FAILURE = 500005;
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE

typedef enum {
ACL_DT_UNDEFINED = -1,
ACL_FLOAT = 0,
ACL_FLOAT16 = 1,
ACL_INT8 = 2,
ACL_INT32 = 3,
ACL_UINT8 = 4,
ACL_INT16 = 6,
ACL_UINT16 = 7,
ACL_UINT32 = 8,
ACL_INT64 = 9,
ACL_UINT64 = 10,
ACL_DOUBLE = 11,
ACL_BOOL = 12,
ACL_STRING = 13,
ACL_DT_UNDEFINED = -1,
ACL_FLOAT = 0,
ACL_FLOAT16 = 1,
ACL_INT8 = 2,
ACL_INT32 = 3,
ACL_UINT8 = 4,
ACL_INT16 = 6,
ACL_UINT16 = 7,
ACL_UINT32 = 8,
ACL_INT64 = 9,
ACL_UINT64 = 10,
ACL_DOUBLE = 11,
ACL_BOOL = 12,
ACL_STRING = 13,
} aclDataType;

typedef enum {
ACL_FORMAT_UNDEFINED = -1,
ACL_FORMAT_NCHW = 0,
ACL_FORMAT_NHWC = 1,
ACL_FORMAT_ND = 2,
ACL_FORMAT_NC1HWC0 = 3,
ACL_FORMAT_FRACTAL_Z = 4,
ACL_FORMAT_NC1HWC0_C04 = 12,
ACL_FORMAT_NDHWC = 27,
ACL_FORMAT_FRACTAL_NZ = 29,
ACL_FORMAT_NCDHW = 30,
ACL_FORMAT_NDC1HWC0 = 32,
ACL_FRACTAL_Z_3D = 33
ACL_FORMAT_UNDEFINED = -1,
ACL_FORMAT_NCHW = 0,
ACL_FORMAT_NHWC = 1,
ACL_FORMAT_ND = 2,
ACL_FORMAT_NC1HWC0 = 3,
ACL_FORMAT_FRACTAL_Z = 4,
ACL_FORMAT_NC1HWC0_C04 = 12,
ACL_FORMAT_NDHWC = 27,
ACL_FORMAT_FRACTAL_NZ = 29,
ACL_FORMAT_NCDHW = 30,
ACL_FORMAT_NDC1HWC0 = 32,
ACL_FRACTAL_Z_3D = 33
} aclFormat;

typedef enum {
ACL_DEBUG = 0,
ACL_INFO = 1,
ACL_WARNING = 2,
ACL_ERROR = 3,
ACL_DEBUG = 0,
ACL_INFO = 1,
ACL_WARNING = 2,
ACL_ERROR = 3,
} aclLogLevel;

/**
@@ -304,7 +305,9 @@ ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);
* @retval aclTensorDesc pointer.
* @retval nullptr if param is invalid or run out of memory
*/
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims,
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType,
int numDims,
const int64_t *dims,
aclFormat format);

/**
@@ -326,7 +329,8 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc,
size_t dimsCount,
int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);

/**
@@ -423,7 +427,9 @@ ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, si
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum,
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc,
size_t index,
size_t dimRangeNum,
int64_t *dimRange);

/**
@@ -460,7 +466,7 @@ ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
aclTensorDesc **dstDesc);
aclTensorDesc **dstDesc);

/**
* @ingroup AscendCL
@@ -548,7 +554,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int nu
*
* @retval null for failed.
* @retval OtherValues success.
*/
*/
ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);

/**
@@ -559,7 +565,7 @@ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc,
*
* @retval null for failed
* @retval OtherValues success
*/
*/
ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);

/**
@@ -599,12 +605,13 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBu
* @param ... [IN] the value of current log
*/
ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
const char *fmt, ...);
const char *fmt, ...);

#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
#define ACL_APP_LOG(level, fmt, ...) \
aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_BASE_H_
#endif // INC_EXTERNAL_ACL_ACL_BASE_H_

+ 190
- 166
inc/external/acl/acl_mdl.h View File

@@ -27,19 +27,19 @@
extern "C" {
#endif

#define ACL_MAX_DIM_CNT 128
#define ACL_MAX_TENSOR_NAME_LEN 128
#define ACL_MAX_BATCH_NUM 128
#define ACL_MAX_HW_NUM 128
#define ACL_MAX_SHAPE_COUNT 128
#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF
#define ACL_MDL_LOAD_FROM_FILE 1
#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2
#define ACL_MDL_LOAD_FROM_MEM 3
#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4
#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5
#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6
#define ACL_MAX_DIM_CNT 128
#define ACL_MAX_TENSOR_NAME_LEN 128
#define ACL_MAX_BATCH_NUM 128
#define ACL_MAX_HW_NUM 128
#define ACL_MAX_SHAPE_COUNT 128
#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF
#define ACL_MDL_LOAD_FROM_FILE 1
#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2
#define ACL_MDL_LOAD_FROM_MEM 3
#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4
#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5
#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6

#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"
@@ -51,123 +51,123 @@ typedef struct aclAippExtendInfo aclAippExtendInfo;
typedef struct aclmdlConfigHandle aclmdlConfigHandle;

typedef enum {
ACL_YUV420SP_U8 = 1,
ACL_XRGB8888_U8,
ACL_RGB888_U8,
ACL_YUV400_U8,
ACL_NC1HWC0DI_FP16,
ACL_NC1HWC0DI_S8,
ACL_ARGB8888_U8,
ACL_YUYV_U8,
ACL_YUV422SP_U8,
ACL_AYUV444_U8,
ACL_RAW10,
ACL_RAW12,
ACL_RAW16,
ACL_RAW24,
ACL_AIPP_RESERVED = 0xffff,
ACL_YUV420SP_U8 = 1,
ACL_XRGB8888_U8,
ACL_RGB888_U8,
ACL_YUV400_U8,
ACL_NC1HWC0DI_FP16,
ACL_NC1HWC0DI_S8,
ACL_ARGB8888_U8,
ACL_YUYV_U8,
ACL_YUV422SP_U8,
ACL_AYUV444_U8,
ACL_RAW10,
ACL_RAW12,
ACL_RAW16,
ACL_RAW24,
ACL_AIPP_RESERVED = 0xffff,
} aclAippInputFormat;

typedef enum {
ACL_MDL_PRIORITY_INT32 = 0,
ACL_MDL_LOAD_TYPE_SIZET,
ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */
ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */
ACL_MDL_MEM_SIZET,
ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */
ACL_MDL_WEIGHT_SIZET,
ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to workspace memory of model with shallow copy */
ACL_MDL_WORKSPACE_SIZET,
ACL_MDL_INPUTQ_NUM_SIZET,
ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */
ACL_MDL_OUTPUTQ_NUM_SIZET,
ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */
ACL_MDL_PRIORITY_INT32 = 0,
ACL_MDL_LOAD_TYPE_SIZET,
ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */
ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */
ACL_MDL_MEM_SIZET,
ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */
ACL_MDL_WEIGHT_SIZET,
ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to workspace memory of model with shallow copy */
ACL_MDL_WORKSPACE_SIZET,
ACL_MDL_INPUTQ_NUM_SIZET,
ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */
ACL_MDL_OUTPUTQ_NUM_SIZET,
ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */
} aclmdlConfigAttr;

typedef enum {
ACL_DATA_WITHOUT_AIPP = 0,
ACL_DATA_WITH_STATIC_AIPP,
ACL_DATA_WITH_DYNAMIC_AIPP,
ACL_DYNAMIC_AIPP_NODE
ACL_DATA_WITHOUT_AIPP = 0,
ACL_DATA_WITH_STATIC_AIPP,
ACL_DATA_WITH_DYNAMIC_AIPP,
ACL_DYNAMIC_AIPP_NODE
} aclmdlInputAippType;

typedef struct aclmdlIODims {
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */
size_t dimCount; /**< dim array count */
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */
size_t dimCount; /**< dim array count */
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */
} aclmdlIODims;

typedef struct aclAippDims {
aclmdlIODims srcDims; /**< input dims before model transform */
size_t srcSize; /**< input size before model transform */
aclmdlIODims aippOutdims; /**< aipp output dims */
size_t aippOutSize; /**< aipp output size */
aclmdlIODims srcDims; /**< input dims before model transform */
size_t srcSize; /**< input size before model transform */
aclmdlIODims aippOutdims; /**< aipp output dims */
size_t aippOutSize; /**< aipp output size */
} aclAippDims;

typedef struct aclmdlBatch {
size_t batchCount; /**< batch array count */
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */
size_t batchCount; /**< batch array count */
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */
} aclmdlBatch;

typedef struct aclmdlHW {
size_t hwCount; /**< height&width array count */
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */
size_t hwCount; /**< height&width array count */
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */
} aclmdlHW;

typedef struct aclAippInfo {
aclAippInputFormat inputFormat;
int32_t srcImageSizeW;
int32_t srcImageSizeH;
int8_t cropSwitch;
int32_t loadStartPosW;
int32_t loadStartPosH;
int32_t cropSizeW;
int32_t cropSizeH;
int8_t resizeSwitch;
int32_t resizeOutputW;
int32_t resizeOutputH;
int8_t paddingSwitch;
int32_t leftPaddingSize;
int32_t rightPaddingSize;
int32_t topPaddingSize;
int32_t bottomPaddingSize;
int8_t cscSwitch;
int8_t rbuvSwapSwitch;
int8_t axSwapSwitch;
int8_t singleLineMode;
int32_t matrixR0C0;
int32_t matrixR0C1;
int32_t matrixR0C2;
int32_t matrixR1C0;
int32_t matrixR1C1;
int32_t matrixR1C2;
int32_t matrixR2C0;
int32_t matrixR2C1;
int32_t matrixR2C2;
int32_t outputBias0;
int32_t outputBias1;
int32_t outputBias2;
int32_t inputBias0;
int32_t inputBias1;
int32_t inputBias2;
int32_t meanChn0;
int32_t meanChn1;
int32_t meanChn2;
int32_t meanChn3;
float minChn0;
float minChn1;
float minChn2;
float minChn3;
float varReciChn0;
float varReciChn1;
float varReciChn2;
float varReciChn3;
aclFormat srcFormat;
aclDataType srcDatatype;
size_t srcDimNum;
size_t shapeCount;
aclAippDims outDims[ACL_MAX_SHAPE_COUNT];
aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */
aclAippInputFormat inputFormat;
int32_t srcImageSizeW;
int32_t srcImageSizeH;
int8_t cropSwitch;
int32_t loadStartPosW;
int32_t loadStartPosH;
int32_t cropSizeW;
int32_t cropSizeH;
int8_t resizeSwitch;
int32_t resizeOutputW;
int32_t resizeOutputH;
int8_t paddingSwitch;
int32_t leftPaddingSize;
int32_t rightPaddingSize;
int32_t topPaddingSize;
int32_t bottomPaddingSize;
int8_t cscSwitch;
int8_t rbuvSwapSwitch;
int8_t axSwapSwitch;
int8_t singleLineMode;
int32_t matrixR0C0;
int32_t matrixR0C1;
int32_t matrixR0C2;
int32_t matrixR1C0;
int32_t matrixR1C1;
int32_t matrixR1C2;
int32_t matrixR2C0;
int32_t matrixR2C1;
int32_t matrixR2C2;
int32_t outputBias0;
int32_t outputBias1;
int32_t outputBias2;
int32_t inputBias0;
int32_t inputBias1;
int32_t inputBias2;
int32_t meanChn0;
int32_t meanChn1;
int32_t meanChn2;
int32_t meanChn3;
float minChn0;
float minChn1;
float minChn2;
float minChn3;
float varReciChn0;
float varReciChn1;
float varReciChn2;
float varReciChn3;
aclFormat srcFormat;
aclDataType srcDatatype;
size_t srcDimNum;
size_t shapeCount;
aclAippDims outDims[ACL_MAX_SHAPE_COUNT];
aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */
} aclAippInfo;

/**
@@ -339,7 +339,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId);
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize,
uint32_t *modelId);

/**
* @ingroup AscendCL
@@ -361,8 +362,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSi
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr,
size_t workSize, void *weightPtr, size_t weightSize);
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath,
uint32_t *modelId, void *workPtr, size_t workSize,
void *weightPtr, size_t weightSize);

/**
* @ingroup AscendCL
@@ -385,9 +387,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, ui
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId,
void *workPtr, size_t workSize, void *weightPtr,
size_t weightSize);
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize,
uint32_t *modelId, void *workPtr, size_t workSize,
void *weightPtr, size_t weightSize);

/**
* @ingroup AscendCL
@@ -422,8 +424,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId,
const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ,
size_t outputQNum);
const uint32_t *inputQ, size_t inputQNum,
const uint32_t *outputQ, size_t outputQNum);

/**
* @ingroup AscendCL
@@ -453,8 +455,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem
*/
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input,
aclmdlDataset *output, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -829,11 +831,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0,
int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0,
int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0,
int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch,
int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2,
int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2,
int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1,
uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0,
uint8_t cscInputBiasR1, uint8_t cscInputBiasR2);
@@ -849,7 +851,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, in
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch);

/**
@@ -863,7 +865,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSe
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch);

/**
@@ -878,7 +880,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW,
int32_t srcImageSizeH);

@@ -898,10 +900,14 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW,
int32_t scfInputSizeH, int32_t scfOutputSizeW,
int32_t scfOutputSizeH, uint64_t batchIndex);
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet,
int8_t scfSwitch,
int32_t scfInputSizeW,
int32_t scfInputSizeH,
int32_t scfOutputSizeW,
int32_t scfOutputSizeH,
uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -919,9 +925,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, in
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW,
int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH,
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet,
int8_t cropSwitch,
int32_t cropStartPosW,
int32_t cropStartPosH,
int32_t cropSizeW,
int32_t cropSizeH,
uint64_t batchIndex);

/**
@@ -940,7 +950,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, i
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch,
int32_t paddingSizeTop, int32_t paddingSizeBottom,
int32_t paddingSizeLeft, int32_t paddingSizeRight,
@@ -961,10 +971,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0,
int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2,
int16_t dtcPixelMeanChn3, uint64_t batchIndex);
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
int16_t dtcPixelMeanChn0,
int16_t dtcPixelMeanChn1,
int16_t dtcPixelMeanChn2,
int16_t dtcPixelMeanChn3,
uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -981,10 +994,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0,
float dtcPixelMinChn1, float dtcPixelMinChn2,
float dtcPixelMinChn3, uint64_t batchIndex);
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
float dtcPixelMinChn0,
float dtcPixelMinChn1,
float dtcPixelMinChn2,
float dtcPixelMinChn3,
uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -1001,10 +1017,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0,
float dtcPixelVarReciChn1, float dtcPixelVarReciChn2,
float dtcPixelVarReciChn3, uint64_t batchIndex);
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
float dtcPixelVarReciChn0,
float dtcPixelVarReciChn1,
float dtcPixelVarReciChn2,
float dtcPixelVarReciChn3,
uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -1020,8 +1039,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index,
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId,
aclmdlDataset *dataset,
size_t index,
const aclmdlAIPP *aippParmsSet);

/**
@@ -1038,8 +1059,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index,
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId,
aclmdlDataset *dataset,
size_t index,
const aclmdlAIPP *aippParmsSet);

/**
@@ -1057,8 +1080,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlD
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type,
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId,
size_t index,
aclmdlInputAippType *type,
size_t *dynamicAttachedDataIndex);

/**
@@ -1075,7 +1100,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo);

/**
@@ -1094,11 +1119,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t ind
*
* @retval ACL_SUCCESS The function is successfully executed
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId,
char *opName, size_t opNameLen, aclTensorDesc **inputDesc,
size_t *numInputs, aclTensorDesc **outputDesc,
size_t *numOutputs);
*/
ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId,
uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs,
aclTensorDesc **outputDesc, size_t *numOutputs);

/**
* @ingroup AscendCL
@@ -1106,7 +1130,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlInitDump();

/**
@@ -1117,7 +1141,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlInitDump();
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);

/**
@@ -1126,7 +1150,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();

/**
@@ -1138,7 +1162,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId);

/**
@@ -1148,7 +1172,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *hand
* @retval the aclmdlConfigHandle pointer
*
* @see aclmdlDestroyConfigHandle
*/
*/
ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle();

/**
@@ -1177,10 +1201,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr,
const void *attrValue, size_t valueSize);
const void *attrValue, size_t valueSize);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_
#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_

+ 82
- 37
inc/external/acl/acl_op.h View File

@@ -33,9 +33,9 @@ typedef void (*aclDataDeallocator)(void *data, size_t length);
static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;

typedef enum aclEngineType {
ACL_ENGINE_SYS,
ACL_ENGINE_AICORE,
ACL_ENGINE_VECTOR,
ACL_ENGINE_SYS,
ACL_ENGINE_AICORE,
ACL_ENGINE_VECTOR,
} aclopEngineType;

/**
@@ -148,7 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
const uint8_t *values);
const uint8_t *values);

/**
* @ingroup AscendCL
@@ -163,7 +163,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *a
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
const int64_t *values);
const int64_t *values);

/**
* @ingroup AscendCL
@@ -178,7 +178,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *at
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
const float *values);
const float *values);

/**
* @ingroup AscendCL
@@ -193,7 +193,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
const char **values);
const char **values);

/**
* @ingroup AscendCL
@@ -208,8 +208,11 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists,
const int *numValues, const int64_t *const values[]);
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr,
const char *attrName,
int numLists,
const int *numValues,
const int64_t *const values[]);

/**
* @ingroup AscendCL
@@ -239,10 +242,15 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char
* @retval OtherValues Failure
*/
ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead")
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
const aclDataBuffer *const inputs[], int numOutputs,
const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
const aclopAttr *attr, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
const aclDataBuffer *const inputs[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
aclDataBuffer *const outputs[],
const aclopAttr *attr,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -272,9 +280,15 @@ ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, con
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType,
int numInputs,
aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[],
int numOutputs,
aclTensorDesc *outputDesc[],
aclDataBuffer *outputs[],
aclopAttr *attr,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -292,9 +306,12 @@ ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, a
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs,
const aclTensorDesc *const inputDesc[], int numOutputs,
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *opAttr,
aclopHandle **handle);

/**
@@ -326,9 +343,12 @@ ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);
*
* @see aclopCreateHandle | aclCreateDataBuffer
*/
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs,
const aclDataBuffer *const inputs[], int numOutputs,
aclDataBuffer *const outputs[], aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle,
int numInputs,
const aclDataBuffer *const inputs[],
int numOutputs,
aclDataBuffer *const outputs[],
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -344,8 +364,11 @@ ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInp
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer,
const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate,
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc,
const aclDataBuffer *srcBuffer,
const aclTensorDesc *dstDesc,
aclDataBuffer *dstBuffer,
uint8_t truncate,
aclrtStream stream);

/**
@@ -360,9 +383,12 @@ ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDa
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate,
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc,
aclTensorDesc *dstDesc,
uint8_t truncate,
aclopHandle **handle);


/**
* @ingroup AscendCL
* @brief create kernel
@@ -381,10 +407,15 @@ ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, ac
*
* @see aclopCompile
*/
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName,
void *binData, int binSize, aclopEngineType enginetype,
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType,
const char *kernelId,
const char *kernelName,
void *binData,
int binSize,
aclopEngineType enginetype,
aclDataDeallocator deallocator);


/**
* @ingroup AscendCL
* @brief create kernel
@@ -399,8 +430,11 @@ ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *k
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs,
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
typedef aclError (*aclopCompileFunc)(int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *opAttr,
aclopKernelDesc *aclopKernelDesc);

/**
@@ -441,8 +475,11 @@ ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim,
const void *args, uint32_t argSize);
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc,
const char *kernelId,
uint32_t blockDim,
const void *args,
uint32_t argSize);

/**
* @ingroup AscendCL
@@ -473,9 +510,12 @@ ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kerne
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs,
const aclTensorDesc *const inputDesc[], int numOutputs,
const aclTensorDesc *const outputDesc[], const aclopAttr *attr);
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *attr);

/**
* @ingroup AscendCL
@@ -493,12 +533,17 @@ ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType,
int numInputs,
aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[],
int numOutputs,
aclTensorDesc *outputDesc[],
aclopAttr *attr);


#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_H_
#endif // INC_EXTERNAL_ACL_ACL_OP_H_

+ 28
- 19
inc/external/acl/acl_op_compiler.h View File

@@ -24,18 +24,21 @@
extern "C" {
#endif

typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType;
/**
 * Compile-type selector, exposed to callers under the typedef name aclopCompileType.
 * The enumerator values are written out explicitly; they equal the implicit
 * numbering (0, 1) the compiler would assign.
 */
typedef enum aclCompileType {
    ACL_COMPILE_SYS = 0,
    ACL_COMPILE_UNREGISTERED = 1
} aclopCompileType;

typedef enum {
ACL_PRECISION_MODE,
ACL_AICORE_NUM,
ACL_AUTO_TUNE_MODE,
ACL_OP_SELECT_IMPL_MODE,
ACL_OPTYPELIST_FOR_IMPLMODE,
ACL_OP_DEBUG_LEVEL,
ACL_DEBUG_DIR,
ACL_OP_COMPILER_CACHE_MODE,
ACL_OP_COMPILER_CACHE_DIR
ACL_PRECISION_MODE,
ACL_AICORE_NUM,
ACL_AUTO_TUNE_MODE,
ACL_OP_SELECT_IMPL_MODE,
ACL_OPTYPELIST_FOR_IMPLMODE,
ACL_OP_DEBUG_LEVEL,
ACL_DEBUG_DIR,
ACL_OP_COMPILER_CACHE_MODE,
ACL_OP_COMPILER_CACHE_DIR
} aclCompileOpt;

/**
@@ -56,10 +59,15 @@ typedef enum {
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
int numOutputs, const aclTensorDesc *const outputDesc[],
const aclopAttr *attr, aclopEngineType engineType,
aclopCompileType compileFlag, const char *opPath);
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *attr,
aclopEngineType engineType,
aclopCompileType compileFlag,
const char *opPath);

/**
* @ingroup AscendCL
@@ -82,10 +90,11 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, con
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(
const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType,
int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag,
const char *opPath, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -103,4 +112,4 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_

+ 21
- 21
inc/external/acl/acl_prof.h View File

@@ -23,21 +23,21 @@
extern "C" {
#endif

#define ACL_PROF_ACL_API 0x0001
#define ACL_PROF_TASK_TIME 0x0002
#define ACL_PROF_AICORE_METRICS 0x0004
#define ACL_PROF_AICPU 0x0008
#define ACL_PROF_ACL_API 0x0001
#define ACL_PROF_TASK_TIME 0x0002
#define ACL_PROF_AICORE_METRICS 0x0004
#define ACL_PROF_AICPU 0x0008

#define ACL_PROF_MAX_OP_NAME_LEN 257
#define ACL_PROF_MAX_OP_TYPE_LEN 65
#define ACL_PROF_MAX_OP_NAME_LEN 257
#define ACL_PROF_MAX_OP_TYPE_LEN 65

typedef enum {
ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
ACL_AICORE_PIPE_UTILIZATION = 1,
ACL_AICORE_MEMORY_BANDWIDTH = 2,
ACL_AICORE_L0B_AND_WIDTH = 3,
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
ACL_AICORE_NONE = 0xFF
ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
ACL_AICORE_PIPE_UTILIZATION = 1,
ACL_AICORE_MEMORY_BANDWIDTH = 2,
ACL_AICORE_L0B_AND_WIDTH = 3,
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
ACL_AICORE_NONE = 0xFF
} aclprofAicoreMetrics;

typedef struct aclprofConfig aclprofConfig;
@@ -98,8 +98,7 @@ ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig);
* @see aclprofDestroyConfig
*/
ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
aclprofAicoreMetrics aicoreMetrics,
aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);
aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);

/**
* @ingroup AscendCL
@@ -139,7 +138,8 @@ ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig);
*
* @see aclprofModelUnSubscribe
*/
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig);
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId,
const aclprofSubscribeConfig *profSubscribeConfig);

/**
* @ingroup AscendCL
@@ -167,7 +167,7 @@ ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId);
* @see aclprofDestroySubscribeConfig
*/
ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch,
aclprofAicoreMetrics aicoreMetrics, void *fd);
aclprofAicoreMetrics aicoreMetrics, void *fd);

/**
* @ingroup AscendCL
@@ -219,8 +219,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType,
size_t opTypeLen);
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index,
char *opType, size_t opTypeLen);

/**
* @ingroup AscendCL
@@ -235,8 +235,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoL
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName,
size_t opNameLen);
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index,
char *opName, size_t opNameLen);

/**
* @ingroup AscendCL
@@ -293,4 +293,4 @@ ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLe
}
#endif

#endif // INC_EXTERNAL_ACL_PROF_H_
#endif // INC_EXTERNAL_ACL_PROF_H_

+ 79
- 61
inc/external/acl/acl_rt.h View File

@@ -26,62 +26,62 @@ extern "C" {
#endif

typedef enum aclrtRunMode {
ACL_DEVICE,
ACL_HOST,
ACL_DEVICE,
ACL_HOST,
} aclrtRunMode;

typedef enum aclrtTsId {
ACL_TS_ID_AICORE = 0,
ACL_TS_ID_AIVECTOR = 1,
ACL_TS_ID_RESERVED = 2,
ACL_TS_ID_AICORE = 0,
ACL_TS_ID_AIVECTOR = 1,
ACL_TS_ID_RESERVED = 2,
} aclrtTsId;

typedef enum aclrtEventStatus {
ACL_EVENT_STATUS_COMPLETE = 0,
ACL_EVENT_STATUS_NOT_READY = 1,
ACL_EVENT_STATUS_RESERVED = 2,
ACL_EVENT_STATUS_COMPLETE = 0,
ACL_EVENT_STATUS_NOT_READY = 1,
ACL_EVENT_STATUS_RESERVED = 2,
} aclrtEventStatus;

typedef enum aclrtCallbackBlockType {
ACL_CALLBACK_NO_BLOCK,
ACL_CALLBACK_BLOCK,
ACL_CALLBACK_NO_BLOCK,
ACL_CALLBACK_BLOCK,
} aclrtCallbackBlockType;

typedef enum aclrtMemcpyKind {
ACL_MEMCPY_HOST_TO_HOST,
ACL_MEMCPY_HOST_TO_DEVICE,
ACL_MEMCPY_DEVICE_TO_HOST,
ACL_MEMCPY_DEVICE_TO_DEVICE,
ACL_MEMCPY_HOST_TO_HOST,
ACL_MEMCPY_HOST_TO_DEVICE,
ACL_MEMCPY_DEVICE_TO_HOST,
ACL_MEMCPY_DEVICE_TO_DEVICE,
} aclrtMemcpyKind;

typedef enum aclrtMemMallocPolicy {
ACL_MEM_MALLOC_HUGE_FIRST,
ACL_MEM_MALLOC_HUGE_ONLY,
ACL_MEM_MALLOC_NORMAL_ONLY,
ACL_MEM_MALLOC_HUGE_FIRST_P2P,
ACL_MEM_MALLOC_HUGE_ONLY_P2P,
ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
ACL_MEM_MALLOC_HUGE_FIRST,
ACL_MEM_MALLOC_HUGE_ONLY,
ACL_MEM_MALLOC_NORMAL_ONLY,
ACL_MEM_MALLOC_HUGE_FIRST_P2P,
ACL_MEM_MALLOC_HUGE_ONLY_P2P,
ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
} aclrtMemMallocPolicy;

typedef enum aclrtMemAttr {
ACL_DDR_MEM,
ACL_HBM_MEM,
ACL_DDR_MEM_HUGE,
ACL_DDR_MEM_NORMAL,
ACL_HBM_MEM_HUGE,
ACL_HBM_MEM_NORMAL,
ACL_DDR_MEM_P2P_HUGE,
ACL_DDR_MEM_P2P_NORMAL,
ACL_HBM_MEM_P2P_HUGE,
ACL_HBM_MEM_P2P_NORMAL,
ACL_DDR_MEM,
ACL_HBM_MEM,
ACL_DDR_MEM_HUGE,
ACL_DDR_MEM_NORMAL,
ACL_HBM_MEM_HUGE,
ACL_HBM_MEM_NORMAL,
ACL_DDR_MEM_P2P_HUGE,
ACL_DDR_MEM_P2P_NORMAL,
ACL_HBM_MEM_P2P_HUGE,
ACL_HBM_MEM_P2P_NORMAL,
} aclrtMemAttr;

typedef enum aclrtGroupAttr {
ACL_GROUP_AICORE_INT,
ACL_GROUP_AIV_INT,
ACL_GROUP_AIC_INT,
ACL_GROUP_SDMANUM_INT,
ACL_GROUP_ASQNUM_INT
ACL_GROUP_AICORE_INT,
ACL_GROUP_AIV_INT,
ACL_GROUP_AIC_INT,
ACL_GROUP_SDMANUM_INT,
ACL_GROUP_ASQNUM_INT
} aclrtGroupAttr;

typedef struct tagRtGroupInfo aclrtGroupInfo;
@@ -472,7 +472,7 @@ ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stre
*/
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream);

/**
/**
* @ingroup AscendCL
* @brief Queries an event's status
*
@@ -534,7 +534,9 @@ ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start,
*
* @see aclrtFree | acldvppMalloc | aclrtMallocCached
*/
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr,
size_t size,
aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
@@ -557,7 +559,9 @@ ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMal
*
* @see aclrtFree | aclrtMalloc
*/
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr,
size_t size,
aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
@@ -648,7 +652,10 @@ ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count,
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst,
size_t destMax,
const void *src,
size_t count,
aclrtMemcpyKind kind);

/**
@@ -695,31 +702,38 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count,
aclrtMemcpyKind kind, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst,
size_t destMax,
const void *src,
size_t count,
aclrtMemcpyKind kind,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Asynchronously initialize memory
* and set the contents of memory to the specified value
*
* @par Function
* @ingroup AscendCL
* @brief Asynchronously initialize memory
* and set the contents of memory to the specified value
*
* @par Function
* The memory to be initialized is on the Host or device side,
* and the system determines whether
* it is host or device according to the address
*
* @param devPtr [IN] destination address pointer
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] set value
* @param count [IN] the number of bytes to set
* @param stream [IN] asynchronous task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count,
* @param devPtr [IN] destination address pointer
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] set value
* @param count [IN] the number of bytes to set
* @param stream [IN] asynchronous task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr,
size_t maxCount,
int32_t value,
size_t count,
aclrtStream stream);

/**
@@ -865,8 +879,11 @@ ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo);
*
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo
*/
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupId,
aclrtGroupAttr attr, void *attrValue, size_t valueLen,
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo,
int32_t groupId,
aclrtGroupAttr attr,
void *attrValue,
size_t valueLen,
size_t *paramRetSize);

/**
@@ -929,4 +946,5 @@ ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, si
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_RT_H_
#endif // INC_EXTERNAL_ACL_ACL_RT_H_


+ 19
- 12
inc/external/acl/acl_tdt.h View File

@@ -24,10 +24,10 @@ extern "C" {
#endif

enum acltdtTensorType {
ACL_TENSOR_DATA_UNDEFINED = -1,
ACL_TENSOR_DATA_TENSOR,
ACL_TENSOR_DATA_END_OF_SEQUENCE,
ACL_TENSOR_DATA_ABNORMAL
ACL_TENSOR_DATA_UNDEFINED = -1,
ACL_TENSOR_DATA_TENSOR,
ACL_TENSOR_DATA_END_OF_SEQUENCE,
ACL_TENSOR_DATA_ABNORMAL
};

typedef struct acltdtDataItem acltdtDataItem;
@@ -64,7 +64,7 @@ ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *
*
* @retval null for failed
* @retval OtherValues success
*/
*/
ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem);

/**
@@ -75,7 +75,7 @@ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataIt
*
* @retval 0 for failed
* @retval OtherValues success
*/
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem);

/**
@@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataI
*
* @retval 0 for failed
* @retval OtherValues success
*/
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem);

/**
@@ -118,8 +118,12 @@ ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataIte
*
* @see acltdtDestroyDataItem
*/
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum,
aclDataType dataType, void *data, size_t size);
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType,
const int64_t *dims,
size_t dimNum,
aclDataType dataType,
void *data,
size_t size);

/**
* @ingroup AscendCL
@@ -250,7 +254,8 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle);
*
* @see acltdtReceiveTensor
*/
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset,
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
const acltdtDataset *dataset,
int32_t timeout);

/**
@@ -266,11 +271,13 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
*
* @see acltdtSendTensor
*/
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset,
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle,
acltdtDataset *dataset,
int32_t timeout);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_TDT_H_
#endif //INC_EXTERNAL_ACL_ACL_TDT_H_


+ 14
- 0
inc/external/acl/error_codes/ge_error_codes.h View File

@@ -17,6 +17,20 @@
#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_
#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_

/*
 * GE_FUNC_VISIBILITY decorates declarations with the toolchain's export /
 * default-visibility attribute. It expands to that attribute only when the
 * build defines FUNC_VISIBILITY; otherwise it expands to nothing.
 */
#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
/* Use the documented double-underscore keyword: the single-underscore
 * spelling is only a legacy synonym and is rejected when MSVC language
 * extensions are disabled (/Za). */
#define GE_FUNC_VISIBILITY __declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include <stddef.h>

#ifdef __cplusplus


+ 68
- 67
inc/external/acl/error_codes/rt_error_codes.h View File

@@ -23,79 +23,80 @@
extern "C" {
#endif

static const int32_t ACL_RT_SUCCESS = 0; // success
static const int32_t ACL_RT_SUCCESS = 0; // success

static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type

static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error

#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__

+ 138
- 41
inc/external/acl/ops/acl_cblas.h View File

@@ -23,9 +23,17 @@
extern "C" {
#endif

typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType;
/**
 * Transpose-type selector passed as the transA/transB/transC arguments of the
 * aclblas* prototypes. The enumerator values are written out explicitly; they
 * equal the implicit numbering (0..3) the compiler would assign.
 */
typedef enum aclTransType {
    ACL_TRANS_N = 0,
    ACL_TRANS_T = 1,
    ACL_TRANS_NZ = 2,
    ACL_TRANS_NZ_T = 3
} aclTransType;

typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType;
/**
 * Compute-precision selector passed as the `type` argument of the aclblas*
 * prototypes. The enumerator values are written out explicitly; they equal
 * the implicit numbering (0, 1) the compiler would assign.
 */
typedef enum aclComputeType {
    ACL_COMPUTE_HIGH_PRECISION = 0,
    ACL_COMPUTE_LOW_PRECISION = 1
} aclComputeType;

/**
* @ingroup AscendCL
@@ -53,11 +61,12 @@ typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECIS
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda,
aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX,
const void *beta, void *y, int incy, aclDataType dataTypeY,
aclComputeType type, aclrtStream stream);
*/
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n,
const void *alpha, const void *a, int lda, aclDataType dataTypeA,
const void *x, int incx, aclDataType dataTypeX,
const void *beta, void *y, int incy, aclDataType dataTypeY,
aclComputeType type, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -74,10 +83,15 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, co
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA,
aclDataType dataTypeX, aclDataType dataTypeY,
aclComputeType type, aclopHandle **handle);
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA,
int m,
int n,
aclDataType dataTypeA,
aclDataType dataTypeX,
aclDataType dataTypeY,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
@@ -101,9 +115,18 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, i
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha,
const aclFloat16 *a, int lda, const aclFloat16 *x, int incx,
const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA,
int m,
int n,
const aclFloat16 *alpha,
const aclFloat16 *a,
int lda,
const aclFloat16 *x,
int incx,
const aclFloat16 *beta,
aclFloat16 *y,
int incy,
aclComputeType type,
aclrtStream stream);

/**
@@ -119,7 +142,10 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, con
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA,
int m,
int n,
aclComputeType type,
aclopHandle **handle);

/**
@@ -145,9 +171,19 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, in
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a,
int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y,
int incy, aclComputeType type, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA,
int m,
int n,
const int32_t *alpha,
const int8_t *a,
int lda,
const int8_t *x,
int incx,
const int32_t *beta,
int32_t *y,
int incy,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -162,7 +198,10 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, co
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA,
int m,
int n,
aclComputeType type,
aclopHandle **handle);

/**
@@ -194,11 +233,26 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, i
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
int k, const void *alpha, const void *matrixA, int lda,
aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB,
const void *beta, void *matrixC, int ldc, aclDataType dataTypeC,
aclComputeType type, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const void *alpha,
const void *matrixA,
int lda,
aclDataType dataTypeA,
const void *matrixB,
int ldb,
aclDataType dataTypeB,
const void *beta,
void *matrixC,
int ldc,
aclDataType dataTypeC,
aclComputeType type,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -220,10 +274,18 @@ ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType tra
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC,
int m, int n, int k, aclDataType dataTypeA,
aclDataType dataTypeB, aclDataType dataTypeC,
aclComputeType type, aclopHandle **handle);
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclDataType dataTypeA,
aclDataType dataTypeB,
aclDataType dataTypeC,
aclComputeType type,
aclopHandle **handle);


/**
* @ingroup AscendCL
@@ -251,10 +313,22 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, a
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda,
const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta,
aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const aclFloat16 *alpha,
const aclFloat16 *matrixA,
int lda,
const aclFloat16 *matrixB,
int ldb,
const aclFloat16 *beta,
aclFloat16 *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -272,8 +346,13 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType tran
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC,
int m, int n, int k, aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
aclopHandle **handle);

/**
@@ -302,10 +381,23 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, ac
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
int k, const int32_t *alpha, const int8_t *matrixA, int lda,
const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC,
int ldc, aclComputeType type, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const int32_t *alpha,
const int8_t *matrixA,
int lda,
const int8_t *matrixB,
int ldb,
const int32_t *beta,
int32_t *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);


/**
* @ingroup AscendCL
@@ -323,12 +415,17 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType tra
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC,
int m, int n, int k, aclComputeType type,
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
aclopHandle **handle);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_
#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_

+ 222
- 147
inc/external/acl/ops/acl_dvpp.h View File

@@ -53,109 +53,123 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output

// Supported Pixel Format
enum acldvppPixelFormat {
PIXEL_FORMAT_YUV_400 = 0, // 0
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
PIXEL_FORMAT_RGB_888 = 12, // 12
PIXEL_FORMAT_BGR_888 = 13, // 13
PIXEL_FORMAT_ARGB_8888 = 14, // 14
PIXEL_FORMAT_ABGR_8888 = 15, // 15
PIXEL_FORMAT_RGBA_8888 = 16, // 16
PIXEL_FORMAT_BGRA_8888 = 17, // 17
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
PIXEL_FORMAT_YVU_PLANAR_422,
PIXEL_FORMAT_YVU_PLANAR_444,
PIXEL_FORMAT_RGB_444 = 23,
PIXEL_FORMAT_BGR_444,
PIXEL_FORMAT_ARGB_4444,
PIXEL_FORMAT_ABGR_4444,
PIXEL_FORMAT_RGBA_4444,
PIXEL_FORMAT_BGRA_4444,
PIXEL_FORMAT_RGB_555,
PIXEL_FORMAT_BGR_555,
PIXEL_FORMAT_RGB_565,
PIXEL_FORMAT_BGR_565,
PIXEL_FORMAT_ARGB_1555,
PIXEL_FORMAT_ABGR_1555,
PIXEL_FORMAT_RGBA_1555,
PIXEL_FORMAT_BGRA_1555,
PIXEL_FORMAT_ARGB_8565,
PIXEL_FORMAT_ABGR_8565,
PIXEL_FORMAT_RGBA_8565,
PIXEL_FORMAT_BGRA_8565,
PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
PIXEL_FORMAT_RGB_BAYER_10BPP,
PIXEL_FORMAT_RGB_BAYER_12BPP,
PIXEL_FORMAT_RGB_BAYER_14BPP,
PIXEL_FORMAT_RGB_BAYER_16BPP,
PIXEL_FORMAT_BGR_888_PLANAR = 70,
PIXEL_FORMAT_HSV_888_PACKAGE,
PIXEL_FORMAT_HSV_888_PLANAR,
PIXEL_FORMAT_LAB_888_PACKAGE,
PIXEL_FORMAT_LAB_888_PLANAR,
PIXEL_FORMAT_S8C1,
PIXEL_FORMAT_S8C2_PACKAGE,
PIXEL_FORMAT_S8C2_PLANAR,
PIXEL_FORMAT_S16C1,
PIXEL_FORMAT_U8C1,
PIXEL_FORMAT_U16C1,
PIXEL_FORMAT_S32C1,
PIXEL_FORMAT_U32C1,
PIXEL_FORMAT_U64C1,
PIXEL_FORMAT_S64C1,
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
PIXEL_FORMAT_YVU_SEMIPLANAR_440,
PIXEL_FORMAT_FLOAT32,
PIXEL_FORMAT_BUTT,
PIXEL_FORMAT_UNKNOWN = 10000
PIXEL_FORMAT_YUV_400 = 0, // 0
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
PIXEL_FORMAT_RGB_888 = 12, // 12
PIXEL_FORMAT_BGR_888 = 13, // 13
PIXEL_FORMAT_ARGB_8888 = 14, // 14
PIXEL_FORMAT_ABGR_8888 = 15, // 15
PIXEL_FORMAT_RGBA_8888 = 16, // 16
PIXEL_FORMAT_BGRA_8888 = 17, // 17
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
PIXEL_FORMAT_YVU_PLANAR_422,
PIXEL_FORMAT_YVU_PLANAR_444,
PIXEL_FORMAT_RGB_444 = 23,
PIXEL_FORMAT_BGR_444,
PIXEL_FORMAT_ARGB_4444,
PIXEL_FORMAT_ABGR_4444,
PIXEL_FORMAT_RGBA_4444,
PIXEL_FORMAT_BGRA_4444,
PIXEL_FORMAT_RGB_555,
PIXEL_FORMAT_BGR_555,
PIXEL_FORMAT_RGB_565,
PIXEL_FORMAT_BGR_565,
PIXEL_FORMAT_ARGB_1555,
PIXEL_FORMAT_ABGR_1555,
PIXEL_FORMAT_RGBA_1555,
PIXEL_FORMAT_BGRA_1555,
PIXEL_FORMAT_ARGB_8565,
PIXEL_FORMAT_ABGR_8565,
PIXEL_FORMAT_RGBA_8565,
PIXEL_FORMAT_BGRA_8565,
PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
PIXEL_FORMAT_RGB_BAYER_10BPP,
PIXEL_FORMAT_RGB_BAYER_12BPP,
PIXEL_FORMAT_RGB_BAYER_14BPP,
PIXEL_FORMAT_RGB_BAYER_16BPP,
PIXEL_FORMAT_BGR_888_PLANAR = 70,
PIXEL_FORMAT_HSV_888_PACKAGE,
PIXEL_FORMAT_HSV_888_PLANAR,
PIXEL_FORMAT_LAB_888_PACKAGE,
PIXEL_FORMAT_LAB_888_PLANAR,
PIXEL_FORMAT_S8C1,
PIXEL_FORMAT_S8C2_PACKAGE,
PIXEL_FORMAT_S8C2_PLANAR,
PIXEL_FORMAT_S16C1,
PIXEL_FORMAT_U8C1,
PIXEL_FORMAT_U16C1,
PIXEL_FORMAT_S32C1,
PIXEL_FORMAT_U32C1,
PIXEL_FORMAT_U64C1,
PIXEL_FORMAT_S64C1,
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
PIXEL_FORMAT_YVU_SEMIPLANAR_440,
PIXEL_FORMAT_FLOAT32,
PIXEL_FORMAT_BUTT,
PIXEL_FORMAT_UNKNOWN = 10000
};

// Stream Format
enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL };
enum acldvppStreamFormat {
H265_MAIN_LEVEL = 0,
H264_BASELINE_LEVEL,
H264_MAIN_LEVEL,
H264_HIGH_LEVEL
};

// Supported Channel Mode
enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 };
enum acldvppChannelMode {
DVPP_CHNMODE_VPC = 1,
DVPP_CHNMODE_JPEGD = 2,
DVPP_CHNMODE_JPEGE = 4
};

// Supported Border Type
enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 };
enum acldvppBorderType {
BORDER_CONSTANT = 0,
BORDER_REPLICATE,
BORDER_REFLECT,
BORDER_REFLECT_101
};

// Venc parameter type
enum aclvencChannelDescParamType {
ACL_VENC_THREAD_ID_UINT64 = 0,
ACL_VENC_CALLBACK_PTR,
ACL_VENC_PIXEL_FORMAT_UINT32,
ACL_VENC_ENCODE_TYPE_UINT32,
ACL_VENC_PIC_WIDTH_UINT32,
ACL_VENC_PIC_HEIGHT_UINT32,
ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
ACL_VENC_BUF_ADDR_PTR,
ACL_VENC_BUF_SIZE_UINT32,
ACL_VENC_RC_MODE_UINT32,
ACL_VENC_SRC_RATE_UINT32,
ACL_VENC_MAX_BITRATE_UINT32,
ACL_VENC_MAX_IP_PROP_UINT32
ACL_VENC_THREAD_ID_UINT64 = 0,
ACL_VENC_CALLBACK_PTR,
ACL_VENC_PIXEL_FORMAT_UINT32,
ACL_VENC_ENCODE_TYPE_UINT32,
ACL_VENC_PIC_WIDTH_UINT32,
ACL_VENC_PIC_HEIGHT_UINT32,
ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
ACL_VENC_BUF_ADDR_PTR,
ACL_VENC_BUF_SIZE_UINT32,
ACL_VENC_RC_MODE_UINT32,
ACL_VENC_SRC_RATE_UINT32,
ACL_VENC_MAX_BITRATE_UINT32,
ACL_VENC_MAX_IP_PROP_UINT32
};

// Jpeg picture format
enum acldvppJpegFormat {
ACL_JPEG_CSS_444 = 0,
ACL_JPEG_CSS_422,
ACL_JPEG_CSS_420,
ACL_JPEG_CSS_GRAY,
ACL_JPEG_CSS_440,
ACL_JPEG_CSS_411,
ACL_JPEG_CSS_UNKNOWN = 1000
ACL_JPEG_CSS_444 = 0,
ACL_JPEG_CSS_422,
ACL_JPEG_CSS_420,
ACL_JPEG_CSS_GRAY,
ACL_JPEG_CSS_440,
ACL_JPEG_CSS_411,
ACL_JPEG_CSS_UNKNOWN = 1000
};

/**
@@ -509,7 +523,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD
* @retval null for failed.
* @retval other success
*/
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top,
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left,
uint32_t right,
uint32_t top,
uint32_t bottom);

/**
@@ -588,7 +604,10 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top,
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config,
uint32_t left,
uint32_t right,
uint32_t top,
uint32_t bottom);

/**
@@ -1077,8 +1096,7 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length,
const void *param);
aclvencChannelDescParamType paramType, size_t length, const void *param);

/**
* @ingroup AscendCL
@@ -1227,8 +1245,7 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length,
size_t *paramRetSize, void *param);
aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param);

/**
* @ingroup AscendCL
@@ -1528,7 +1545,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height,
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
int32_t *components);

/**
@@ -1545,8 +1565,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width,
uint32_t *height, int32_t *components,
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
int32_t *components,
acldvppJpegFormat *format);

/**
@@ -1561,7 +1584,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc,
const acldvppJpegeConfig *config, uint32_t *size);
const acldvppJpegeConfig *config,
uint32_t *size);

/**
* @ingroup AscendCL
@@ -1575,8 +1599,10 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize,
acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1591,8 +1617,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width,
uint32_t *height, int32_t *components);
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data,
uint32_t dataSize,
uint32_t *width,
uint32_t *height,
int32_t *components);

/**
* @ingroup AscendCL
@@ -1606,8 +1635,10 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t d
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize,
acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1671,8 +1702,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDe
* @see acldvppCreateChannel | acldvppCreatePicDesc
* | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig,
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppResizeConfig *resizeConfig,
aclrtStream stream);

/**
@@ -1708,8 +1741,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
aclrtStream stream);

/**
@@ -1734,9 +1769,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[], aclrtStream stream);
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1759,9 +1797,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1786,11 +1827,14 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[], aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[],
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1818,8 +1862,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
acldvppPicDesc *outputDesc, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1837,8 +1884,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelD
*
* @see acldvppCreateChannel | acldvppCreateJpegeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
const void *data, uint32_t *size, acldvppJpegeConfig *config,
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
const void *data,
uint32_t *size,
acldvppJpegeConfig *config,
aclrtStream stream);

/**
@@ -1856,8 +1906,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelD
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
acldvppPicDesc *outputDesc, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1912,8 +1965,11 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData);
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
acldvppPicDesc *output,
aclvdecFrameConfig *config,
void *userData);

/**
* @ingroup AscendCL
@@ -1932,8 +1988,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, a
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
aclvdecFrameConfig *config, void *userData);
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
aclvdecFrameConfig *config,
void *userData);

/**
* @ingroup AscendCL
@@ -1954,8 +2012,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1977,8 +2037,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
void *reserve,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1990,7 +2053,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode);
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc,
uint32_t mode);

/**
* @ingroup AscendCL
@@ -2025,7 +2089,8 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode);
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc,
uint32_t outMode);

/**
* @ingroup AscendCL
@@ -2122,7 +2187,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data,
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap,
uint32_t dim,
uint8_t **data,
uint32_t *len);
/**
* @ingroup AscendCL
@@ -2140,8 +2207,10 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, u
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
const acldvppLutMap *lutMap, aclrtStream stream);
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppLutMap *lutMap,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2162,7 +2231,8 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig();
*
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index,
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig,
uint32_t index,
double value);

/**
@@ -2307,8 +2377,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
const acldvppBorderConfig *borderConfig, aclrtStream stream);
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppBorderConfig *borderConfig,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2325,8 +2397,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc,
acldvppHist *hist, void *reserve, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *srcPicDesc,
acldvppHist *hist,
void *reserve,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2335,7 +2410,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist();
ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist();

/**
* @ingroup AscendCL
@@ -2392,7 +2467,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim,
*
* @see acldvppCreateHist | acldvppVpcCalcHistAsync
*/
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist);
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist);

/**
* @ingroup AscendCL
@@ -2415,4 +2490,4 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_

+ 6
- 8
inc/external/acl/ops/acl_fv.h View File

@@ -32,8 +32,8 @@ typedef struct aclfvSearchResult aclfvSearchResult;

// search operation type
enum aclfvSearchType {
SEARCH_1_N, // 1:N operation type
SEARCH_N_M // N:M operation type
SEARCH_1_N, // 1:N operation type
SEARCH_N_M // N:M operation type
};

/**
@@ -104,8 +104,7 @@ ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset,
uint32_t featureLen, uint32_t featureCount,
uint8_t *featureData, uint32_t featureDataLen);
uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen);

/**
* @ingroup AscendCL
@@ -234,9 +233,8 @@ ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInp
* @retval null for failed. OtherValues success
*/
ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum,
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1,
uint32_t *resultOffset, float *resultDistance,
uint32_t dataLen);
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance,
uint32_t dataLen);

/**
* @ingroup AscendCL
@@ -350,4 +348,4 @@ ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_

+ 133
- 0
inc/external/hccl/hccl/hccl.h View File

@@ -0,0 +1,133 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @file hccl.h
* @brief HCCL API
*/

#ifndef HCCL_H_
#define HCCL_H_

#include <hccl/hccl_types.h>
#include <acl/acl.h>

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
* @brief Initialize HCCL from a cluster info file.
*
* @param clusterInfo A string giving the path of the cluster info file, including the file name.
* @param rank An integer identifying the rank.
* @param comm A pointer identifying the initialized communication resource.
* @return HcclResult
* @see HcclCommDestroy()
*/
extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm);

/**
* @brief Get the HCCL root info.
*
* @param rootInfo A pointer identifying the HCCL root info.
* @return HcclResult
*/
extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo);

/**
* @brief Initialize HCCL with root info.
*
* @param nRanks An integer identifying the rank size of the cluster.
* @param rootInfo A pointer to the HCCL root info.
* @param rank An integer identifying the rank.
* @param comm A pointer identifying the initialized communication resource.
* @return HcclResult
* @see HcclCommDestroy()
*/
extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm);

/**
* @brief AllReduce operator.
*
* @param sendBuf A pointer identifying the input data address of the operator.
* @param recvBuf A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the output data.
* @param dataType The data type of the operator, must be one of the following types:
*                 int8, int16, int32, float16, float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param comm A pointer identifying the communication resource the operator is based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType,
HcclReduceOp op, HcclComm comm, aclrtStream stream);

/**
* @brief Broadcast operator.
*
* @param buf A pointer identifying the data address of the operator.
* @param count An integer(u64) identifying the number of the data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param root An integer(u32) identifying the root rank in the operator.
* @param comm A pointer identifying the communication resource the operator is based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
aclrtStream stream);

/**
* @brief ReduceScatter operator.
*
* @param sendBuf A pointer identifying the input data address of the operator.
* @param recvBuf A pointer identifying the output data address of the operator.
* @param recvCount An integer(u64) identifying the number of the output data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param comm A pointer identifying the communication resource the operator is based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
HcclReduceOp op, HcclComm comm, aclrtStream stream);

/**
* @brief AllGather operator.
*
* @param sendBuf A pointer identifying the input data address of the operator.
* @param recvBuf A pointer identifying the output data address of the operator.
* @param sendCount An integer(u64) identifying the number of the input data.
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param comm A pointer identifying the communication resource the operator is based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType,
HcclComm comm, aclrtStream stream);

/**
* @brief Destroy an HCCL communication resource.
*
* @param comm A pointer identifying the communication resource to destroy.
* @return HcclResult
* @see HcclCommInitClusterInfo()
*/
extern HcclResult HcclCommDestroy(HcclComm comm);

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_H_

+ 101
- 0
inc/external/hccl/hccl/hccl_types.h View File

@@ -0,0 +1,101 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @file hccl_types.h
* @brief HCCL data type definition
*
*/
#ifndef HCCL_TYPES_H_
#define HCCL_TYPES_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
* @brief HCCL functions return value definition
*
* HCCL_SUCCESS is 0 and every error code has an explicit positive value.
* HCCL_E_RESERVED has no explicit value and therefore takes the value after
* HCCL_E_NETWORK (20).
*/
typedef enum {
HCCL_SUCCESS = 0, /**< success */
HCCL_E_PARA = 1, /**< parameter error */
HCCL_E_PTR = 2, /**< empty pointer */
HCCL_E_MEMORY = 3, /**< memory error */
HCCL_E_INTERNAL = 4, /**< internal error */
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
HCCL_E_UNAVAIL = 7, /**< resource unavailable */
HCCL_E_SYSCALL = 8, /**< call system interface error */
HCCL_E_TIMEOUT = 9, /**< timeout */
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
HCCL_E_RUNTIME = 15, /**< call runtime api fail */
HCCL_E_DRV = 16, /**< call driver api fail */
HCCL_E_PROFILING = 17, /**< call profiling api fail */
HCCL_E_CCE = 18, /**< call cce api fail */
HCCL_E_NETWORK = 19, /**< call network api fail */
HCCL_E_RESERVED /**< reserved */
} HcclResult;

/**
* @brief Handle to an HCCL communicator.
*
* Declared as an untyped pointer; this header does not expose what it points to.
*/
typedef void *HcclComm;

/**
* @brief HCCL reduction operation
*
* HCCL_REDUCE_RESERVED has no explicit value and takes the value after
* HCCL_REDUCE_MIN (4).
*/
typedef enum {
HCCL_REDUCE_SUM = 0, /**< sum */
HCCL_REDUCE_PROD = 1, /**< prod */
HCCL_REDUCE_MAX = 2, /**< max */
HCCL_REDUCE_MIN = 3, /**< min */
HCCL_REDUCE_RESERVED /**< reserved */
} HcclReduceOp;

/**
* @brief HCCL data type
*
* HCCL_DATA_TYPE_RESERVED has no explicit value and takes the value after
* HCCL_DATA_TYPE_UINT64 (7).
*/
typedef enum {
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
HCCL_DATA_TYPE_RESERVED /**< reserved */
} HcclDataType;

/**
 * @brief Length in bytes of the HcclRootInfo payload.
 *
 * In C++ this stays a namespace-scope constant, exactly as before. In C a
 * const-qualified object is not an integer constant expression, so it cannot
 * size the array member below and would also be defined once per translation
 * unit that includes this header; C builds therefore get a macro with the
 * same value.
 */
#ifdef __cplusplus
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
#else
#define HCCL_ROOT_INFO_BYTES 4108U // 4108: root info length
#endif // __cplusplus

/**
 * @brief HCCL root info
 *
 * A fixed-size byte buffer of HCCL_ROOT_INFO_BYTES bytes; this header does
 * not describe its contents.
 */
typedef struct HcclRootInfoDef {
  char internal[HCCL_ROOT_INFO_BYTES];
} HcclRootInfo;

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_TYPES_H_

+ 102
- 0
inc/external/runtime/runtime/rt_error_codes.h View File

@@ -0,0 +1,102 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
#define __INC_EXTERNEL_RT_ERROR_CODES_H__

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Runtime status codes. ACL_RT_SUCCESS is 0; error codes are grouped by numeric range
// (107xxx, 207xxx, 507xxx).
// NOTE(review): the ranges look like caller/parameter errors (107xxx), resource or
// environment errors (207xxx) and internal/device failures (507xxx) — confirm against
// the ACL error-code documentation.
static const int32_t ACL_RT_SUCCESS = 0; // success

// 107000 - 107018
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type

// 207000 - 207010
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource

// 507000 - 507033
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error

// 507899 - 507900
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error

#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__

+ 60
- 0
third_party/fwkacllib/inc/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h View File

@@ -0,0 +1,60 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OP_TYPE_LIST_H_
#define AICPU_OP_TYPE_LIST_H_
// Kernel type of an operator. Values are implicit: TF_KERNEL = 0, CPU_KERNEL = 1.
enum OpKernelType {
TF_KERNEL,
CPU_KERNEL
};
// Reasons an operator check can be rejected. Values are implicit, starting at 0:
// OP_TYPE_NOT_SUPPORT = 0, FORMAT_NOT_SUPPORT = 1, DTYPE_NOT_SUPPORT = 2.
enum ReturnCode {
OP_TYPE_NOT_SUPPORT,
FORMAT_NOT_SUPPORT,
DTYPE_NOT_SUPPORT
};
#pragma pack(push, 1)
// One-byte alignment: the structures up to the matching pack(pop) have no padding
// between members.
// NOTE(review): this header uses uint64_t and bool but includes nothing itself, so it
// relies on the including file to provide <stdint.h> (and <stdbool.h> when built as C).
// NOTE(review): the uint64_t "...List" / opType fields look like addresses or offsets of
// variable-length data rather than plain numbers — confirm with the producer of these
// messages.

// Description of one operator: a length, the operator type and its kernel type.
struct SysOpInfo {
uint64_t opLen;
uint64_t opType;
OpKernelType kernelsType;
};
// Parameter description: a count plus dtype and format lists.
struct OpParamInfo {
uint64_t num;
uint64_t dtypeList;
uint64_t formatList;
};
// Operator check request: number of operators, an offset length and the two lists.
struct SysOpCheckInfo {
uint64_t opListNum;
uint64_t offSetLen;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
// Operator check response: number of operators, a "without json" flag, the list of
// return codes and the two lists.
struct SysOpCheckResp {
uint64_t opListNum;
bool isWithoutJson;
uint64_t returnCodeList;
uint64_t sysOpInfoList;
uint64_t opParamInfoList;
};
#pragma pack(pop)
#endif // AICPU_OP_TYPE_LIST_H_

+ 37
- 0
third_party/fwkacllib/inc/inc/aicpu/common/aicpu_task_struct.h View File

@@ -0,0 +1,37 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AICPU_TASK_STRUCT_H_
#define AICPU_TASK_STRUCT_H_

#include <cstdint>

namespace aicpu {

// Header placed in front of an AICPU kernel parameter block.
// Packed with one-byte alignment, so there is no padding between members.
#pragma pack(push, 1)
struct AicpuParamHead
{
uint32_t length; // Total length: includes the custom message
uint32_t ioAddrNum; // Input and output address number
uint32_t extInfoLength; // extInfo struct length
uint64_t extInfoAddr; // extInfo address
};
#pragma pack(pop)

} // namespace aicpu

#endif // AICPU_TASK_STRUCT_H_


+ 62
- 0
third_party/fwkacllib/inc/inc/cce/aicpu_engine.h View File

@@ -0,0 +1,62 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_ENGINE_H__
#define AICPU_ENGINE_H__

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
* @ingroup aicpu engine
* @brief status codes returned by the aicpu engine interfaces.
*
* Values 0-7 are contiguous; AE_STATUS_TASK_WAIT is 101 and AE_STATUS_RESERVED
* takes the following value (102).
*/
typedef enum {
AE_STATUS_SUCCESS = 0,
AE_STATUS_BAD_PARAM = 1,
AE_STATUS_OPEN_SO_FAILED = 2,
AE_STATUS_GET_KERNEL_NAME_FAILED = 3,
AE_STATUS_INNER_ERROR = 4,
AE_STATUS_KERNEL_API_INNER_ERROR = 5,
AE_STATUS_END_OF_SEQUENCE = 6,
AE_STATUS_DUMP_FAILED = 7,
AE_STATUS_TASK_WAIT = 101,
AE_STATUS_RESERVED
} aeStatus_t;

/**
* @ingroup aicpu engine
* @brief aeCallInterface:
* an interface to call a function in an op kernel lib
* @param [in] addr void *, should be in STR_KERNEL * format
* @return aeStatus_t
*/
aeStatus_t aeCallInterface(void *addr);

/**
* @ingroup aicpu engine
* @brief aeBatchLoadKernelSo:
* an interface to load kernel shared objects (so)
* @param [in] loadSoNum number of so files to load
* @param [in] soPaths array of so paths
* @param [in] soNames array of so names
* @return aeStatus_t
* NOTE(review): soPaths and soNames presumably each hold loadSoNum entries — confirm.
*/
aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]);

#ifdef __cplusplus
}
#endif

#endif // AICPU_ENGINE_H__

+ 56
- 0
third_party/fwkacllib/inc/inc/cce/aicpu_engine_struct.h View File

@@ -0,0 +1,56 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AICPU_ENGINE_STRUCT_H__
#define AICPU_ENGINE_STRUCT_H__

#include "fwk_adpt_struct.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
The different frameworks we adapt to.
Values are spaced by 10 (0, 10, 20); FMK_KERNEL_TYPE_RESERVED takes the value
after FMK_KERNEL_TYPE_PT (21).
*/
typedef enum {
FMK_KERNEL_TYPE_TF = 0,
FMK_KERNEL_TYPE_CF = 10,
FMK_KERNEL_TYPE_PT = 20,
FMK_KERNEL_TYPE_RESERVED
} FwkkernelType_t;

// Framework op kernel descriptor: a kernel type tag followed by the framework
// operate parameters. Packed with one-byte alignment.
// NOTE(review): the member type is spelled with C++ scope resolution
// (::aicpu::FWKAdapter::...), so this struct only compiles as C++ even though the
// header wraps its contents in extern "C".
#pragma pack(push, 1)
typedef struct {
uint32_t fwkKernelType; // FwkkernelType_t
union {
::aicpu::FWKAdapter::FWKOperateParam fwk_kernel;
} fwkKernelBase;
} STR_FWK_OP_KERNEL;
#pragma pack(pop)

// Session information: a session id, a kernel id and a session flag.
// Packed with one-byte alignment, so no padding follows the trailing bool.
#pragma pack(push, 1)
struct SessionInfo {
uint64_t sessionId;
uint64_t kernelId;
bool sessFlag;
};
#pragma pack(pop)

#ifdef __cplusplus
}
#endif
#endif // AICPU_ENGINE_STRUCT_H__

+ 31
- 0
third_party/fwkacllib/inc/inc/cce/blas_struct.h View File

@@ -0,0 +1,31 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CC_BLAS_STRUCT_API__
#define CC_BLAS_STRUCT_API__

#include <stdint.h>

// Fill mode selector (lower = 0, upper = 1).
// NOTE(review): presumably selects the lower/upper triangle of a matrix as in BLAS — confirm.
typedef enum { CCBLAS_FILL_MODE_LOWER = 0, CCBLAS_FILL_MODE_UPPER = 1 } ccblasFillMode_t;

// Operation selector (N = 0, T = 1).
// NOTE(review): presumably "no transpose" / "transpose" as in BLAS — confirm.
typedef enum {
CCBLAS_OP_N = 0,
CCBLAS_OP_T = 1,
} ccblasOperation_t;

// Diagonal type selector (non-unit = 0, unit = 1).
typedef enum { CCBLAS_DIAG_NON_UNIT = 0, CCBLAS_DIAG_UNIT = 1 } ccblasDiagType_t;

#endif // CC_BLAS_STRUCT_API__

+ 101
- 0
third_party/fwkacllib/inc/inc/cce/cce.h View File

@@ -0,0 +1,101 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CCE_H__
#define CCE_H__

#include <stdint.h>
#include "cce_def.hpp"

namespace cce {

/**
* @ingroup cce
* @brief create cc handler
* @param [in|out] handle pointer to the cc handler
* @return ccStatus_t
*/
ccStatus_t ccCreate(ccHandle_t *handle);

/**
* @ingroup cce
* @brief destroy cc handler
* @param [in] *handle cc handler
* @return ccStatus_t
*/
ccStatus_t ccDestroy(ccHandle_t *handle);

/**
* @ingroup cce
* @brief bind stream with specified cc handler
* @param [in] handle cc handler
* @param [in] streamId stream
* @return ccStatus_t
*/
ccStatus_t ccSetStream(ccHandle_t handle, rtStream_t streamId);

/**
* @ingroup cce
* @brief get the stream from cc handler
* @param [in] handle cc handler
* @param [in|out] streamId pointer to the stream
* @return ccStatus_t
*/
ccStatus_t ccGetStream(ccHandle_t handle, rtStream_t *streamId);

/**
* @ingroup cce
* @brief transform data according to the given data type transform mode
* @param [in] dataTypeTransMode mode of data type transform
* @param [in] inputData pointer to the input data
* @param [in] inputDataSize input data size
* @param [in|out] outputData pointer to the output data
* @param [in] outputDataSize output data size
* @return ccStatus_t
*/
ccStatus_t ccTransDataType(ccDataTypeTransMode_t dataTypeTransMode, const void *inputData, uint32_t inputDataSize,
void *outputData, const uint32_t outputDataSize);
/**
* @ingroup cce
* @brief cce sys init func
*/
void cceSysInit();

/**
* @ingroup cce
* @brief cce Log Start up func
*/
void cceLogStartup();

/**
* @ingroup cce
* @brief cce Log Shut down func
*/
void cceLogShutdown();

/**
* @ingroup cce
* @brief set the profiling on or off
* @param [in] const char* target: The engine gets it from ENV. Don't need care about it.
* @param [in] const char* job_ctx: identifies profiling job
* @param [in] uint32_t flag: value: 0, on ; 1, off.
* @return ccStatus_t value: 0, success; 1, fail.
*/
ccStatus_t CceProfilingConfig(const char *target, const char *job_ctx, uint32_t flag);

}; // namespace cce

#endif // CCE_H__

+ 152
- 0
third_party/fwkacllib/inc/inc/cce/cce_def.hpp View File

@@ -0,0 +1,152 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CCE_DEF_H__
#define CCE_DEF_H__

#include "runtime/rt.h"

namespace cce {

/**
 * @ingroup cce
 * @brief Memory configuration used for fusion.
 *
 * A default-constructed object has every field set to zero.
 */
typedef struct TagCceFusionMemCfg {
  uint64_t memAddr;        /**< memAddr */
  uint32_t memSize;        /**< memSize */
  uint32_t addrChangeFlag; /**< op data addr change flag. value:0,valid;1,not valid */
  uint32_t poolFlag;       /**< mempool flag : value:0,is valid; value: 1, not valid */

  /** Zero-initialises all four fields. */
  TagCceFusionMemCfg() : memAddr(0), memSize(0), addrChangeFlag(0), poolFlag(0) {}
} CceFusionMemCfg_t;
/**
* @ingroup cce
* @brief return value
*
* NOTE(review): CC_STATUS_NOT_SUPPORTED and CC_STATUS_INVALID_VALUE share the
* value 7, so callers cannot tell them apart and a switch cannot list both as
* case labels. CC_STATUS_RESERVED therefore takes the value 8. Confirm whether
* the duplication is intentional before changing it; the values may be part of
* a binary interface.
*/
typedef enum tagCcStatus {
CC_STATUS_SUCCESS = 0, /**< succ */
CC_STATUS_NOT_INITIALIZED = 1, /**< not init */
CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */
CC_STATUS_BAD_PARAM = 3, /**< para check failed */
CC_STATUS_INTERNAL_ERROR = 4, /**< internal error */
CC_STATUS_KERNEL_ERROR = 5, /**< kernel error */
CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */
CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */
CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/
CC_STATUS_RESERVED /**< just for check */
} ccStatus_t;

/**
* @ingroup cce
* @brief original data type
*
* Only CC_DATA_FLOAT has an explicit value (0); the remaining enumerators count
* up from it in declaration order.
*/
typedef enum tagCcDataType {
CC_DATA_FLOAT = 0, /**< float type */
CC_DATA_HALF, /**< fp16 type */
CC_DATA_INT8, /**< int8 type */
CC_DATA_INT32, /**< int32 type */
CC_DATA_UINT8, /**< uint8 type */
CC_DATA_HALF_UINT16_PROPOSAL, /**< mixed type for proposal */
CC_DATA_INT16, /**< int16 type */
CC_DATA_UINT16, /**< uint16 type */
CC_DATA_UINT32, /**< uint32 type */
CC_DATA_INT64, /**< int64 type */
CC_DATA_UINT64, /**< uint64 type */
CC_DATA_DOUBLE, /**< double type */
CC_DATA_BOOL, /**< bool type */
CC_DATA_DUAL, /**< dual output type */
CC_DATA_DUAL_SUB_INT8, /**< dual output int8 type */
CC_DATA_DUAL_SUB_UINT8, /**< dual output uint8 type */
CC_DATA_COMPLEX64,
CC_DATA_COMPLEX128,
CC_DATA_QINT8,
CC_DATA_QINT16,
CC_DATA_QINT32,
CC_DATA_QUINT8,
CC_DATA_QUINT16,
CC_DATA_RESERVED
} ccDataType_t;

/**
* @ingroup cce
* @brief save context of cce library
*
* Holds a stream and an op index.
*/
typedef struct tagCcContext {
rtStream_t streamId;
uint32_t opIndex;
} ccContext_t;

/** Handle type used by the cce API: a pointer to the context structure above. */
typedef struct tagCcContext *ccHandle_t;

/**
* @ingroup cce
* @brief mode of data type transform
*/
typedef enum tagCcDataTypeTransMode {
CC_DATATYPE_TRANS_FLOAT_NO_TRANS = 0, /**< origin data is float, no trans */
CC_DATATYPE_TRANS_FP16_NO_TRANS, /**< origin data is fp16, no trans */
CC_DATATYPE_TRANS_INT8_NO_TRANS, /**< origin data is int8, no trans */
CC_DATATYPE_TRANS_FLOAT_TO_FP16, /**< data type float trans to fp16 */
CC_DATATYPE_TRANS_FP16_TO_FLOAT, /**< data type fp16 trans to float */
CC_DATATYPE_TRANS_FLOAT_TO_INT8, /**< data type float trans to int8 */
CC_DATATYPE_TRANS_INT8_TO_FLOAT, /**< data type int8 trans to float */
CC_DATATYPE_TRANS_UINT8_TO_FLOAT, /**< data type uint8 trans to float */
CC_DATATYPE_TRANS_UINT8_NO_TRANS, /**< origin data is uint8, no trans */
CC_DATATYPE_TRANS_INT32_NO_TRANS, /**< origin data is int32, no trans */
CC_DATATYPE_TRANS_UINT16_NO_TRANS, /**< origin data is uint16, no trans */
CC_DATATYPE_TRANS_UINT16_TO_FLOAT, /**< data type uint16 trans to float */
CC_DATATYPE_TRANS_MODE_RESERVED
} ccDataTypeTransMode_t;

/**
* @ingroup cce
* @brief bundle of a cc handle, a stream and memory base pointers with their sizes
*
* NOTE(review): weightsMemBaseHost has no matching size field; presumably it shares
* weightsMemSize — confirm.
*/
typedef struct tagContextInfo {
ccHandle_t handle;
rtStream_t stream;
uint8_t *memBase;
uint64_t totalMemSize;
uint8_t *weightsMemBase;
uint64_t weightsMemSize;
uint8_t *weightsMemBaseHost;
} ContextInfo;

/**
* @ingroup cce
* @brief cce function parameter type
*
* Values are implicit: CC_FUSION_L2 = 0, GLOBAL_MEMORY_CLEAR = 1, MAX_NUM = 2.
* NOTE(review): MAX_NUM looks like a count sentinel rather than a selectable
* function — confirm.
*/
typedef enum tagCcFuncType {
CC_FUSION_L2,
GLOBAL_MEMORY_CLEAR,
MAX_NUM,
} ccFuncParamType_t;

/**
* @ingroup cce
* @brief cce set function point state
* @param [in] type function selected by ccFuncParamType_t
* @param [in] isOpen state to set for that function
* @return ccStatus_t
*/
ccStatus_t ccSetFuncState(ccFuncParamType_t type, bool isOpen);

/**
* @ingroup cce
* @brief cce get function point state
* @param [in] type function selected by ccFuncParamType_t
* @return bool current state of that function
*/
bool ccGetFuncState(ccFuncParamType_t type);

} // namespace cce
#endif // CCE_DEF_H__

+ 82
- 0
third_party/fwkacllib/inc/inc/cce/common/attr_list.hpp View File

@@ -0,0 +1,82 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ATTR_LIST_HPP__
#define ATTR_LIST_HPP__

#include "catch.hpp"

/**
* @ingroup util
* @brief status codes of the attribute list: ATTR_SUCCESS is 0, errors are 1-8
*/
#define ATTR_SUCCESS (0)
#define ATTR_ERROR_NULL_POINT (1)
#define ATTR_ERROR_ALREADY_EXIST (2)
#define ATTR_ERROR_NOT_EXIST (3)
#define ATTR_ERROR_BUFFER_NOT_ENOUGH (4)
#define ATTR_ERROR_BAD_PARAM (5)
#define ATTR_ERROR_ALLOC_FAIL (6)
#define ATTR_ERROR_FREE_FAIL (7)
#define ATTR_ERROR_RESERVED (8)

// Implementation type of AttrList; only forward-declared in this header.
struct AttrListPrivate;
/**
* @ingroup util
* @brief attribute list
*
* Entries are added and looked up by a numeric attribute id.
* NOTE(review): the copy constructor is deleted but copy assignment is declared;
* confirm that assignment is really meant to be supported.
* NOTE(review): the single-argument constructor is not explicit, so a uint32_t
* converts implicitly to an AttrList.
*/
class AttrList {
public:
AttrList();
AttrList(uint32_t initLen);
~AttrList();
AttrList(const AttrList &rhs) = delete;
AttrList &operator=(const AttrList &rhs);

public:
/**
* @ingroup util
* @brief add an attribute
* @param [in] attrId attribute id
* @param [in] attrLen length of attribute
* @param [in] attrValue pointer to the attribute value
* @return uint32_t status (presumably one of the ATTR_* codes — confirm)
*/
uint32_t Add(uint32_t attrId, uint32_t attrLen, const void *attrValue);

/**
* @ingroup util
* @brief read an attribute
* @param [in] attrId attribute id
* @param [out] attrLen reference to the length of the attribute
* @param [out] attr_value reference to a pointer to the attribute value
* @return uint32_t status (presumably one of the ATTR_* codes — confirm)
* NOTE(review): the [out] directions are inferred from the non-const reference
* parameters — confirm.
*/
uint32_t Get(uint32_t attrId, uint32_t &attrLen, const void *&attr_value) const;

/**
* @ingroup util
* @brief get the length of attribute list
* @return length of attribute
*/
uint32_t Length() const;

private:
AttrListPrivate *impl_;
uint32_t initLen_;
uint32_t Init();
};
#endif // ATTR_LIST_HPP__

+ 95
- 0
third_party/fwkacllib/inc/inc/cce/common/catch.hpp View File

@@ -0,0 +1,95 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CATCH_HPP_
#define CATCH_HPP_

#include <stdint.h>
#include <iostream>

#define ERROR_CODE() __catch_error_code
#define ERROR_LINE_NO() __catch_error_line_no
#define ERROR_PROC() __catch_error_line_no = __LINE__;

#define PROC \
uint32_t __catch_error_code = 0x7FFFFFCC; \
uint32_t __catch_error_line_no = 0xFFFFFFFF; \
{
#define END_PROC \
} \
__tabErrorCode:
#define THROW(errcode) \
{ \
__catch_error_code = (errcode); \
ERROR_PROC(); \
goto __tabErrorCode; \
}
#define EXEC(func) \
{ \
if (0 != (__catch_error_code = (func))) THROW(__catch_error_code) \
}
#define EXEC_EX1(func, error_code) \
{ \
if (0 != (func)) THROW(error_code) \
}
#define EXEC_EX(func, succRet, error_code) \
{ \
if (succRet != (__catch_error_code = (func))) THROW(error_code) \
}
#define ASSERT_EXEC(func, succRet) \
{ \
if (succRet != (__catch_error_code = (func))) /*GO_ASSERT_FALSE();*/ \
THROW(__catch_error_code) \
} \
}
// Evaluates func and THROWs errcode when the result differs from succRet.
// Unlike EXEC_EX, the result of func is not stored in the error code.
#define NEW_ERROR_EXEC(errcode, func, succRet) \
{ \
if (succRet != (func)) { \
THROW(errcode) \
} \
}
// THROWs errcode when expr is false.
#define JUDGE(errcode, expr) \
{ \
if (!(expr)) { \
THROW(errcode) \
} \
}
// Same expansion as JUDGE; the assert hook (GO_ASSERT_FALSE) is commented out.
#define ASSERT_JUDGE(errcode, expr) \
{ \
if (!(expr)) { /*GO_ASSERT_FALSE();*/ \
THROW(errcode) \
} \
}
// THROWs errcode when expr is true.
#define JUDGE_FALSE(errcode, expr) \
{ \
if (expr) { \
THROW(errcode) \
} \
}
// Executes `continue` when expr is true, so it must be used inside a loop.
#define JUDGE_CONTINUE(expr) \
{ \
if (expr) { \
continue; \
} \
}
// Opens a handler block that runs when the stored error code equals errcode;
// must be closed with END_CATCH_ERROR.
#define CATCH_ERROR(errcode) if (__catch_error_code == (errcode)) { // ERROR_LOG();
// Opens an unconditional handler block; must be closed with END_CATCH_ERROR.
#define CATCH_ALL_ERROR {
// Closes a block opened by CATCH_ERROR or CATCH_ALL_ERROR.
#define END_CATCH_ERROR }
// Places the label __tabFinal, the jump target of GOTO_FINAL.
#define FINAL \
__tabFinal:
// Ends the FINAL section with an empty statement.
#define END_FINAL /*GO_ASSERT_FALSE()*/ ;
// Jumps to the label placed by FINAL; the expansion already ends with a semicolon.
#define GOTO_FINAL() goto __tabFinal;
#endif // CATCH_HPP_

+ 36
- 0
third_party/fwkacllib/inc/inc/cce/compiler_stub.h View File

@@ -0,0 +1,36 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef COMPILER_STUB_H__
#define COMPILER_STUB_H__

namespace cce {

/**
* @ingroup cce
* @brief compiler stub init func
* @return bool result of the initialisation
*/
bool compilerStubInit();

/**
* @ingroup cce
* @brief compiler stub free func
* @return bool result of the release
*/
bool compilerStubFree();

}; // namespace cce

#endif // COMPILER_STUB_H__

+ 60
- 0
third_party/fwkacllib/inc/inc/cce/customize.h View File

@@ -0,0 +1,60 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CC_CUSTOMIZE_API__
#define CC_CUSTOMIZE_API__

#include <stdint.h>

/** Number of entries in the dim array of opTensor_t. */
#define CC_DEVICE_DIM_MAX 8
/** Tensor format of a custom op tensor: NC1HWC0 = 0, ND = 1, RESERVED = 2. */
typedef enum tagOpTensorFormat
{
OP_TENSOR_FORMAT_NC1HWC0 = 0,
OP_TENSOR_FORMAT_ND,
OP_TENSOR_FORMAT_RESERVED,

} opTensorFormat_t;


/** Data type of a custom op tensor; values count up from OP_DATA_FLOAT = 0. */
typedef enum tagOpDataType
{
OP_DATA_FLOAT = 0, /**< float type */
OP_DATA_HALF, /**< fp16 type */
OP_DATA_INT8, /**< int8 type */
OP_DATA_INT32, /**< int32 type */
OP_DATA_UINT8, /**< uint8 type */
OP_DATA_HALF_UINT16_PROPOSAL, /**< mixed type for proposal */
OP_DATA_RESERVED
} opDataType_t;

/**
* Tensor description of a custom op: format, data type, dimension count and up
* to CC_DEVICE_DIM_MAX dimension sizes.
* NOTE(review): the meaning of `mm` is not visible in this header — confirm.
*/
typedef struct tagOpTensor
{
// real dim info
opTensorFormat_t format;
opDataType_t data_type;
int32_t dim_cnt;
int32_t mm;
int32_t dim[CC_DEVICE_DIM_MAX];
} opTensor_t;

/** Alias of opTensor_t. */
typedef opTensor_t tagCcAICPUTensor;
/** Stream handle, declared here as an untyped pointer. */
typedef void * rtStream_t;
/**
* Signature of a custom aicpu op entry point: two groups of
* (tensor descriptors, data pointers, count), followed by a void pointer and a stream.
* NOTE(review): the first group is presumably the inputs and the second the outputs,
* and the void pointer presumably carries op-specific parameters — confirm.
*/
typedef void (*aicpu_run_func)(opTensor_t **, void **, int32_t,
opTensor_t **, void **, int32_t, void *, rtStream_t);


#endif // CC_CUSTOMIZE_API__


+ 23
- 0
third_party/fwkacllib/inc/inc/cce/dnn.h View File

@@ -0,0 +1,23 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DNN_H__
#define DNN_H__

#include "cce/dnn_base.h"
#include "cce/dnn_op.h"

#endif // DNN_H__

+ 676
- 0
third_party/fwkacllib/inc/inc/cce/dnn_base.h View File

@@ -0,0 +1,676 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DNN_BASE_H__
#define DNN_BASE_H__

#include "cce/blas_struct.h"
#include "cce/customize.h"
#include "cce/dnn_base_def.hpp"

namespace cce {
/**
* @ingroup dnn
* @brief Minimum epsilon allowed to be used in the Batch Normalization formula
*/
#define CC_BN_MIN_EPSILON (1e-7)

// Fallback definition of NULL, used only when no earlier header defined it.
// Macros are not scoped, so placing it inside namespace cce has no namespacing effect.
#ifndef NULL
#ifdef __cplusplus
#define NULL 0
#else
#define NULL ((void *)0)
#endif
#endif

/**
* @ingroup dnn
* @brief max number of dimensions
*/
#define CC_DIM_MAX (8)

/** Descriptor handle for L2 loss: a pointer to a struct that is only declared here. */
typedef struct cCTagL2LossDescriptor * ccL2LossDescriptor_t;

/**
* @ingroup dnn
* @brief transform-for-loss mode: box = 0, score = 1
* NOTE(review): the original comment called this the "mode of concatfive2fout",
* presumably meaning ConcatFive2Four — confirm.
*/
typedef enum tagTransForLossMode {
CC_TRANS_FOR_BOX = 0,
CC_TRANS_FOR_SCORE,
} ccTransForLossMode_t;

/**
* @ingroup dnn
* @brief descriptor of ConcatFive2Four (a pointer to a struct that is only declared here)
*/
typedef struct tagCcConcatFive2Four_t *ccConcatFive2FourDescriptor_t;

}; /* end cce */

namespace cce {

/**
* @ingroup dnn
* @brief create a tensor descriptor
* @param [in|out] tensorDesc pointer to the descriptor of tensor to be created
* @return ccStatus_t
*/
ccStatus_t ccCreateTensorDescriptor(ccTensorDescriptor_t *tensorDesc);

/**
* @ingroup dnn
* @brief destroy a tensor descriptor
* @param [in] tensorDesc pointer to the descriptor of tensor to destroy
* @return ccStatus_t
*/
ccStatus_t ccDestroyTensorDescriptor(ccTensorDescriptor_t *tensorDesc);

/**
* @ingroup dnn
* @brief initialize a tensor descriptor as a 4d tensor
* @param [in|out] tensorDesc descriptor of tensor
* @param [in] format format of tensor
* @param [in] dataType data type in device
* @param [in] n batch size
* @param [in] c channels
* @param [in] h height of feature map
* @param [in] w width of feature map
* @return ccStatus_t
*/
ccStatus_t ccSetTensor4dDescriptor(ccTensorDescriptor_t tensorDesc,
ccTensorFormat_t format,
ccDataType_t dataType,
int32_t n,
int32_t c,
int32_t h,
int32_t w);

/**
* @ingroup dnn
* @brief read the settings of a 4d tensor descriptor
* @param [in] tensorDesc descriptor of tensor
* @param [in|out] dataType pointer to data type in device
* @param [in|out] n pointer to batch size
* @param [in|out] c pointer to channels
* @param [in|out] h pointer to height of feature map
* @param [in|out] w pointer to width of feature map
* @param [in|out] nStride pointer to stride of n
* @param [in|out] cStride pointer to stride of c
* @param [in|out] hStride pointer to stride of h
* @param [in|out] wStride pointer to stride of w
* @return ccStatus_t
*/
ccStatus_t ccGetTensor4dDescriptor(const ccTensorDescriptor_t tensorDesc,
ccDataType_t *dataType,
int32_t *n,
int32_t *c,
int32_t *h,
int32_t *w,
int32_t *nStride,
int32_t *cStride,
int32_t *hStride,
int32_t *wStride);

/**
* @ingroup dnn
* @brief print a 4d tensor descriptor (only in debug log mode)
* @param [in] tensorDesc descriptor of tensor
* @return ccStatus_t
*/
ccStatus_t ccPrintTensor4dDescriptor(const ccTensorDescriptor_t tensorDesc);

/**
* @ingroup dnn
* @brief print an Nd tensor descriptor (only in debug log mode)
* @param [in] tensorDesc descriptor of tensor
* @return ccStatus_t
*/
ccStatus_t ccPrintTensorNdDescriptor(const ccTensorDescriptor_t tensorDesc);

/**
* @ingroup dnn
* @brief initialize a tensor descriptor as an Nd tensor
* @param [in|out] tensorDesc descriptor of tensor
* @param [in] dataType data type in device
* @param [in] dimCnt number of dimensions of the tensor
* @param [in] dimA array of dimCnt entries holding the size of the tensor for every dimension;
*             the size along unused dimensions should be set to 1
* @return ccStatus_t
*/
ccStatus_t ccSetTensorNdDescriptor(ccTensorDescriptor_t tensorDesc,
ccDataType_t dataType,
int32_t dimCnt,
int32_t dimA[]);

/**
* @ingroup dnn
* @brief read the settings of an Nd tensor descriptor
* @param [in] tensorDesc descriptor of tensor
* @param [in] dimCntReq number of dimensions requested; dimA and strideA must hold at least this many entries
* @param [in|out] dataType pointer to data type in device
* @param [in|out] dimCnt pointer to the number of dimensions of the tensor
* @param [in|out] dimA array of at least dimCntReq entries that will be filled with the dimensions
*                 from the provided tensor descriptor
* @param [in|out] strideA array of dimCntReq entries that receives the stride of the tensor for every dimension
* @return ccStatus_t
*/
ccStatus_t ccGetTensorNdDescriptor(const ccTensorDescriptor_t tensorDesc,
int32_t dimCntReq,
ccDataType_t *dataType,
int32_t *dimCnt,
int32_t dimA[],
int32_t strideA[]);

/**
* @ingroup dnn
* @brief transform tensor between 4d(NCHW) and 5d(NC1HWC0)
* @param [in] xDesc descriptor of input tensor
* @param [in] x pointer to input data in host memory
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y pointer to output data in host memory
* @param [in] ySizeInBytes size of the output data, in bytes
* @return ccStatus_t
*/
ccStatus_t ccTransTensor(const ccTensorDescriptor_t xDesc,
const void *x,
const ccTensorDescriptor_t yDesc,
void *y,
uint32_t ySizeInBytes);

/**
* @ingroup dnn
* @brief get the format of a tensor (this declaration exposes the format only, no dimension count)
* @param [in] tensorDesc descriptor of tensor
* @param [in|out] format pointer to format
* @return ccStatus_t
*/
ccStatus_t ccGetTensorFormat(const ccTensorDescriptor_t tensorDesc,
ccTensorFormat_t *format);

/**
* @ingroup dnn
* @brief set the format of a tensor
* @param [in|out] tensorDesc descriptor of tensor
* @param [in] format format to set (passed by value)
* @return ccStatus_t
*/
ccStatus_t ccSetTensorFormat(ccTensorDescriptor_t tensorDesc,
ccTensorFormat_t format);


/**
* @ingroup dnn
* @brief get the RealDimCnt of a tensor
* @param [in] tensorDesc descriptor of tensor
* @param [in|out] realDimCnt pointer to RealDimCnt
* @return ccStatus_t
*/
ccStatus_t ccGetTensorRealDimCnt(const ccTensorDescriptor_t tensorDesc,
int32_t *realDimCnt);

/**
* @ingroup dnn
* @brief set the RealDimCnt of a tensor
* @param [in|out] tensorDesc descriptor of tensor
* @param [in] realDimCnt RealDimCnt to set
* @return ccStatus_t
*/
ccStatus_t ccSetTensorRealDimCnt(ccTensorDescriptor_t tensorDesc,
int32_t realDimCnt);


/**
* @ingroup dnn
* @brief get data size of 4d tensor, in bytes
* @param [in] tensorDesc descriptor of tensor
* @param [in|out] size pointer to data size
* @return ccStatus_t
*/
ccStatus_t ccGetTensorSizeInBytes(const ccTensorDescriptor_t tensorDesc, uint32_t *size);

/**
* @ingroup dnn
* @brief get data size of 4d tensor, in bytes, aligned to 32B
* @param [in] tensorDesc descriptor of tensor
* @param [in|out] size pointer to data size
* @return ccStatus_t
*/
ccStatus_t ccGetTensorMemorySizeInBytes(const ccTensorDescriptor_t tensorDesc, uint32_t *size);


/**
* @ingroup dnn
* @brief set the data size of a tensor descriptor
* @param [in|out] xDesc descriptor of tensor
* @param [in] size data size to set - NOTE(review): presumably in bytes, like ccGetTensorSizeInBytes; confirm
* @return ccStatus_t
*/
ccStatus_t ccSetTensorDataSize(ccTensorDescriptor_t xDesc, uint32_t size);

/**
* @ingroup dnn
* @brief get data size of 4d filter, in bytes
* @param [in] filterDesc descriptor of filter
* @param [in|out] size pointer to data size
* @return ccStatus_t
*/
ccStatus_t ccGetFilterSizeInBytes(const ccFilterDescriptor_t filterDesc, uint32_t *size);


/**
* @ingroup dnn
* @brief read the settings of a 4d filter descriptor
* @param [in] filterDesc descriptor of filter
* @param [in|out] format pointer to format of filter
* @param [in|out] dataType pointer to data type in device
* @param [in|out] k pointer to number of output feature maps
* @param [in|out] c pointer to number of input feature maps
* @param [in|out] h pointer to height of filter
* @param [in|out] w pointer to width of filter
* @return ccStatus_t
*/
ccStatus_t ccGetFilter4dDescriptor(const ccFilterDescriptor_t filterDesc,
ccTensorFormat_t *format,
ccDataType_t *dataType,
int32_t *k,
int32_t *c,
int32_t *h,
int32_t *w);

/**
* @ingroup dnn
* @brief transform a filter from FracZ to NCHW
*        NOTE(review): summary derived from the function name only - confirm against the implementation
* @param [in] wDesc descriptor of input filter
* @param [in] w input data pointer
* @param [in|out] yDesc descriptor of output filter (not const-qualified in this declaration)
* @param [in|out] y output data pointer
* @param [in] ySizeInBytes size of the output data, in bytes
* @return ccStatus_t
*/
ccStatus_t ccTransFilterFracZToNCHW(const ccFilterDescriptor_t wDesc,
const void *w,
ccFilterDescriptor_t yDesc,
void *y,
uint32_t ySizeInBytes);

/**
* @ingroup dnn
* @brief transform weight to fractal format, converting the data type at the same time
* @param [in] wDesc descriptor of input filter
* @param [in] w input data pointer
* @param [in] yDesc descriptor of output filter
* @param [in|out] y output data pointer
* @param [in] ySizeInBytes size of the output data, in bytes
* @return ccStatus_t
*/
ccStatus_t ccTransFilter(const ccFilterDescriptor_t wDesc,
const void *w,
const ccFilterDescriptor_t yDesc,
void *y,
uint32_t ySizeInBytes);

/**
* @ingroup dnn
* @brief transform weight to fractal format, converting the data type at the same time
* @param [in] wDesc descriptor of input filter
* @param [in] w input data pointer
* @param [in|out] yDesc descriptor of output filter (not const-qualified in this declaration)
* @param [in|out] y output data pointer
* @param [in] ySizeInBytes size of the output data, in bytes
* @param [in] outputDataType data type of the output - NOTE(review): description inferred from the parameter name; confirm
* @return ccStatus_t
*/
ccStatus_t ccTransFilterInt8(const ccFilterDescriptor_t wDesc,
const void *w,
ccFilterDescriptor_t yDesc,
void *y,
uint32_t ySizeInBytes,
ccDataType_t outputDataType);

/**
* @ingroup dnn
* @brief create a filter descriptor
* @param [in|out] filterDesc pointer to the descriptor of filter to be created
* @return ccStatus_t
*/
ccStatus_t ccCreateFilterDescriptor(ccFilterDescriptor_t *filterDesc);

/**
* @ingroup dnn
* @brief destroy a filter descriptor
* @param [in] filterDesc pointer to the descriptor of filter to destroy
* @return ccStatus_t
*/
ccStatus_t ccDestroyFilterDescriptor(ccFilterDescriptor_t *filterDesc);

/**
* @ingroup dnn
* @brief initialize a convolution descriptor as a 2d convolution
* @param [in|out] convDesc descriptor of convolution operator
* @param [in] mode mode of convolution
* @param [in] padMode mode of padding
* @param [in] padHHead zero padding in height head; if padMode is not CC_PADDING_DIRECTASSIGN, head and tail have the same value
* @param [in] padHTail zero padding in height tail; needs to be set when padMode is CC_PADDING_DIRECTASSIGN
* @param [in] padWHead zero padding in width head; if padMode is not CC_PADDING_DIRECTASSIGN, head and tail have the same value
* @param [in] padWTail zero padding in width tail; needs to be set when padMode is CC_PADDING_DIRECTASSIGN
* @param [in] group group value of the convolution - NOTE(review): presumably the number of convolution groups; confirm
* @param [in] strideH stride in height
* @param [in] strideW stride in width
* @param [in] dilationH dilation in height
* @param [in] dilationW dilation in width
* @return ccStatus_t
*/
ccStatus_t ccSetConvolution2dDescriptor(ccConvolutionDescriptor_t convDesc,
ccConvolutionMode_t mode,
ccPaddingMode_t padMode,
int32_t padHHead,
int32_t padHTail,
int32_t padWHead,
int32_t padWTail,
int32_t group,
int32_t strideH,
int32_t strideW,
int32_t dilationH,
int32_t dilationW);

/**
* @ingroup dnn
* @brief read the settings of a 2d convolution descriptor
* @param [in] convDesc descriptor of convolution operator
* @param [in|out] mode pointer to mode of convolution
* @param [in|out] padMode pointer to mode of padding
* @param [in|out] padHHead pointer to zero padding in height head; if padMode is not CC_PADDING_DIRECTASSIGN, head and tail have the same value
* @param [in|out] padHTail pointer to zero padding in height tail; set when padMode is CC_PADDING_DIRECTASSIGN
* @param [in|out] padWHead pointer to zero padding in width head; if padMode is not CC_PADDING_DIRECTASSIGN, head and tail have the same value
* @param [in|out] padWTail pointer to zero padding in width tail; set when padMode is CC_PADDING_DIRECTASSIGN
* @param [in|out] group pointer to group value of the convolution - NOTE(review): presumably the number of convolution groups; confirm
* @param [in|out] strideH pointer to stride in height
* @param [in|out] strideW pointer to stride in width
* @param [in|out] dilationH pointer to dilation in height
* @param [in|out] dilationW pointer to dilation in width
* @return ccStatus_t
*/
ccStatus_t ccGetConvolution2dDescriptor(const ccConvolutionDescriptor_t convDesc,
ccConvolutionMode_t *mode,
ccPaddingMode_t *padMode,
int32_t *padHHead,
int32_t *padHTail,
int32_t *padWHead,
int32_t *padWTail,
int32_t *group,
int32_t *strideH,
int32_t *strideW,
int32_t *dilationH,
int32_t *dilationW);

/**
* @ingroup dnn
* @brief get the output dimension info of a 2d convolution
* @param [in] convDesc descriptor of convolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] wDesc descriptor of filter
* @param [in|out] n pointer to batch size
* @param [in|out] c pointer to channels
* @param [in|out] h pointer to height of feature map
* @param [in|out] w pointer to width of feature map
* @return ccStatus_t
*/
ccStatus_t ccGetConvolution2dForwardOutputDim(const ccConvolutionDescriptor_t convDesc,
const ccTensorDescriptor_t xDesc,
const ccFilterDescriptor_t wDesc,
int32_t *n,
int32_t *c,
int32_t *h,
int32_t *w);

/**
* @ingroup dnn
* @brief create a descriptor of convolution operator
* @param [in|out] convDesc pointer to the descriptor of convolution operator to be created
* @return ccStatus_t
*/
ccStatus_t ccCreateConvolutionDescriptor(ccConvolutionDescriptor_t *convDesc);

/**
* @ingroup dnn
* @brief destroy a descriptor of convolution operator
* @param [in] convDesc pointer to the descriptor of convolution operator to destroy
* @return ccStatus_t
*/
ccStatus_t ccDestroyConvolutionDescriptor(ccConvolutionDescriptor_t *convDesc);

/**
* @ingroup dnn
* @brief check the specific stride condition and report it through a flag
* @param [in] deconvDesc descriptor of Deconvolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in] biasDesc descriptor of bias tensor
* @param [in] wDesc descriptor of filter
* @param [in|out] transMark output condition flag (passed by reference)
* @return ccStatus_t
*/
ccStatus_t ccDeconvSpStrideCondCheck(const ccConvolutionDescriptor_t deconvDesc,
const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
const ccTensorDescriptor_t biasDesc,
const ccFilterDescriptor_t wDesc,
uint32_t &transMark);

/**
* @ingroup dnn
* @brief special deconv stride trans
* @param [in] deconvDesc descriptor of Deconvolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in] biasDesc descriptor of bias tensor (marked __unused__ in this declaration)
* @param [in] wDesc descriptor of filter
* @param [in|out] deconvStPtr descriptor of trans Deconvolution operator
*                 NOTE(review): description inferred from the parameter type and the neighbouring *StPtr parameters; confirm
* @param [in|out] xStPtr descriptor of trans input tensor
* @param [in|out] yStPtr descriptor of trans output tensor
* @param [in|out] wStPtr descriptor of trans filter
* @param [in] transMark condition flag (passed by value)
* @return ccStatus_t
*/
ccStatus_t ccDeconvSpStrideDescTrans(const ccConvolutionDescriptor_t deconvDesc,
const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
const ccTensorDescriptor_t biasDesc __attribute__((__unused__)),
const ccFilterDescriptor_t wDesc,
ccConvolutionDescriptor_t deconvStPtr,
ccTensorDescriptor_t xStPtr,
ccTensorDescriptor_t yStPtr,
ccFilterDescriptor_t wStPtr,
uint32_t transMark);

/**
* @ingroup dnn
* @brief check the deconv goto-aicore flag
* @param [in] deconvDesc descriptor of Deconvolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in] wDesc descriptor of filter
* @param [in|out] isGotoAicore pointer to the out flag
* @param [in] transMark condition flag
* @return ccStatus_t
*/
ccStatus_t ccDeconvCheckGotoAiCore(const ccConvolutionDescriptor_t deconvDesc,
const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
const ccFilterDescriptor_t wDesc,
uint32_t *isGotoAicore,
uint32_t transMark);

/**
* @ingroup dnn
* @brief get the output dimension info of a 2d Deconvolution
* @param [in] deconvDesc descriptor of Deconvolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] wDesc descriptor of filter
* @param [in|out] n pointer to batch size
* @param [in|out] c pointer to channels
* @param [in|out] h pointer to height of feature map
* @param [in|out] w pointer to width of feature map
* @return ccStatus_t
*/
ccStatus_t ccGetDeconvolution2dForwardOutputDim(const ccConvolutionDescriptor_t deconvDesc,
const ccTensorDescriptor_t xDesc,
const ccFilterDescriptor_t wDesc,
int32_t *n,
int32_t *c,
int32_t *h,
int32_t *w);

/**
* @ingroup dnn
* @brief create a descriptor of PAD
* @param [in|out] padDesc pointer to the descriptor of PAD to be created
* @return ccStatus_t
*/
ccStatus_t ccCreatePadDescriptor(ccPadDescriptor_t *padDesc);

/**
* @ingroup dnn
* @brief destroy a descriptor of PAD
* @param [in] padDesc pointer to the descriptor of PAD to destroy
* @return ccStatus_t
*/
ccStatus_t ccDestroyPadDescriptor(ccPadDescriptor_t *padDesc);

/**
* @ingroup dnn
* @brief set the descriptor of PAD
* @param [in|out] padDesc descriptor of PAD
* @param [in] padMode mode of PAD
* @param [in] padValue pad value of PAD
* @param [in] htop height top pad of PAD
* @param [in] hbottom height bottom pad of PAD
* @param [in] wleft width left pad of PAD
* @param [in] wright width right pad of PAD
* @return ccStatus_t
*/
ccStatus_t ccSetPadDescriptor(ccPadDescriptor_t padDesc,
ccPadMode_t padMode,
float padValue,
int32_t htop,
int32_t hbottom,
int32_t wleft,
int32_t wright);

/**
* @ingroup dnn
* @brief read the settings of a 2d pooling descriptor
* @param [in] poolingDesc descriptor of pooling operator
* @param [in|out] mode pointer to mode of pooling
* @param [in|out] padMode pointer to mode of padding
* @param [in|out] maxpoolingNanOpt pointer to Nan propagation mode
* @param [in|out] windowH pointer to height of pooling window
* @param [in|out] windowW pointer to width of pooling window
* @param [in|out] padHHead pointer to zero padding in height head; if padMode is not CC_PADDING_DIRECTASSIGN, head and tail have the same value
* @param [in|out] padHTail pointer to zero padding in height tail; set when padMode is CC_PADDING_DIRECTASSIGN
* @param [in|out] padWHead pointer to zero padding in width head; if padMode is not CC_PADDING_DIRECTASSIGN, head and tail have the same value
* @param [in|out] padWTail pointer to zero padding in width tail; set when padMode is CC_PADDING_DIRECTASSIGN
* @param [in|out] strideH pointer to stride in height
* @param [in|out] strideW pointer to stride in width
* @param [in|out] dataMode pointer to data mode - NOTE(review): meaning of the value is not documented here
* @param [in|out] ceilMode pointer to ceil mode, 0:Ceil 1:Floor
* @param [in|out] algo pointer to the pooling forward algorithm (ccPooingFwdAlgo_t)
* @return ccStatus_t
*/
ccStatus_t ccGetPooling2dDescriptor(const ccPoolingDescriptor_t poolingDesc,
ccPoolingMode_t *mode,
ccPaddingMode_t *padMode,
ccNanPropagation_t *maxpoolingNanOpt,
int32_t *windowH,
int32_t *windowW,
int32_t *padHHead,
int32_t *padHTail,
int32_t *padWHead,
int32_t *padWTail,
int32_t *strideH,
int32_t *strideW,
int32_t *dataMode,
int32_t *ceilMode,
ccPooingFwdAlgo_t *algo);

/**
* @ingroup dnn
* @brief get the output dimension info of the compare operator
*        NOTE(review): summary derived from the function name only - confirm against the implementation
* @param [in] xDesc descriptor of the first input tensor
* @param [in] yDesc descriptor of the second tensor - NOTE(review): presumably the other operand; confirm
* @param [in|out] dimCnt pointer to the output dimCnt
* @param [in|out] dim array to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetCompare5dOutputDim(const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
int32_t* dimCnt,
int32_t* dim,
int32_t dimLen);

/**
* @ingroup dnn
* @brief get the output dimension info of the maximum operator
*        NOTE(review): summary derived from the function name only - confirm against the implementation
* @param [in] xDesc descriptor of the first input tensor
* @param [in] yDesc descriptor of the second tensor - NOTE(review): presumably the other operand; confirm
* @param [in|out] dimCnt pointer to the output dimCnt
* @param [in|out] dim array to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetMaximum5dOutputDim(const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
int32_t* dimCnt,
int32_t* dim,
int32_t dimLen);

/**
* @ingroup dnn
* @brief get the output dimension info of the minimum operator
*        NOTE(review): summary derived from the function name only - confirm against the implementation
* @param [in] xDesc descriptor of the first input tensor
* @param [in] yDesc descriptor of the second tensor - NOTE(review): presumably the other operand; confirm
* @param [in|out] dimCnt pointer to the output dimCnt
* @param [in|out] dim array to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetMinimum5dOutputDim(const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
int32_t* dimCnt,
int32_t* dim,
int32_t dimLen);

/**
* @ingroup dnn
* @brief get the output dimension info of the reduce operator
*        NOTE(review): summary derived from the function name only - confirm against the implementation
* @param [in] xDesc descriptor of input tensor
* @param [in] axis pointer to the axis array - NOTE(review): presumably the axes to reduce over; confirm
* @param [in] keepDims keep-dims flag - NOTE(review): presumably keeps reduced axes with size 1; confirm
* @param [in|out] dimCnt pointer to the output dimCnt
* @param [in|out] dim array to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetReduce5dOutputDim(const ccTensorDescriptor_t xDesc,
const ccIntArray_t* axis,
bool keepDims,
int32_t *dimCnt,
int32_t dim[],
int32_t dimLen);

/**
* @ingroup dnn
* @brief get the output description of a slice tensor
* @param [in] xDesc descriptor of input data
* @param [in] begin begin position of tensor
* @param [in] size size to slice
* @param [in|out] dimCnt pointer to the output dimCnt
* @param [in|out] dim array to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetSliceOutputDim(
const ccTensorDescriptor_t xDesc,
const ccIntArray_t* begin,
const ccIntArray_t* size,
int32_t *dimCnt,
int32_t dim[],
int32_t dimLen);

/**
* @ingroup dnn
* @brief get strided slice output dim info
* @param [in] xDesc descriptor of input tensor
* @param [in] stridedSliceDesc specifies the begin, end, strides of slice
* @param [in] attrDesc reserved for optional attributes
* @param [in|out] dimCnt pointer to the output dimCnt
* @param [in|out] dim array to save dims
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetStridedSliceOutputDim(const ccTensorDescriptor_t xDesc,
const ccStridedSliceDescriptor_t stridedSliceDesc,
const ccStridedSliceAttrsDescriptor_t attrDesc,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);

/**
* @ingroup dnn
* @brief get workspace size for softmax computation
* @param [in] handle cce handle
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in|out] sizeInBytes pointer to the workspace size, in bytes
* @return ccStatus_t
*/
ccStatus_t ccGetSoftmaxForwardWorkspaceSize(ccHandle_t handle,
const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc,
uint32_t *sizeInBytes);

/**
* @ingroup dnn
* @brief set quantize algorithm type and quantize scale type (vector or scalar)
* @param [in|out] quantizeInfo descriptor of quantize parameters
* @param [in] quantAlgo enum type for quantize algorithm type
* @param [in] scaleType enum type for quantize scale type
* @return ccStatus_t
*/
ccStatus_t ccSetQuantizeAlgoAndScaleType(ccQuantizeDescriptor_t quantizeInfo, ccQuantizeAlgo_t quantAlgo, ccScaleType_t scaleType);
/**
* @ingroup dnn
* @brief overload that sets quantize algorithm type and quantize scale type together with a relu flag
* @param [in|out] quantizeInfo descriptor of quantize parameters
* @param [in] quantAlgo enum type for quantize algorithm type
* @param [in] scaleType enum type for quantize scale type
* @param [in] reluFlag flag for relu
* @return ccStatus_t
*/
ccStatus_t ccSetQuantizeAlgoAndScaleType(ccQuantizeDescriptor_t quantizeInfo, ccQuantizeAlgo_t quantAlgo, ccScaleType_t scaleType, bool reluFlag);

}; /* end cce */

#endif // DNN_BASE_H__

+ 994
- 0
third_party/fwkacllib/inc/inc/cce/dnn_base_def.hpp View File

@@ -0,0 +1,994 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DNN_BASE_HPP__
#define DNN_BASE_HPP__

#include "cce/cce_def.hpp"

namespace cce {

/**
* @ingroup dnn
* @brief tiling parameters (struct tagCcWeightCompressInfo); every member is a uint32_t
*/
typedef struct tagCcWeightCompressInfo {
uint32_t blockRow; /**< block row */
uint32_t blockCol; /**< block col */
uint32_t fractalK; /**< fractal K */
uint32_t fractalN; /**< fractal N */
uint32_t lastFractalK; /**< K of last fractal */
uint32_t lastFractalN; /**< N of last fractal */
uint32_t cubeSize; /**< cube's length */
uint32_t loadDir; /**< data load direction, 0: col load, 1: row load */
} ccWeightCompressInfo_t;

/**
* @ingroup dnn
* @brief compress table info; three uint16_t bit-fields that add up to 16 bits (14 + 1 + 1)
*/
typedef struct tagCcWeightCompressTab {
uint16_t dataLen : 14; /**< data length in 128 Byte - NOTE(review): presumably counted in 128-byte units; confirm */
uint16_t storeFlag : 1; /**< 0: compressed addr = original addr, 1: compressed addr = original addr + 256 Byte */
uint16_t dataType : 1; /**< 0: original data, 1: compressed data */
} ccWeightCompressTab_t;

/**
* @ingroup dnn
* @brief quantize algorithm type: non offset, half offset or all offset
*/
typedef enum {
QUANT_ALGO_NON_OFFSET = 0,
QUANT_ALGO_HALF_OFFSET = 1,
QUANT_ALGO_ALL_OFFSET = 2,
QUANT_ALGO_BUTT /* end-of-list marker, takes the next value (3) */
} ccQuantizeAlgo_t;
/**
 * @ingroup dnn
 * @brief quantize scale type: vector mode or scalar mode
 *        (one anonymous enum published under two type names)
 */
typedef enum {
  SCALE_VEC = 0,    /**< vector mode */
  SCALE_SCALAR = 1, /**< scalar mode */
  SCALE_TYPE_BUTT   /**< end-of-list marker */
} ccConvolutionScaleType_t, ccScaleType_t;

/**
 * @ingroup dnn
 * @brief quantize scale value mode: sqrt mode or non sqrt mode
 *        (one anonymous enum published under two type names)
 */
typedef enum {
  SCALE_NORMAL = 0,     /**< non sqrt mode */
  SCALE_SQRT = 1,       /**< sqrt mode */
  SCALE_VALUE_MODE_BUTT /**< end-of-list marker */
} ccConvolutionScaleValueMode_t, ccScaleValueMode_t;

/**
* @ingroup dnn
* @brief three float scales and three uint8_t offsets
*        NOTE(review): the name suggests these are the parameters of QUANT_ALGO_ALL_OFFSET, with the
*        W / D / DNext suffixes presumably meaning weight, data and next data - confirm
*/
typedef struct {
float scaleW;
float scaleD;
float scaleDNext;
uint8_t offsetW;
uint8_t offsetD;
uint8_t offsetDNext;
} ccQuantAllOffsetPara_t;

/**
* @ingroup dnn
* @brief one float scale plus a uint16_t offset
*        NOTE(review): presumably one entry of a vector-mode quantize parameter array; confirm
*/
typedef struct tagCcVecQuantizePara {
float scale;
uint16_t offset;
uint16_t rrv; // 32byte align - NOTE(review): follows the 16-bit offset, so presumably padding to a 32-bit boundary; confirm
} ccVecQuantizePara_t;

/**
 * @ingroup dnn
 * @brief format of tensor
 * @note values are written out explicitly; the numbering jumps from 17 to 20
 */
typedef enum tagCcTensorFormat {
  CC_TENSOR_NCHW = 0,                                /**< NCHW */
  CC_TENSOR_NHWC = 1,                                /**< NHWC */
  CC_TENSOR_ND = 2,                                  /**< Nd Tensor */
  CC_TENSOR_NC1HWC0 = 3,                             /**< NC1HWC0 */
  CC_TENSOR_FRACTAL_Z = 4,                           /**< FRACTAL_Z */
  CC_TENSOR_NC1C0HWPAD = 5,
  CC_TENSOR_NHWC1C0 = 6,
  CC_TENSOR_FSR_NCHW = 7,
  CC_TENSOR_FRACTAL_DECONV = 8,
  CC_TENSOR_C1HWNC0 = 9,
  CC_TENSOR_FRACTAL_DECONV_TRANSPOSE = 10,
  CC_TENSOR_FRACTAL_DECONV_SP_STRIDE_TRANS = 11,
  CC_TENSOR_NC1HWC0_C04 = 12,                        /**< NC1HWC0, C0 = 4 */
  CC_TENSOR_FRACTAL_Z_C04 = 13,                      /**< FRACTAL_Z, C0 = 4 */
  CC_TENSOR_CHWN = 14,
  CC_TENSOR_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15,
  CC_TENSOR_HWCN = 16,
  CC_TENSOR_NC1KHKWHWC0 = 17,                        /**< KH,KW kernel h & kernel w; maxpooling max output format */
  CC_TENSOR_HASHTABLE_LOOKUP_LOOKUPS = 20,
  CC_TENSOR_HASHTABLE_LOOKUP_KEYS = 21,
  CC_TENSOR_HASHTABLE_LOOKUP_VALUE = 22,
  CC_TENSOR_HASHTABLE_LOOKUP_OUTPUT = 23,
  CC_TENSOR_HASHTABLE_LOOKUP_HITS = 24,
  CC_TENSOR_C1HWNCoC0 = 25,                          /**< C1,H,W,N,Co,C0 6D diagonal format */
  CC_TENSOR_RESERVED                                 /**< end-of-list marker */
} ccTensorFormat_t;

/**
 * @ingroup dnn
 * @brief type of compare
 */
typedef enum tagCcCompareType {
  CC_COMPARE_TYPE_LESS = 0,
  CC_COMPARE_TYPE_LESS_EQUAL = 1,
  CC_COMPARE_TYPE_NOT_EQUAL = 2,
  CC_COMPARE_TYPE_EQUAL = 3,
  CC_COMPARE_TYPE_GREATER = 4,
  CC_COMPARE_TYPE_GREATER_EQUAL = 5,
  CC_COMPARE_TYPE_RESERVED /**< end-of-list marker */
} ccCompareType_t;

/**
 * @ingroup dnn
 * @brief Nan propagation setting
 */
typedef enum tagCcNanPropagation {
  CC_NAN_NOT_PROPAGATE = 0, /**< Nan numbers are not propagated */
  CC_NAN_PROPAGATE = 1,     /**< Nan numbers are propagated */
  CC_NAN_PROPAGATE_RESERVED /**< end-of-list marker */
} ccNanPropagation_t;

/**
 * @ingroup dnn
 * @brief algorithm of convolution forward
 */
typedef enum tagCcConvolutionFwdAlgo {
  CC_CONVOLUTION_FWD_ALGO_GEMM = 0,     /**< matrix gemm algo */
  CC_CONVOLUTION_FWD_ALGO_WINOGRAD = 1, /**< Winograd Transform algo */
  CC_CONVOLUTION_FWD_ALGO_GEMM_ACCU_FLOAT32 = 2,
  CC_CONVOLUTION_FWD_ALGO_RESERVED      /**< end-of-list marker */
} ccConvolutionFwdAlgo_t;

/* The correlation forward algorithm type is a macro alias of the convolution one. */
#define ccCorrelationFwdAlgo_t ccConvolutionFwdAlgo_t

/**
 * @ingroup dnn
 * @brief algorithm of convolution backward
 */
typedef enum tagCcConvolutionBwdAlgo {
  CC_CONVOLUTION_BWD_ALGO_GEMM = 0,     /**< matrix gemm algo */
  CC_CONVOLUTION_BWD_ALGO_WINOGRAD = 1, /**< Winograd Transform algo */
  CC_CONVOLUTION_BWD_ALGO_GEMM_CO2IMG = 2,
  CC_CONVOLUTION_BWD_FILTER_GEM_ALGO = 3,
  CC_CONVOLUTION_BWD_ALGO_RESERVED      /**< end-of-list marker */
} ccConvolutionBwdAlgo_t;

/* The correlation backward algorithm type is a macro alias of the convolution one. */
#define ccCorrelationBwdAlgo_t ccConvolutionBwdAlgo_t

/**
 * @ingroup dnn
 * @brief algorithm of FullConnect forward
 */
typedef enum tagCcFullConnectFwdAlgo {
  CC_FULLCONNECT_FWD_ALGO_HALF = 0,
  CC_FULLCONNECT_FWD_ALGO_FLOAT32 = 1
} ccFullConnectFwdAlgo_t;

/**
 * @ingroup dnn
 * @brief mode of convolution
 */
typedef enum tagCcConvolutionMode {
  CC_CONV_CONVOLUTION = 0,       /**< math convolution */
  CC_CONV_CROSS_CORRELATION = 1, /**< cross-correlation convolution */
  CC_CONV_DECONVOLUTION = 2,     /**< deconvolution, also named transposed convolution */
  CC_CONV_MODE_DEPTHWISE = 3,    /**< depthwise convolution */
  CC_CONV_MODE_RESERVED          /**< end-of-list marker */
} ccConvolutionMode_t;

/* The correlation mode type is a macro alias of the convolution mode type. */
#define ccCorrelationMode_t ccConvolutionMode_t

/**
 * @ingroup dnn
 * @brief mode of pooling
 */
typedef enum tagCcPoolingMode {
  CC_POOLING_MAX = 0,      /**< max pooling */
  CC_POOLING_AVG = 1,      /**< average pooling */
  CC_POOLING_L2 = 2,       /**< L2 pooling */
  CC_POOLING_AVG_FP32 = 3, /**< average pooling for training */
  CC_POOLING_RESERVED      /**< end-of-list marker */
} ccPoolingMode_t;

/**
 * @ingroup dnn
 * @brief L0C accumulate algo of AvgPooling
 */
typedef enum tagCcPooingFwdAlgo {
  CC_POOLING_FWD_ALGO_HALF = 0,   /**< accumulate in L0c with FP16 */
  CC_POOLING_FWD_ALGO_FLOAT32 = 1 /**< accumulate in L0c with FP32 */
} ccPooingFwdAlgo_t;

/**
 * @ingroup dnn
 * @brief mode of momentum
 */
typedef enum tagMomentumAlgo {
  CC_MOMENTUM_UPDATE_FP32 = 0,      /**< FP32 out */
  CC_MOMENTUM_UPDATE_FP32_FP16 = 1, /**< FP32 and FP16 out */
  CC_MOMENTUM_UPDATE_FP32_NESTEROV = 2,
  CC_MOMENTUM_UPDATE_FP32_FP16_NESTEROV = 3,
  CC_MOMENTUM_RESERVED              /**< end-of-list marker */
} ccMomentumAlgo_t;

/**
 * @ingroup dnn
 * @brief mode of partitionStrategy
 * @attention if this enum needs to be modified, dPartitionStrategy_t must be modified as well
 */
typedef enum tagCcPartitionStrategy {
  CC_PARTITION_STRATEGY_MOD = 0, /**< mod */
  CC_PARTITION_STRATEGY_DIV = 1, /**< div */
  CC_PARTITION_STRATEGY_RESERVED /**< end-of-list marker */
} ccPartitionStrategy_t;

/**
 * @ingroup dnn
 * @brief mode of assignOp
 */
typedef enum tagCcAssignOpMode {
  CC_ASSIGN_ADD = 0, /**< assign add */
  CC_ASSIGN_SUB = 1, /**< assign sub */
  CC_ASSIGN_RESERVED /**< end-of-list marker */
} ccAssignOpMode_t;

/**
 * @ingroup dnn
 * @brief mode of arcSinCos
 */
typedef enum tagCcArcSinCosMode {
  CC_ARCUS_SIN = 0, /**< asin */
  CC_ARCUS_COS = 1, /**< acos */
  CC_ARCUS_RESERVED /**< end-of-list marker */
} ccArcSinCosMode_t;

/**
 * @ingroup dnn
 * @brief mode of padding
 */
typedef enum tagCcPaddingMode {
  CC_PADDING_CEIL = 0,
  CC_PADDING_DIRECTASSIGN = 1,
  CC_PADDING_VALID = 2,
  CC_PADDING_SAME = 3,      /**< Padding values of 0 are always used */
  CC_PADDING_CEIL_NEW = 4,  /**< new ceil, use for backward compatibility */
  CC_PADDING_VALID_NEW = 5, /**< new valid, use for backward compatibility */
  CC_PADDING_SAME_NEW = 6,  /**< new same, use for backward compatibility */
  CC_PADDING_RESERVED       /**< end-of-list marker */
} ccPaddingMode_t;

/**
 * @ingroup dnn
 * @brief mode of activation
 */
typedef enum tagCcActivationMode {
  CC_ACTIVATION_SIGMOID = 0,         /**< sigmoid */
  CC_ACTIVATION_RELU = 1,            /**< ReLU */
  CC_ACTIVATION_TANH = 2,            /**< tanh */
  CC_ACTIVATION_CLIPPED_RELU = 3,    /**< clipped ReLU */
  CC_ACTIVATION_ELU = 4,             /**< ELU */
  CC_ACTIVATION_LEAKY_RELU = 5,
  CC_ACTIVATION_ABS = 6,             /**< Abs */
  CC_ACTIVATION_RELU1 = 7,           /**< relu1 */
  CC_ACTIVATION_SOFTSIGN = 8,        /**< softsign */
  CC_ACTIVATION_SOFTPLUS = 9,        /**< softplus */
  CC_ACTIVATION_HARDSIGMOID = 10,    /**< hardsigmoid */
  CC_ACTIVATION_THRESHOLD_RELU = 11, /**< threshold */
  CC_ACTIVATION_SELU = 12,           /**< selu */
  CC_ACTIVATION_LINEAR = 13,         /**< linear */
  CC_ACTIVATION_RELU6 = 14,          /**< relu6 */
  CC_ACTIVATION_RESERVED             /**< end-of-list marker */
} ccActivationMode_t;

/**
 * @ingroup dnn
 * @brief mode of logical op
 */
typedef enum tagCcLogicalOpMode {
  CC_LOGICAL_OP_NOT = 0, /**< logical not */
  CC_LOGICAL_OP_AND = 1, /**< logical and */
  CC_LOGICAL_OP_OR = 2,  /**< logical or */
  CC_LOGICAL_OP_XOR = 3, /**< logical xor */
  CC_LOGICAL_OP_RESERVED /**< end-of-list marker */
} ccLogicalOpMode_t;

/**
 * @ingroup dnn
 * @brief mode of batchnorm
 */
typedef enum tagCcBatchNormMode {
  CC_BATCHNORM_PER_ACTIVATION = 0, /**< bnScale, bnBias tensor dims are 1xCxHxW */
  CC_BATCHNORM_SPATIAL = 1,        /**< bnScale, bnBias tensor dims are 1xCx1x1 */
  CC_BATCHNORM_RESERVED            /**< end-of-list marker */
} ccBatchNormMode_t;

/**
 * @ingroup dnn
 * @brief mode of instancenorm
 */
typedef enum tagCcInstanceNormMode {
  CC_INSTANCENORM_PER_ACTIVATION = 0, /**< inScale, inBias tensor dims are NxCxHxW */
  CC_INSTANCENORM_SPATIAL = 1,        /**< inScale, inBias tensor dims are NxCx1x1 */
  CC_INSTANCENORM_RESERVED            /**< end-of-list marker */
} ccInstanceNormMode_t;
/**
 * @ingroup dnn
 * @brief mode of layernorm
 */
typedef enum tagCcLayerNormMode {
  CC_LAYERNORM_PER_ACTIVATION = 0, /**< lnScale, lnBias tensor dims are 1xCxHxW */
  CC_LAYERNORM_SPATIAL = 1,        /**< lnScale, lnBias tensor dims are Nx1x1x1 */
  CC_LAYERNORM_RESERVED            /**< end-of-list marker */
} ccLayerNormMode_t;

/**
 * @ingroup dnn
 * @brief softmax algorithm
 */
typedef enum tagCcSoftmaxAlgo {
  CC_SOFTMAX_FAST = 0,          /**< straightforward implementation */
  CC_SOFTMAX_ACCURATE = 1,      /**< subtract max from every point to avoid overflow */
  CC_SOFTMAX_LOG = 2,           /**< perform the Log softmax operation to avoid overflow */
  CC_SOFTMAX_ACCURATE_FP32 = 3, /**< accurate mode for fp32 */
  CC_SOFTMAX_RESERVED           /**< end-of-list marker */
} ccSoftmaxAlgo_t;

/**
 * @ingroup dnn
 * @brief softmax mode
 */
typedef enum tagCcSoftmaxMode {
  CC_SOFTMAX_MODE_INSTANCE = 0, /**< compute the softmax over all C, H, W for each N */
  CC_SOFTMAX_MODE_CHANNEL = 1,  /**< compute the softmax over all C for each H, W, N */
  CC_SOFTMAX_MODE_HEIGHT = 2,   /**< compute the softmax over all H for each N, C, W */
  CC_SOFTMAX_MODE_WIDTH = 3,    /**< compute the softmax over all W for each N, C, H */
  CC_SOFTMAX_MODE_CLASS = 4,    /**< special mode: compute the softmax over all class for each N, H, W */
  CC_SOFTMAX_MODE_RESERVED      /**< end-of-list marker */
} ccSoftmaxMode_t;

/**
 * @ingroup dnn
 * @brief cross entropy mode
 */
typedef enum tagCcCrossEntropyMode {
  CC_CROSS_ENTROPY_SPARSE_WITHOUT_REDUCTION = 0, /**< compute the sparse cross entropy without fused reduce mean */
  CC_CROSS_ENTROPY_SPARSE_WITH_REDUCTION = 1,    /**< compute the sparse cross entropy with fused reduce mean */
  CC_CROSS_ENTROPY_WITHOUT_REDUCTION = 2,        /**< compute the cross entropy without fused reduce mean */
  CC_CROSS_ENTROPY_WITH_REDUCTION = 3,           /**< compute the cross entropy with fused reduce mean */
  CC_CROSS_ENTROPY_RESERVED                      /**< end-of-list marker */
} ccCrossEntropyMode_t;

/**
 * @ingroup dnn
 * @brief concat mode
 */
typedef enum tagCcConcatMode {
  CC_CONCAT_BY_BATCH = 0,   /**< concat by batch */
  CC_CONCAT_BY_FEATURE = 1, /**< concat by feature */
  CC_CONCAT_BY_HEIGHT = 2,  /**< concat by height */
  CC_CONCAT_BY_WIDTH = 3,   /**< concat by width */
  CC_CONCAT_BY_FLATTEN = 4,
  CC_CONCAT_RESERVED        /**< end-of-list marker */
} ccConcatMode_t;

/**
 * @ingroup dnn
 * @brief eltwise mode
 */
typedef enum tagCcEltwiseMode {
  CC_ELTWISE_PROD = 0, /**< prod */
  CC_ELTWISE_SUM = 1,  /**< sum */
  CC_ELTWISE_MAX = 2,  /**< max */
  CC_ELTWISE_RESERVED  /**< end-of-list marker */
} ccEltwiseMode_t;

/**
 * @ingroup dnn
 * @brief depthwise filter type
 */
typedef enum tagCcDepthwiseFilterType {
  CC_Depthwise_FILTER_DEPTHWISE = 0, /**< depthwise filter */
  CC_Depthwise_FILTER_POINTWISE = 1, /**< pointwise filter */
  CC_Depthwise_FILTER_RESERVED       /**< end-of-list marker */
} ccDepthwiseFilterType_t;

/**
 * @ingroup dnn
 * @brief sampler type
 */
typedef enum tagCcSamplerType {
  CC_SAMPLER_BILINEAR = 0, /**< bilinear sampler algo */
  CC_SAMPLER_RESERVED = 1  /**< end-of-list marker */
} ccSamplerType_t;

/**
 * @ingroup dnn
 * @brief NMS type
 */
typedef enum tagCcNmsType {
  CC_NMS_IOU = 0,     /**< nms operation type, only IOU for now */
  CC_NMS_RESERVED = 1 /**< end-of-list marker */
} ccNmsType_t;

/**
 * @ingroup dnn
 * @brief Box Code type in detection nets
 * @note numbering starts at 1
 */
typedef enum tagCcBoxCodeType {
  CC_BOX_CORNER = 1,
  CC_BOX_CENTER_SIZE = 2,
  CC_BOX_CORNER_SIZE = 3,
  CC_BOX_RESERVED /**< end-of-list marker */
} ccBoxCodeType_t;

/**
 * @ingroup dnn
 * @brief split mode
 */
typedef enum tagSplitMode {
  CC_SPLIT_MODE_SLICE = 0,     /**< split data of one dim */
  CC_SPLIT_MODE_DUPLICATE = 1, /**< copy data of one dim */
  CC_SPLIT_MODE_RESERVED       /**< end-of-list marker */
} ccSplitMode_t;

/**
 * @ingroup dnn
 * @brief mode of LRN
 */
typedef enum tagCcLRNMode {
  CC_LRN_CROSS_CHANNELS = 0,  /**< CROSS_CHANNELS */
  CC_LRN_WITHIN_CHANNELS = 1, /**< WITHIN_CHANNELS */
  CC_LRN_RESERVED             /**< end-of-list marker */
} ccLRNMode_t;

/**
 * @ingroup dnn
 * @brief format of AIPP input
 * @note numbering starts at 1; the trailing words (mini, lite, tiny) are the tags the
 *       original comments attached to each format
 */
typedef enum tagCcAippInputFormat {
  CC_AIPP_INPUT_YUV420SP_U8 = 1,    /**< YUV420SP; mini, lite, tiny */
  CC_AIPP_INPUT_XRGB8888_U8 = 2,    /**< XRGB8888; mini, lite, tiny */
  CC_AIPP_INPUT_NC1HWC0DI_FP16 = 3, /**< NC1HWC0DI_FP16; mini */
  CC_AIPP_INPUT_NC1HWC0DI_S8 = 4,   /**< NC1HWC0DI_S8; mini */
  CC_AIPP_INPUT_RGB888_U8 = 5,      /**< RGB888; mini, tiny */
  CC_AIPP_INPUT_ARGB8888_U8 = 6,    /**< ARGB8888; lite */
  CC_AIPP_INPUT_YUYV_U8 = 7,        /**< YUYV; lite */
  CC_AIPP_INPUT_YUV422SP_U8 = 8,    /**< YUV422SP; lite */
  CC_AIPP_INPUT_AYUV444_U8 = 9,     /**< AYUV444; lite */
  CC_AIPP_INPUT_YUV400_U8 = 10,     /**< YUV400; mini, lite, tiny */
  CC_AIPP_INPUT_RESERVED            /**< end-of-list marker */
} ccAippInputFormat_t;

/**
 * @ingroup dnn
 * @brief mode of AIPP padding
 * @note numbering starts at 1
 */
typedef enum tagCcAippPaddingMode {
  CC_AIPP_PAD_DEFAULT_VALUE = 1, /**< CONFIG_VALUE */
  CC_AIPP_PAD_LINE_COPY = 2,     /**< ROW_COL_COPY */
  CC_AIPP_PAD_BLOCK_COPY = 3,    /**< BLOCK_COPY */
  CC_AIPP_PAD_MIRROR_COPY = 4,   /**< MIRROR_COPY */
  CC_AIPP_PAD_RESERVED           /**< end-of-list marker */
} ccAippPaddingMode_t;

/**
 * @ingroup dnn
 * @brief cmp type
 */
typedef enum tagCcccCMPType {
  CC_CMP_EQ = 0,
  CC_CMP_NE = 1,
  CC_CMP_LT = 2,
  CC_CMP_GT = 3,
  CC_CMP_GE = 4,
  CC_CMP_LE = 5,
  CC_CMP_TYPE_RESERVED /**< end-of-list marker */
} ccCMPType_t;

/**
 * @ingroup dnn
 * @brief result type: logical and / logical or
 */
typedef enum tagCcResultType {
  CC_Result_AND = 0, /**< logical and */
  CC_Result_OR = 1,  /**< logical or */
  CC_Result_RESERVED /**< end-of-list marker */
} ccResultType_t;

/**
 * @ingroup dnn
 * @brief method of crop_and_resize operator
 */
typedef enum tagCcResizeMethod {
  CC_RESIZE_METHOD_BILINEAR = 0, /**< BILINEAR */
  CC_RESIZE_METHOD_NEAREST = 1,  /**< NEAREST */
  CC_RESIZE_METHOD_RESERVED      /**< end-of-list marker */
} ccResizeMethod_t;

/**
 * @ingroup dnn
 * @brief mode of calculating new size of the images
 */
typedef enum tagCcResizeOutputDimMode {
  RESIZE_OUTPUT_DIM_BY_ZOOM_FACTOR = 0,   /**< Output dimension specified by zoom factor */
  RESIZE_OUTPUT_DIM_BY_SHRINK_FACTOR = 1, /**< specified by shrink factor */
  RESIZE_OUTPUT_DIM_EXPLICIT = 2,         /**< specified explicitly */
  RESIZE_OUTPUT_DIM_RESERVED              /**< end-of-list marker */
} ccResizeOutputDimMode_t;

/**
 * @ingroup dnn
 * @brief YOLO version
 * @note numbering starts at 1
 */
typedef enum tagCcYoloVersion {
  CC_YOLO_V2 = 1,  /**< YOLOv2 */
  CC_YOLO_V3 = 2,  /**< YOLOv3 */
  CC_YOLO_RESERVED /**< end-of-list marker */
} ccYoloVersion_t;

/**
* @ingroup dnn
* @brief attention algorithm
*/
typedef enum tagCcAttentionAlgo {
// bahdanau-attention, for detail:https://pravn.wordpress.com/2017/11/14/bahdanau-attention/
CC_ATTENTION_ALGO_BAHDANAU = 0,
CC_ATTENTION_ALGO_NORMAL_BAHDANAU = 1,
CC_ATTENTION_ALGO_LUONG = 2,
CC_ATTENTION_ALGO_SCALED_LUONG = 3,
CC_ATTENTION_ALGO_RESERVED /* end-of-list marker, takes the next value (4) */
} AttentionAlgo_t;
/**
 * @ingroup dnn
 * @brief Data layouts (5D) of the attention decoder.
 */
typedef enum ccEmAttnDecoderDataLayout {
  CC_ATTN_5D_TX1BX = 0,   // [max_time,Xt1,1,batch_size,Xt0]
  CC_ATTN_5D_BTX1X = 1,   // [batch_size*max_time,Xt1,1,1,Xt0]
  CC_ATTN_DL_RESERVED = 2 // reserved, last enumerator
} ccEmAttnDecoderDataLayout_t;

/**
 * @ingroup dnn
 * @brief Operation kinds of Reduce.
 *
 * Values are consecutive starting at 0 and are written out explicitly.
 */
typedef enum {
  CC_REDUCE_OP_SUM = 0,        /**< sum */
  CC_REDUCE_OP_MEAN = 1,       /**< mean */
  CC_REDUCE_OP_PROD = 2,       /**< product */
  CC_REDUCE_OP_ALL = 3,        /**< logical and */
  CC_REDUCE_OP_ABS_SUM = 4,    /**< absolute sum */
  CC_REDUCE_OP_SQUARE_SUM = 5, /**< square sum */
  CC_REDUCE_OP_MAX = 6,        /**< max */
  CC_REDUCE_OP_MIN = 7,        /**< min */
  CC_REDUCE_OP_LOGSUMEXP = 8,  /**< logsumexp */
  CC_REDUCE_OP_INVALID = 9     /**< invalid, last enumerator */
} ccReduceOpType_t;

/**
 * @ingroup dnn
 * @brief Types of LSH projection.
 */
typedef enum {
  LSH_PROJECTION_TYPE_UNKNOWN = 0,
  LSH_PROJECTION_TYPE_SPARSE  = 1,
  LSH_PROJECTION_TYPE_DENSE   = 2
} LSHProjectionType;

/**
* @ingroup dnn
* @brief activation para: coefficient and NaN option used by clipped RELU
*/
typedef struct tagCcActivationRelu {
double reluCoef; /* reluCoef for clipped RELU */
ccNanPropagation_t reluNanOpt; /* NaN option for clipped RELU */
} ccActivationRelu_t;
/**
* @ingroup dnn
* @brief activation parameters; this is a union, so the members share storage
*        and only one of them is meaningful at a time
*/
typedef union tagCcActivationPara {
ccActivationRelu_t actionRelu; /* relu Coef and NanOpt for clipped RELU */
double eluAlpha; /* eluAlpha for ELU */
float leakyReluNegativeSlope; /* presumably the negative slope for leaky RELU -- TODO confirm */
} ccActivationPara_u;

/**
 * @ingroup dnn
 * @brief Modes of the square operator (only one mode is defined).
 */
typedef enum tagCcSquareMode {
  CC_SQUARE_2 = 0 /* square */
} ccSquareMode_t;

/**
 * @ingroup dnn
 * @brief Types of the appended operation.
 */
typedef enum tagCcOpType {
  CC_OP_TYPE_NO_RELU = 0,
  CC_OP_TYPE_RELU    = 1,
  CC_OP_TYPE_RELU6   = 2,
  CC_OP_TYPE_INVALID = 3 /**< invalid, last enumerator */
} ccOpType_t;

/**
 * @ingroup dnn
 * @brief Enumeration of fill operator types.
 *
 * Values are consecutive starting at 0 and are written out explicitly.
 */
typedef enum tagCcFillOpType {
  CC_CONSTANT = 0,
  CC_RANGE = 1,
  CC_LENGTH_RANGE = 2,
  CC_GIVEN_TENSOR = 3,
  CC_DIAGONAL = 4,
  CC_UNIFORM = 5,
  CC_UNIFORM_INT = 6,
  CC_UNIQUE_UNIFORM = 7,
  CC_GAUSSIAN = 8,
  CC_XAVIER = 9,
  CC_MSRA = 10,
  CC_FILL_OP_TYPE_RESERVED = 11 /**< reserved, last enumerator */
} ccFillOpType_t;

/**
 * @ingroup dnn
 * @brief Reduction modes of a loss function.
 */
typedef enum tagCcLossReduction {
  CC_LOSS_REDUCTION_NONE = 0,
  CC_LOSS_REDUCTION_SUM = 1,
  CC_LOSS_REDUCTION_RESERVED = 2 /**< reserved, last enumerator */
} ccLossReduction_t;

/**
* @ingroup dnn
* @brief max size (capacity) of ccIntArray
*/
#define CC_INT_ARRAY_MAX_SIZE (8)

/**
* @ingroup dnn
* @brief struct define of an int array holding at most CC_INT_ARRAY_MAX_SIZE (8) values.
*/
typedef struct tagIntArray {
uint32_t size; /* presumably the number of valid entries in value[] -- TODO confirm */
int32_t value[CC_INT_ARRAY_MAX_SIZE]; /* fixed-capacity storage */
} ccIntArray_t;

/**
 * @ingroup dnn
 * @brief Pad modes.
 */
typedef enum tagCcPadMode {
  CC_PAD_CONSTANT = 0,     /* CONSTANT */
  CC_PAD_REFLECT = 1,      /* REFLECT */
  CC_PAD_SYMMETRIC = 2,    /* SYMMETRIC */
  CC_PAD_EDGE = 3,         /* EDGE */
  CC_PAD_MODE_RESERVED = 4 /* reserved, last enumerator */
} ccPadMode_t;

/**
 * @ingroup dnn
 * @brief Pad types of the extractImagePatches operation. Numbering starts at 1.
 */
typedef enum {
  CC_EXTRACT_IMAGE_PATCHES_PAD_VALID = 1,
  CC_EXTRACT_IMAGE_PATCHES_PAD_SAME = 2,
  CC_EXTRACT_IMAGE_PATCHES_PAD_RESERVED = 3 /**< reserved, last enumerator */
} ccExtractImagePatchesPadType_t;

/**
* @ingroup dnn
* @brief image dimensions of aipp input
*/
#define CC_AIPP_IMG_DIM (2)

/**
* @ingroup dnn
* @brief image channel number of aipp input
*/
#define CC_AIPP_IMG_CHN_NUM (4)

/**
* @ingroup dnn
* @brief element number of aipp color space conversion matrix
*/
#define CC_AIPP_CSC_MATRIX_DIM (9)

/**
* @ingroup dnn
* @brief element number of aipp color space conversion bias
*/
#define CC_AIPP_CSC_BIAS_DIM (3)

/**
* @ingroup dnn
* @brief struct define of AIPP operator
*
* Array members are sized by the CC_AIPP_* macros: per-image-dimension arrays
* have CC_AIPP_IMG_DIM entries, per-channel arrays have CC_AIPP_IMG_CHN_NUM
* entries. Member order defines the struct layout and must not be changed.
*/

typedef struct tagCcAipp {
ccAippInputFormat_t inputFormat;
ccDataType_t outputFormat;
int32_t srcImageSize[CC_AIPP_IMG_DIM];
int32_t loadStartPos[CC_AIPP_IMG_DIM];
int32_t loadSize[CC_AIPP_IMG_DIM];
int32_t scfInputSize[CC_AIPP_IMG_DIM];
int32_t scfOutputSize[CC_AIPP_IMG_DIM];
int32_t cscMatrix[CC_AIPP_CSC_MATRIX_DIM]; // color space conversion matrix elements
int32_t cscOutputBias[CC_AIPP_CSC_BIAS_DIM];
int32_t cscInputBias[CC_AIPP_CSC_BIAS_DIM];
int32_t dtcPixelMean[CC_AIPP_IMG_CHN_NUM];
float dtcPixelMin[CC_AIPP_IMG_CHN_NUM];
float dtcPixelVarReci[CC_AIPP_IMG_CHN_NUM];
ccAippPaddingMode_t paddingMode;
int32_t paddingSize[CC_AIPP_IMG_DIM * 2]; // up,down,left,right
float cpaddingVaule; // (sic) member name is misspelled; kept because renaming would break users
bool cscSwitch; // 0:off,1:on
bool scfSwitch; // 0:off,1:on
bool rbuvSwapSwitch;
bool axSwapSwitch;
bool singleLineMode;
bool cscConfigFlag;
bool dtcConfigFlag;
bool padConfigFlag;
bool commConfigFlag;
bool aippEn;
bool dyncAippFlag;
const void *dyncParaAddr; // NOTE(review): ownership/lifetime of this buffer is not visible here
bool rotationFlag;
} ccConvolutionAipp_t;


/**
* @ingroup dnn
* @brief one set of quantize parameters (scale and offsets), used as a member of ccQuantize_t
*
* NOTE(review): all pointer members are raw pointers; who owns the buffers
* they point to is not visible in this header.
*/
typedef struct tagCcQuantizePara {
ccConvolutionScaleValueMode_t scaleValueMode;
uint16_t *scale;
uint16_t *offsetq;
int32_t *offsetw;
uint8_t *allOffsetw;
uint8_t *offsetPad;
} CcQuantizePara_t;

/**
* @ingroup dnn
* @brief struct define of quantize information: algorithm, scale type and
*        three parameter sets (scaleQ, scaleRq, scaleDq)
*/
typedef struct tagCcQuantize {
ccQuantizeAlgo_t quantAlgo;

ccConvolutionScaleType_t scaleWType; // show scaleRq,scaleDq type

CcQuantizePara_t scaleQ;

CcQuantizePara_t scaleRq;

CcQuantizePara_t scaleDq;

// need relu
bool reluFlag;

// relu6
uint16_t *scaleRelu6; // NOTE(review): buffer ownership is not visible here
bool bConcat;
} ccQuantize_t;

/**
* @ingroup dnn
* @brief descriptor of pad operator (pointer to struct tagCcPad)
*/
typedef struct tagCcPad *ccPadDescriptor_t;

/**
 * @ingroup dnn
 * @brief Operation kinds of Cum.
 */
typedef enum {
  CC_CUM_OP_SUM = 0,    /**< sum */
  CC_CUM_OP_PROD = 1,   /**< product */
  CC_CUM_OP_INVALID = 2 /**< invalid, last enumerator */
} CumOpType;

/**
* @ingroup dnn
* @brief descriptor of tensor
*/
typedef struct tagCcTensor *ccTensorDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of filter tensor
*/
typedef struct tagCcFilter *ccFilterDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of convolution operator
*/
typedef struct tagCcConvolution *ccConvolutionDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of correlation operator (same underlying struct as the
*        convolution descriptor: struct tagCcConvolution)
*/
typedef struct tagCcConvolution *ccCorrelationDescriptor_t;
/* descriptor of full connection operator (pointer to struct tagCcFullConnection_t) */
typedef struct tagCcFullConnection_t *ccFullConnectionDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of pooling operator
*/
typedef struct tagCcPooling *ccPoolingDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of activation operator
*/
typedef struct tagCcActivation *ccActivationDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of batchToSpace operator
*/
typedef struct tagCcBatchToSpace *ccBatchToSpaceDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of spaceToBatch operator
*/
typedef struct tagCcSpaceToBatch *ccSpaceToBatchDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of svdf operator
*/
typedef struct tagCcSvdf *ccSvdfDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of crop operator
*/
typedef struct tagCcCrop *ccCropDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of interp operator
*/
typedef struct tagCcInterp *ccInterpDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of GetRegionBox operator
*/
typedef struct tagCcGetRegionBox *ccGetRegionBoxDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of CorrectBoxes operator
*/
typedef struct tagCorrectBoxes *ccCorrectBoxesDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of ClsProb operator
*/
typedef struct tagClsProb *ccClsProbDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of NMS operator
*/
typedef struct tagCcNms *ccNmsDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of MultiClassNms operator
*/
typedef struct tagCcMultiClassNms *ccMultiClassNmsDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of MscnnBoxOutput operator
*/
typedef struct tagCcMscnnBoxOutput *ccMscnnBoxOutputDescriptor_t;

/**
* @ingroup dnn
* @brief define of SoftmaxTree (opaque void pointer)
*/
typedef void *ccSoftmaxTree_t;

/**
* @ingroup dnn
* @brief descriptor of exp operator
*/
typedef struct tagCcExp *ccExpDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of log operator
*/
typedef struct tagCcLog *ccLogDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of pow operator
*/
typedef struct tagCcPow *ccPowDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of padv2 operator
*/
typedef struct tagCcPadV2 *ccPadV2Descriptor_t;

/**
* @ingroup dnn
* @brief descriptor of ShapeClassify operator
*/
typedef struct tagCcShapeClassify *ccShapeClassifyDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of DetectionFull3DOutput operator
*/
typedef struct tagCcDetectionFull3DOutput *ccDetectionFull3DOutputDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of Quantize operator
*/
typedef struct tagCcQuantize *ccQuantizeDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of StridedSlice operator
*/
typedef struct tagCcStridedSlice *ccStridedSliceDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of StridedSliceAttrs operator
*/
typedef struct tagCcStridedSliceAttrs *ccStridedSliceAttrsDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of ResizeBilinear operator
*/
typedef struct tagCcResizeBilinear *ccResizeBilinearDescriptor_t;

/* descriptor of Eltwise operator (pointer to struct tagCcEltwise) */
typedef struct tagCcEltwise *ccEltwiseDescriptor_t;

/* descriptor of BatchNorm operator (pointer to struct tagCcBatchNorm) */
typedef struct tagCcBatchNorm *ccBatchNormDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of Square operator
*/
typedef struct tagCcSquare *ccSquareDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of NonMaxSuppression operator
*/
typedef struct tagNonMaxSuppression *ccNonMaxSuppressionDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of Upsample operator parameters
*/
typedef struct tagUpsamplePara *ccUpsampleParaDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of ResizeNearestNeighbor operator
*/
typedef struct tagCcResizeNearestNeighbor *ccResizeNearestNeighborDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of Fill operator
*/
typedef struct tagCcFillParam *ccFillParamDescriptor_t;

/**
* @ingroup dnn
* @brief descriptor of Argmaxmin operator
*/
typedef struct tagCcArgmaxmin *ccArgmaxminDescriptor_t;

}; // namespace cce

#endif // DNN_BASE_HPP__

+ 4838
- 0
third_party/fwkacllib/inc/inc/cce/dnn_op.h
File diff suppressed because it is too large
View File


+ 23
- 0
third_party/fwkacllib/inc/inc/cce/dnn_struct.hpp View File

@@ -0,0 +1,23 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DNN_STRUCT_HPP__
#define DNN_STRUCT_HPP__

#include "dnn.h"
#include "dnn_struct_base.hpp"

#endif // DNN_STRUCT_HPP__

+ 894
- 0
third_party/fwkacllib/inc/inc/cce/dnn_struct_base.hpp View File

@@ -0,0 +1,894 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DNN_STRUCT_BASE_HPP__
#define DNN_STRUCT_BASE_HPP__

#include "cce/cce_def.hpp"

namespace cce {

/**
* @ingroup dnn
* @brief max number of dimensions; used as the bound of the dim/stride style
*        arrays in the structs of this header
*/
#define CC_DIM_MAX (8)

/**
* @ingroup dnn
* @brief max number of dimensions when use NC1HWC0 format
*/
#define CC_REALDIM_MAX (4)

/**
* @ingroup dnn
* @brief max input count of MscnnBoxOutput
*/
#define CC_MAX_INPUT_CNT (10)

/**
* @ingroup dnn
* @brief image dimensions of aipp input
*/
#define CC_AIPP_IMG_DIM (2)

/**
* @ingroup dnn
* @brief image channel number of aipp input
*/
#define CC_AIPP_IMG_CHN_NUM (4)

/**
* @ingroup dnn
* @brief element number of aipp color space conversion matrix
*/
#define CC_AIPP_CSC_MATRIX_DIM (9)

/**
* @ingroup dnn
* @brief element number of aipp color space conversion bias
*/
#define CC_AIPP_CSC_BIAS_DIM (3)

/**
* @ingroup dnn
* @brief parameter number of op exp/log/pow
*/
#define PARAM_CNT_THREE (3)

/**
* @ingroup dnn
* @brief parameter number of op nonmaxsuppression
*/
#define PARAM_CNT_TWO (2)
/* named constants for dimension counts 1, 2 and 4 */
#define DIMCNT_NUMBER_ONE (1)
#define DIMCNT_NUMBER_TWO (2)
#define DIMCNT_NUMBER_FOUR (4)

/* positions 0..3 named N, C, H, W -- presumably indexes into an NCHW dim array; TODO confirm */
#define COMMON_FORMAT_NCHW_N_INDEX (0)
#define COMMON_FORMAT_NCHW_C_INDEX (1)
#define COMMON_FORMAT_NCHW_H_INDEX (2)
#define COMMON_FORMAT_NCHW_W_INDEX (3)

/**
* @ingroup dnn
* @brief parameter number of op upsample
*/
#define UPSAMPLE_SCAL_DEFAULT_TWO (2)
#define UPSAMPLE_ILLEGAL_VALUE_1 (1)

/**
* @ingroup dnn
* @brief struct define of StridedSlice required params.
*
* begin/end/strides each have room for CC_DIM_MAX entries.
*/

typedef struct tagCcStridedSlice {
uint32_t dimCnt; /* presumably the number of valid entries in the arrays below -- TODO confirm */
int32_t begin[CC_DIM_MAX];
int32_t end[CC_DIM_MAX];
int32_t strides[CC_DIM_MAX];
} ccStridedSlice_t;

/**
* @ingroup dnn
* @brief struct define of Strided_slice attrs: five 32-bit mask values
*
* NOTE(review): the masks presumably carry one bit per dimension -- confirm
* against the operator implementation.
*/
typedef struct tagCcStridedSliceAttrs {
uint32_t beginMask;
uint32_t endMask;
uint32_t ellipsisMask;
uint32_t newAxisMask;
uint32_t shrinkAxisMask;
} ccStridedSliceAttrs_t;

/**
* @ingroup dnn
* @brief params of batchToSpace
*/
typedef struct tagCcBatchToSpace {
int32_t blockShapeLength; /* presumably the number of valid entries in blockShape -- TODO confirm */
int32_t blockShape[CC_DIM_MAX];
int32_t crops[2 * CC_DIM_MAX]; /* two entries per dimension slot */
} ccBatchToSpace_t;

/**
* @ingroup dnn
* @brief params of spaceToBatch
*/
typedef struct tagCcSpaceToBatch {
int32_t blockShapeLength; /* presumably the number of valid entries in blockShape -- TODO confirm */
int32_t blockShape[CC_DIM_MAX];
int32_t paddings[2 * CC_DIM_MAX]; /* two entries per dimension slot */
} ccSpaceToBatch_t;

/**
* @ingroup dnn
* @brief struct define of tensor; this is the struct that
*        ccTensorDescriptor_t (struct tagCcTensor *) points to
*/
typedef struct tagCcTensor {
ccTensorFormat_t format;
ccDataType_t dataType;
int32_t dimCnt;
int32_t realDimCnt;
uint32_t dataSize; /* NOTE(review): unit (bytes vs. elements) is not visible here -- confirm */
int32_t dim[CC_DIM_MAX];
int32_t stride[CC_DIM_MAX];
ccVecQuantizePara_t vecQuantizePara;
} ccTensor_t;

/**
* @ingroup dnn
* @brief struct define of filter tensor; this is the struct that
*        ccFilterDescriptor_t (struct tagCcFilter *) points to
*/
typedef struct tagCcFilter {
ccTensorFormat_t format;
ccDataType_t dataType;
int32_t dimCnt;
uint32_t dataSize; /* NOTE(review): unit (bytes vs. elements) is not visible here -- confirm */
int32_t dim[CC_DIM_MAX];
} ccFilter_t;

/**
* @ingroup dnn
* @brief struct define of convolution operator
*
* Per-axis arrays are sized CC_DIM_MAX - 2 (6 entries with CC_DIM_MAX == 8);
* padding arrays hold two entries per such axis (12 entries).
*/
typedef struct tagCcConvolution {
ccConvolutionMode_t mode;
ccPaddingMode_t padMode;
int32_t dimCnt;
int32_t padding[2 * (CC_DIM_MAX - 2)];
int32_t filterStride[CC_DIM_MAX - 2];
int32_t dilation[CC_DIM_MAX - 2];
int32_t group;
ccQuantizeDescriptor_t quantInfo; /* pointer to struct tagCcQuantize; ownership not visible here */
ccConvolutionAipp_t aippInfo; /* embedded by value */
int32_t adj[CC_DIM_MAX - 2];
int32_t targetShape[CC_DIM_MAX - 2];
int32_t beforePadding[2 * (CC_DIM_MAX - 2)]; // pad before conv
uint32_t reluFlag;
int64_t concatBatchSize;
} ccConvolution_t;

/* ccCorrelation_t is a preprocessor alias of ccConvolution_t; being a macro it is
   not scoped to namespace cce and is replaced textually wherever it appears. */
#define ccCorrelation_t ccConvolution_t
/**
* @ingroup dnn
* @brief struct define of full connection operator; this is the struct that
*        ccFullConnectionDescriptor_t points to
*/
typedef struct tagCcFullConnection_t {
ccQuantizeDescriptor_t quantInfo;
uint32_t infoTabSize; /* presumably the size of the buffer at infoTab -- TODO confirm unit */
const void *infoTab; /* NOTE(review): ownership/lifetime not visible here */
bool reluFlag;
ccFullConnectFwdAlgo_t algo;
} ccFullConnection_t;

/**
* @ingroup dnn
* @brief struct define of ConcatFour2Five parameters
*/
typedef struct tagCcConcatFour2Five_t {
uint32_t branchNum; // how many branch for box or class
uint32_t classNum; // box branch's classNum is four, class branch's classNum is class number
} ccConcatFour2Five_t;

/**
* @ingroup dnn
* @brief struct define of transdata parameters
*
* NOTE(review): the *Addr members are 64-bit integers, presumably carrying
* addresses -- confirm how they are converted to pointers by the consumer.
*/
typedef struct tagCcTransdata_t {
uint64_t scaleQAddr;
uint8_t scaleQValueMode;
uint64_t offsetQAddr;
uint8_t quantAlgo;
uint8_t quantize8bitFlag;
} ccTransdata_t;
/**
* @ingroup dnn
* @brief struct define of pooling operator
*
* Per-axis arrays are sized CC_DIM_MAX - 2 (6 entries with CC_DIM_MAX == 8).
*/
typedef struct tagCcPooling {
ccPoolingMode_t mode;
ccPaddingMode_t padMode;
ccNanPropagation_t maxpoolingNanOpt;
int32_t dimCnt;
int32_t windowDim[CC_DIM_MAX - 2];
int32_t padding[CC_DIM_MAX - 2];
int32_t stride[CC_DIM_MAX - 2];
int32_t dataMode;
int32_t ceilMode;
ccQuantizeDescriptor_t quantInfo; /* pointer to struct tagCcQuantize; ownership not visible here */
ccPooingFwdAlgo_t algo; /* type name spelling ("Pooing") comes from its declaration elsewhere */
} ccPooling_t;

/**
* @ingroup dnn
* @brief struct define of activation operator
*/
typedef struct tagCcActivation {
ccActivationMode_t mode;
ccNanPropagation_t reluNanOpt;
double coef; /* ceiling for clipped RELU, alpha for ELU */
ccActivationPara_u activationPara; /* union; NOTE(review): presumably interpreted according to mode -- confirm */
} ccActivation_t;

/**
* @ingroup dnn
* @brief struct define of svdf operator; this is the struct that
*        ccSvdfDescriptor_t (struct tagCcSvdf *) points to
*/
typedef struct tagCcSvdf {
ccTensorFormat_t format;
ccDataType_t dataType;
uint32_t batches;
uint32_t features;
uint32_t rank;
uint32_t inputSize;
uint32_t memorySize;
} ccSvdf_t;

/**
* @ingroup dnn
* @brief struct define of HashTableLookup operator
*
* Carries one data type per tensor role (lookup, key, value, output, hits),
* four counts, and scale/offset values for the value and output tensors.
*/
typedef struct tagCcHashTableLookup {
ccTensorFormat_t format;
ccDataType_t lookupType;
ccDataType_t keyType;
ccDataType_t valueType;
ccDataType_t outputType;
ccDataType_t hitsType;
uint32_t lookups;
uint32_t keys;
uint32_t rows;
uint32_t features;
uint16_t valueScale;
uint16_t outputScale;
uint16_t valueOffset;
uint16_t outputOffset;
} ccHashTableLookup_t;

/**
* @ingroup dnn
* @brief struct define of prelu operator
*/
typedef struct tagCcPRelu {
ccNanPropagation_t reluNanOpt;
int32_t slopeCount;
bool channelShared; /* presumably true when one slope is shared by all channels -- TODO confirm */
} ccPRelu_t;

/**
* @ingroup dnn
* @brief struct define of crop operator
*/
typedef struct tagCcCrop {
int32_t startAxis;
int32_t offset[CC_DIM_MAX];
int32_t offsetCnt; /* presumably the number of valid entries in offset[] -- TODO confirm */
} ccCrop_t;

/**
* @ingroup dnn
* @brief struct define of SpatialTransformer operator
*
* Note that dim[] is uint64_t here, unlike the int32_t dim[] of ccTensor_t.
*/
typedef struct tagCcSpatialTransformer {
ccSamplerType_t samplerType;
ccDataType_t dataType;
int32_t dimCnt;
uint64_t dim[CC_DIM_MAX];
uint64_t alignCorner;
} ccSpatialTransformer_t;

/**
* @ingroup dnn
* @brief struct define of ShiftTransformer operator
*/
typedef struct tagCcShiftTransformer {
ccSamplerType_t samplerType;
double xPreDefined;
double yPreDefined;
bool xShift;
bool yShift;
int32_t gridH; /* grid height, per the H/W naming used in this header */
int32_t gridW; /* grid width */
} ccShiftTransformer_t;

/**
* @ingroup dnn
* @brief struct define of FasterRcnnProposal operator
*/
typedef struct tagCcFasterRcnnProposal {
int32_t preNMStopK;
int32_t postNMStopK;
float nmsTresh; /* (sic) member name spelling kept for source compatibility */
float minSize;
float featStride;
float baseSize;
int32_t ratioCnt; /* presumably the element count of ratio -- TODO confirm */
int32_t scaleCnt; /* presumably the element count of scale -- TODO confirm */
float *ratio; /* NOTE(review): buffer ownership not visible here */
float *scale; /* NOTE(review): buffer ownership not visible here */
int32_t imgH;
int32_t imgW;
} ccFasterRcnnProposal_t;

/**
* @ingroup dnn
* @brief struct define of LRN operator: mode plus the N, alpha, beta and K parameters
*/
typedef struct tagCcLRN {
ccLRNMode_t lrnMode;
int32_t lrnN;
double lrnAlpha;
double lrnBeta;
double lrnK;
} ccLRN_t;

/**
* @ingroup dnn
* @brief struct define of instanceNorm: mode and epsilon
*/
typedef struct tagCcInstancenorm {
ccInstanceNormMode_t mode;
double epsilon;
} ccInstancenorm_t;

/**
* @ingroup dnn
* @brief struct define of assignOp operator; holds only the operator mode
*/
typedef struct tagCcAssignOp {
ccAssignOpMode_t assignOpMode;
} ccAssignOp_t;

/**
* @ingroup dnn
* @brief struct define of arcSinCos operator; holds only the operator mode
*/
typedef struct tagCcArcSinCos {
ccArcSinCosMode_t arcSinCosMode;
} ccArcSinCos_t;

/**
* @ingroup dnn
* @brief struct define of Detectpostprocess operator
*/
typedef struct tagCcDetectpostprocess {
int32_t numClasses;
float confThreshold;
float nmsThreshold;
int32_t outTopK;
/* four bbox regression weights, one each for dx, dy, dw, dh */
float bboxRegWeightsDx;
float bboxRegWeightsDy;
float bboxRegWeightsDw;
float bboxRegWeightsDh;
} ccDetectpostprocess_t;
/**
* @ingroup dnn
* @brief struct define of FasterRcnnDetectionOutput operator
*/
typedef struct tagCcFasterRcnnDetectionOutput {
int32_t numClasses;
float nmsThreshold;
float postConfThreshold;
int32_t imgH; /* image height */
int32_t imgW; /* image width */
int32_t batchSize;
} ccFasterRcnnDetectionOutput_t;

/**
* @ingroup dnn
* @brief struct define of SsdDetectionOutput operator
*/
typedef struct tagCcSsdDetectionOutput {
int32_t numClasses;
int32_t backgroundLabelId;
double preConfThreshold;
int32_t preTopK;
double nmsThreshold;
double nmsEta;
ccBoxCodeType_t codeType;
int32_t outTopK;
bool shareLocation;
bool varianceEncodedInTarget;
uint32_t boxTypeNum;
float var[4];
uint32_t variance_num; /* presumably the number of valid entries in var[] -- TODO confirm */
} ccSsdDetectionOutput_t;

/**
* @ingroup dnn
* @brief struct define of RefinedetDetectionOutput operator
*
* Same members, in the same order, as ccSsdDetectionOutput_t, followed by
* objectness_score.
*/
typedef struct tagCcRefinedetDetectionOutput {
int32_t numClasses;
int32_t backgroundLabelId;
double preConfThreshold;
int32_t preTopK;
double nmsThreshold;
double nmsEta;
ccBoxCodeType_t codeType;
int32_t outTopK;
bool shareLocation;
bool varianceEncodedInTarget;
uint32_t boxTypeNum;
float var[4];
uint32_t variance_num; /* presumably the number of valid entries in var[] -- TODO confirm */
double objectness_score;
} ccRefinedetDetectionOutput_t;

/**
* @ingroup dnn
* @brief struct define of MsrGenerateRpnProposals operator
*/
typedef struct tagCcMsrGenerateRpnProposals {
int32_t preNmsTopK;
int32_t postNmsTopK;
float nmsThreshold;
float rpnMiniSize;
int32_t imgH; /* image height */
int32_t imgW; /* image width */
uint32_t boxTypeNum;
float scoreThreshold;
} ccMsrGenerateRpnProposals_t;

/**
* @ingroup dnn
* @brief struct define of RetinaPostprocessor operator
*/
typedef struct tagCcRetinaPostprocessor {
int32_t numClasses;
int32_t maxDetections;
float nmsThreshold;
float scoreThreshold;
int32_t imgH; /* image height */
int32_t imgW; /* image width */
uint32_t boxTypeNum;
float mean[4];
int32_t meanNum; /* presumably the number of valid entries in mean[] -- TODO confirm */
float std[4];
int32_t stdNum; /* presumably the number of valid entries in std[] -- TODO confirm */
int32_t outputNum;
bool ocrFlag;
} ccRetinaPostprocessor_t;

/**
* @ingroup dnn
* @brief struct define of GenerateSsdAnchors operator
*
* Each fixed-size array is followed by a count member; the count presumably
* gives the number of valid entries in that array -- TODO confirm.
*/
typedef struct tagCcGenerateSsdAnchors {
int32_t featureMapShapeList[20];
uint32_t featureMapShapeListSize;
int32_t boxSpecsNum[10];
uint32_t boxSpecsNumSize;
float scales[10];
uint32_t scalesNum;
float aspectRatios[10];
uint32_t aspectRatiosNum;
int32_t baseAnchorSize[2];
uint32_t baseAnchorSizeNum;
int32_t anchorStride[2];
uint32_t anchorStrideNum;
int32_t anchorOffset[2];
uint32_t anchorOffsetNum;
bool reduceBoxesInLowestLayer;
float minScale;
float maxScale;
int32_t imgH; /* image height */
int32_t imgW; /* image width */
} ccGenerateSsdAnchors_t;

/**
* @ingroup dnn
* @brief struct define of MscnnBoxOutput operator
*
* The per-input arrays are sized CC_MAX_INPUT_CNT (max input count of
* MscnnBoxOutput).
*/
typedef struct tagCcMscnnBoxOutput {
double fgThreshold;
double nmsThreshold;
ccNmsType_t nmsType;
int32_t fieldH[CC_MAX_INPUT_CNT];
int32_t fieldW[CC_MAX_INPUT_CNT];
int32_t downsampleRate[CC_MAX_INPUT_CNT];
int32_t defaultBoxCnt;
double fieldWhr;
double fieldXyr;
int32_t maxNmsNum;
int32_t maxPostNmsNum;
double minSize;
} ccMscnnBoxOutput_t;

/**
* @ingroup dnn
* @brief struct define of NMS operator; this is the struct that
*        ccNmsDescriptor_t (struct tagCcNms *) points to
*/
typedef struct tagCcNms {
int32_t numClasses;
int32_t backgroundLabelId;
double preConfThreshold;
int32_t preTopK;
double nmsThreshold;
double nmsEta;
int32_t postTopK;
int32_t outTopK;
double postConfThreshold;
bool shareLocation;
} ccNms_t;

/**
* @ingroup dnn
* @brief struct define of NMS/MultiClassNMS operator
*
* Note: the typedef name is spelled ccCcMultiClassNms_t (doubled "Cc"); it is
* left as is because renaming would break existing users.
*/
typedef struct tagCcMultiClassNms {
uint64_t numClasses;
float objThreshold;
float nmsThreshold;
float clsThreshold;
bool normal;
uint64_t coorType;
} ccCcMultiClassNms_t;

/**
* @ingroup dnn
* @brief struct define of YoloDetectionOutput operator
*/
typedef struct tagCcYoloDetectionOutput {
ccYoloVersion_t yoloVersion;
uint32_t netH;
uint32_t netW;
uint32_t postTopK;
uint32_t classes;
float nmsThreshold;
float iouThreDecay;
float coorScaleFactor;
bool relative;
float objThreshold;
float clsThreshold;
uint32_t biasNum; /* presumably the element count of bias -- TODO confirm */
float *bias; /* NOTE(review): buffer ownership not visible here */
} ccYoloDetectionOutput_t;

/**
* @ingroup dnn
* @brief struct define of GetRegionBox operator
*
* CC_MAX_YOLO_BIAS_NUM bounds the bias[] array; it is only defined here when
* no earlier definition exists.
*/
#ifndef CC_MAX_YOLO_BIAS_NUM
#define CC_MAX_YOLO_BIAS_NUM (16)
#endif

typedef struct tagCcGetRegionBox {
uint32_t biasNum; /* presumably the number of valid entries in bias[] -- TODO confirm */
uint32_t H;
uint32_t W;
float bias[CC_MAX_YOLO_BIAS_NUM];
} ccGetRegionBox_t;

/**
* @ingroup dnn
* @brief struct define of CorrectBoxes operator
*
* Note the member order: netW comes before netH here, while
* ccYoloDetectionOutput_t declares netH first.
*/
typedef struct tagCorrectBoxes {
uint32_t netW;
uint32_t netH;
bool relative;
} ccCorrectBoxes_t;

/**
* @ingroup dnn
* @brief struct define of ClsProb operator; holds a single threshold
*/
typedef struct tagClsProb {
float objThreshold;
} ccClsProb_t;

/**
* @ingroup dnn
* @brief struct define of SsdPriorBox operator
*
* Each double* member is followed by an int32_t "...Num" member, presumably
* the element count of that buffer -- TODO confirm. Ownership of the buffers
* is not visible in this header.
*/
typedef struct tagCcSsdPriorBox {
ccBoxCodeType_t codeType;
double *minSize;
int32_t minSizeNum;
double *maxSize;
int32_t maxSizeNum;
double *aspectRatio;
int32_t aspectRatioNum;
double *variance;
int32_t varianceNum;
int32_t imgH;
int32_t imgW;
double stepH;
double stepW;
double offset;
bool flip;
bool clip;
} ccSsdPriorBox_t;

/**
* @ingroup dnn
* @brief struct define of Yolo2Region operator
*/
typedef struct tagCcYolo2Region {
ccSoftmaxTree_t softmaxTree; /* opaque handle (ccSoftmaxTree_t is void *) */
bool softmax;
bool background;
bool treeSoftmax;
} ccYolo2Region_t;

/**
* @ingroup dnn
* @brief struct define of YoloRegion operator
*
* Starts with the same four members as ccYolo2Region_t, then adds the
* classes/coords/boxes counts and the YOLO version.
*/
typedef struct tagCcYoloRegion {
ccSoftmaxTree_t softmaxTree; /* opaque handle (ccSoftmaxTree_t is void *) */
bool softmax;
bool background;
bool treeSoftmax;
int32_t classes;
int32_t coords;
int32_t boxes;
ccYoloVersion_t yoloV;
} ccYoloRegion_t;

/**
* @ingroup dnn
* @brief struct define of power operator: scale, shift and power values
*
* NOTE(review): how the three values combine is not visible in this header --
* confirm the formula against the operator implementation.
*/
typedef struct tagCcPower {
float scale;
float shift;
float power;
} ccPower_t;

/**
* @ingroup dnn
* @brief struct define of exp operator; this is the struct that
*        ccExpDescriptor_t (struct tagCcExp *) points to
*/
typedef struct tagCcExp {
ccDataType_t dataType;
uint32_t paramCnt; /* parameter count; see PARAM_CNT_THREE (parameter number of op exp/log/pow) */
} ccExp_t;

/**
* @ingroup dnn
* @brief struct define of log operator; this is the struct that
*        ccLogDescriptor_t (struct tagCcLog *) points to
*/
typedef struct tagCcLog {
ccDataType_t dataType;
uint32_t paramCnt; /* parameter count; see PARAM_CNT_THREE (parameter number of op exp/log/pow) */
} ccLog_t;

/**
* @ingroup dnn
* @brief struct define of pow operator; this is the struct that
*        ccPowDescriptor_t (struct tagCcPow *) points to
*/
typedef struct tagCcPow {
ccDataType_t dataType;
uint32_t paramCnt; /* parameter count; see PARAM_CNT_THREE (parameter number of op exp/log/pow) */
} ccPow_t;

/**
* @ingroup dnn
* @brief struct define of padv2 operator
*/
typedef struct tagCcPadV2 {
ccPadMode_t padMode;
void *padValue; /* untyped; presumably interpreted according to padValueType -- TODO confirm */
ccDataType_t padValueType;
int32_t padDimCnt; /* presumably the number of valid entries in the two arrays below -- TODO confirm */
int32_t padShapeLow[CC_DIM_MAX];
int32_t padShapeHigh[CC_DIM_MAX];
} ccPadV2_t;

/**
* @ingroup dnn
* @brief struct define of psROIPooling operator
*/
typedef struct tagCcPsRoiPooling {
ccPoolingMode_t poolingMode;
int32_t pooledH;
int32_t pooledW;
float spatialScale;
float padRatio;
int32_t groupSize;
int32_t outputDim;
} ccPsRoiPooling_t;

/**
* @ingroup dnn
* @brief struct define of RoIAlign operator
*/
typedef struct tagCcRoiAlign {
int32_t pooledH;
int32_t pooledW;
float spatialScale;
int32_t samplingRatio;
} ccRoiAlign_t;

/**
* @ingroup dnn
* @brief struct define of RoiInterpPooling operator
*
* Every quantity is given as an H/W pair.
*/
typedef struct tagCcRoiInterpPooling {
int32_t pooledH;
int32_t pooledW;
int32_t poolKernelH;
int32_t poolKernelW;
int32_t pooledTailH;
int32_t pooledTailW;
float spatialScaleH;
float spatialScaleW;
} ccRoiInterpPooling_t;

/**
* @ingroup dnn
* @brief struct define of DetectionFull3DOutput operator
*/
typedef struct tagCcDetectionFull3DOutput {
int32_t imageWidth;
int32_t imageHeight;
int32_t numAngleBins;
float trcMarginRatioX;
float trcMarginRatioY;
int32_t pitchRangeD;
int32_t pitchPresetD;
float mountHeight;
int32_t visiblenessBins;
float meanVisibleness;
bool discreteVisibleness;
} ccDetectionFull3DOutput_t;

/**
* @ingroup dnn
* @brief struct define of MsrFastRcnnPredictions operator
*
* Note the mixed precision: scoreThreshold is float, nmsThreshold is double.
*/
typedef struct tagMsrFastRcnnPredictions {
int32_t numClasses; // num of classes
float scoreThreshold; // the threshold of the score
double nmsThreshold; // the threshold of nms
int32_t postTopK;
int32_t outTopK;
int32_t imgH; // the height of image
int32_t imgW; // the width of image
} ccMsrFastRcnnPredictions_t;

/**
* @ingroup dnn
* @brief struct define of ResizeBilinear operator
*
* NOTE(review): which of zoom_factor / shrink_factor / height+width is used
* presumably depends on resizeOutputDimMode -- confirm against the
* implementation.
*/
typedef struct tagCcResizeBilinear {
ccResizeOutputDimMode_t resizeOutputDimMode;
bool alignCorners;
int32_t zoom_factor;
int32_t shrink_factor;
int32_t height;
int32_t width;
int32_t pad_begin;
int32_t pad_end;
} ccResizeBilinear_t;

/**
* @ingroup dnn
* @brief struct define of ResizeNearestNeighbor operator
*/
typedef struct tagCcResizeNearestNeighbor {
bool alignCorners;
int32_t height;
int32_t width;
} ccResizeNearestNeighbor_t;

/**
* @ingroup dnn
* @brief struct define of Eltwise operator
*/
typedef struct tagCcEltwise {
ccQuantize_t *quantInfo; /* NOTE(review): ownership and nullability not visible here */
bool reluFlag;
} ccEltwise_t;

/**
* @ingroup dnn
* @brief struct define of BatchNorm operator; holds only the relu flag
*/
typedef struct tagCcBatchNorm {
bool reluFlag;
} ccBatchNorm_t;

/**
* @ingroup dnn
* @brief struct define of pad operator: mode, pad value and the four H/W pad amounts
*/
typedef struct tagCcPad {
ccPadMode_t padMode;
float padValue;
int32_t htoppad; // padLow[0]
int32_t hbottompad; // padHigh[0]
int32_t wleftpad; // padLow[1]
int32_t wrightpad; // padHigh[1]
} ccPad_t;

/**
* @ingroup dnn
* @brief one sub-condition of ShapeClassify: four base values, four comparison
*        types, and a logical result type
*
* NOTE(review): presumably BaseCondValue[i] is compared using condType[i] and
* the four outcomes are combined by resultType -- confirm.
*/
typedef struct tagCcSubCondition {
uint32_t BaseCondValue[4];
ccCMPType_t condType[4];
ccResultType_t resultType;
} ccSubCondition;

/**
* @ingroup dnn
* @brief one condition of ShapeClassify, made of up to two sub-conditions
*/
typedef struct tagCcShapeClassifyCond {
uint32_t subConditionNum; /* presumably the number of valid entries in subCond[] -- TODO confirm */
ccResultType_t resultType;
uint32_t true_value;
ccSubCondition subCond[2];
} ccShapeClassifyCond;

/* Capacity of ccShapeClassify_t::shapeClassifyCond; only defined here when no
   earlier definition exists. */
#ifndef CC_SHAPE_CLASSIFY_CONDITION_NUM
#define CC_SHAPE_CLASSIFY_CONDITION_NUM (8)
#endif

/**
* @ingroup dnn
* @brief struct define of ShapeClassify operator
*/
typedef struct tagCcShapeClassify {
uint32_t shapeClassifyConditionNum; /* presumably the number of valid entries in shapeClassifyCond[] -- TODO confirm */
uint32_t defaultValue;
ccShapeClassifyCond shapeClassifyCond[CC_SHAPE_CLASSIFY_CONDITION_NUM];
} ccShapeClassify_t;

/**
* @ingroup dnn
* @brief struct define of square operator; holds only the mode
*/
typedef struct tagCcSquare {
ccSquareMode_t mode;
} ccSquare_t;

/**
 * @ingroup dnn
 * @brief Operation kinds of segment reduction.
 */
typedef enum {
  CC_SEGMENT_REDUCTION_OP_SUM = 0,    /**< sum */
  CC_SEGMENT_REDUCTION_OP_INVALID = 1 /**< invalid, last enumerator */
} ccSegmentReductionOpType_t;

/**
* @ingroup dnn
* @brief struct define of Fill operator parameters
*
* The const void* members are untyped; presumably they point to values of
* type valueDatatype -- TODO confirm. Ownership is not visible here.
*/
typedef struct tagCcFillParam {
// The filler type.
ccFillOpType_t fillType;
ccDataType_t valueDatatype;
const void *value; // the value in constant fill
const void *min; // the min value in uniform fill
const void *max; // the max value in uniform fill
const void *mean; // the mean value in Gaussian fill
const void *std; // the std value in Gaussian fill
// the seed used to generate data in Gaussian and uniform fill
int64_t seed1;
int64_t seed2;
} ccFillParam_t;

/**
* @ingroup dnn
* @brief struct define of NonMaxSuppression operator
*/
typedef struct tagNonMaxSuppression {
ccDataType_t dataType;
uint32_t paraCount; /* parameter count; see PARAM_CNT_TWO (parameter number of op nonmaxsuppression) */
} ccNonMaxSuppression_t;

/**
* @ingroup dnn
* @brief struct define of Argmaxmin operator
*/
typedef struct tagCcArgmaxmin {
int32_t axisType;
bool outMaxVal;
int64_t topK;
int64_t reduceSize;
int64_t reduceStride;
int64_t axis;
bool keepDims;
} ccArgmaxmin_t;

/**
* @ingroup dnn
* @brief struct define of Upsample operator parameters
*/
typedef struct tagUpsamplePara {
int32_t scale;
int32_t scaleHeight;
int32_t scaleWidth;
int32_t upsampleHeight;
int32_t upsampleWidth;
bool padOutHeight;
bool padOutWidth;
} ccUpsamplePara_t;

/**
* @ingroup dnn
* @brief struct define of ConcatFive2Four parameters
*/
typedef struct tagCcConcatFive2Four_t {
ccTransForLossMode_t mode;
uint32_t classNum;
} ccConcatFive2Four_t;

}; // namespace cce
#endif // DNN_STRUCT_BASE_HPP__

+ 130
- 0
third_party/fwkacllib/inc/inc/cce/fwk_adpt_struct.h View File

@@ -0,0 +1,130 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef FWK_ADPT_STRUCT_H__
#define FWK_ADPT_STRUCT_H__

#include <cstdint>

namespace aicpu {
namespace FWKAdapter {

// Return codes of the FWKAdapter API.
// 0 means success, 1..13 are specific failures, 99 is the catch-all.
enum FWKAdptAPIRetCode {
  FWK_ADPT_SUCCESS                = 0,   // success
  FWK_ADPT_NOT_INIT               = 1,   // not initialized
  FWK_ADPT_ALLOC_FAILED           = 2,   // memory allocation failed
  FWK_ADPT_PARAM_INVALID          = 3,   // invalid input parameter
  FWK_ADPT_PARAM_PARSE_FAILED     = 4,   // parsing the input parameter failed
  FWK_ADPT_NATIVE_ERROR           = 5,   // error code
  FWK_ADPT_NOT_SUPPORT_OPTYPE     = 6,   // unsupported operate type
  FWK_ADPT_INTERNAL_ERROR         = 7,   // adapter internal error
  FWK_ADPT_NOT_SUPPORT_DATATYPE   = 8,   // unsupported input/output data type
  FWK_ADPT_KERNEL_ALREADY_RUNING  = 9,   // kernel already running, parallel run is not supported
  FWK_ADPT_SESSION_NOT_EXIST      = 10,  // session id does not exist
  FWK_ADPT_SESSION_ALREADY_EXIST  = 11,  // session id already exists when creating a session
  FWK_ADPT_NATIVE_END_OF_SEQUENCE = 12,  // end of sequence
  FWK_ADPT_EXTEND_TYPE_NOT_EXIST  = 13,  // extend info type does not exist
  FWK_ADPT_UNKNOWN_ERROR          = 99   // unknown error code
};

// FWKAdapter operate type.
// Notice: adding a new operate type needs to be checked with OMM, and new
// entries must be appended at the end so that existing values stay unchanged.
enum FWKOperateType {
  FWK_ADPT_SESSION_CREATE = 0,
  FWK_ADPT_KERNEL_RUN = 1,
  FWK_ADPT_KERNEL_DESTROY = 2,
  FWK_ADPT_SESSION_DESTROY = 3,
  FWK_ADPT_SINGLE_OP_RUN = 4,
  FWK_ADPT_KERNEL_RUN_NO_SESS = 5
};

// Extend info types that can be attached to a task.
// Values are consecutive starting at 0 and are written out explicitly.
enum FWKTaskExtInfoType {
  FWK_ADPT_EXT_SHAPE_TYPE = 0,
  FWK_ADPT_EXT_INPUT_SHAPE = 1,
  FWK_ADPT_EXT_OUTPUT_SHAPE = 2,
  FWK_ADPT_EXT_UPDATE_ADDR = 3,
  FWK_ADPT_EXT_OP_NAME = 4,
  FWK_ADPT_EXT_SESSION_INFO = 5,
  FWK_ADPT_EXT_BITMAP = 6,
  FWK_ADPT_EXT_INVALID = 7  // last enumerator
};

// Kinds of address update carried by extend info.
// Numerically, FWK_ADPT_UPDATE_INPUT_OUTPUT (3) equals
// FWK_ADPT_UPDATE_INPUT (1) | FWK_ADPT_UPDATE_OUTPUT (2).
enum FWKExtUpdateAddrType {
  FWK_ADPT_UPDATE_NULL = 0,
  FWK_ADPT_UPDATE_INPUT = 1,
  FWK_ADPT_UPDATE_OUTPUT = 2,
  FWK_ADPT_UPDATE_INPUT_OUTPUT = 3
};

// The struct below is declared under #pragma pack(push, 1): members are laid
// out back to back with no padding, so member order and types define the
// binary layout and must not be changed.
#pragma pack(push, 1)
// API Parameter Structure
// Addresses are carried as uint64_t values; the per-member comments say which
// ones the consumer needs to convert to void*.
struct StrFWKKernel {
FWKOperateType opType;
uint64_t sessionID; // unique

uint64_t stepIDAddr; // step id addr
uint64_t kernelID; // run kernel id, unique in session
uint64_t nodeDefLen; // nodeDef protobuf len
uint64_t nodeDefBuf; // NodeDef protobuf offset addr, need convert to void*
uint64_t funDefLibLen; // FunctionDefLibrary protobuf len
uint64_t funDefLibBuf; // FunctionDefLibrary protobuf addr which use in NodeDef, need convert to void*

uint64_t inputOutputLen; // InputOutput shape protobuf len
uint64_t inputOutputBuf; // InputOutput shape protobuf addr, need convert to void*
uint64_t workspaceBaseAddr; // Workspace base addr, need convert to void*
uint64_t inputOutputAddr; // InputOutput addr, need convert to void*

uint64_t extInfoLen; // extend info total length
uint64_t extInfoAddr; // extend info addr, ExtInfo structure
};
#pragma pack(pop)

// Alias: the operate parameter type is the kernel structure above.
typedef StrFWKKernel FWKOperateParam;

// Extend info ShapeAndType
// kMaxShapeDims is the fixed capacity of ShapeAndType::dims.
const uint32_t kMaxShapeDims = 8;
#pragma pack(push, 1)
// Packed record holding a type code followed by a fixed-size dimension array.
// NOTE(review): how unused trailing entries of dims are marked is not visible here - confirm
// against the producer/consumer of this record.
struct ShapeAndType {
int32_t type; // type code (value set is defined elsewhere)
int64_t dims[kMaxShapeDims]; // up to kMaxShapeDims dimension values
};
#pragma pack(pop)

// Extend info structure for extInfoAddr
// kExtInfoHeadSize (8) equals the combined size of the two fixed members of ExtInfo
// (int32_t infoType + uint32_t infoLen) under 1-byte packing.
const uint32_t kExtInfoHeadSize = 8;

#pragma pack(push, 1)
// Header of one variable-length extend-info record: a fixed 8-byte head followed by
// infoLen bytes of payload.
struct ExtInfo {
int32_t infoType; // extend type (presumably a FWKTaskExtInfoType value - TODO confirm)
uint32_t infoLen; // length for infoMsg
// Zero-length array: a compiler extension (not standard C++) used here as the start of the
// variable-length payload; it adds nothing to sizeof(ExtInfo).
char infoMsg[0]; // extend value
};
#pragma pack(pop)

#pragma pack(push, 1)
// Packed descriptor of one result: where its shape data and raw data live and how large
// they are. Addresses are carried as uint64_t and must be converted to pointers before use.
struct ResultSummary {
uint64_t shape_data_ptr; // shape data addr, need convert to void*
uint64_t shape_data_size; // num of dims
uint64_t raw_data_ptr; // raw data addr, need convert to void*
uint64_t raw_data_size; // size of raw data
};
#pragma pack(pop)
} // end namespace FWKAdapter
} // namespace aicpu

#endif // FWK_ADPT_STRUCT_H__

+ 56
- 0
third_party/fwkacllib/inc/inc/cce/l2fusion_struct.hpp View File

@@ -0,0 +1,56 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef L2FUSION_STRUCT_HPP_
#define L2FUSION_STRUCT_HPP_

#include <map>
#include <string>
#include "runtime/kernel.h"

#define L2_DYNAMIC_SPLIT_NUM

using namespace std;

namespace fusion {

// One L2 record: an index, an address and a page count.
struct tagL2Data {
  uint32_t l2Index;
  uint64_t l2Addr;
  uint64_t l2PageNum;
};
typedef tagL2Data L2Data_t;

// Container of L2 records and its element type; in both, the key is the ddr addr.
typedef std::pair<uint64_t, L2Data_t> L2DataPair_t;
typedef std::map<uint64_t, L2Data_t> L2DataMap_t;

// Per-task L2 information: the node's name, its runtime L2 control block and the
// L2 records of its inputs and outputs.
// std::string is spelled out so this declaration does not depend on a `using namespace std;`
// directive being in effect.
typedef struct TagTaskL2Info {
  std::string nodeName;
  rtL2Ctrl_t l2ctrl;

  L2DataMap_t input;   // keyed by ddr addr (see L2DataMap_t)
  L2DataMap_t output;  // keyed by ddr addr (see L2DataMap_t)
  uint32_t isUsed;
} TaskL2Info_t;

typedef std::map<uint32_t, TaskL2Info_t> TaskL2InfoMap_t;    // the key is nodeId
typedef std::pair<uint32_t, TaskL2Info_t> TaskL2InfoPair_t;  // the key is nodeId

typedef std::map<std::string, TaskL2Info_t> TaskL2InfoFEMap_t;    // the key is nodeName
typedef std::pair<std::string, TaskL2Info_t> TaskL2InfoFEPair_t;  // the key is nodeName

} // namespace fusion

#endif // L2FUSION_STRUCT_HPP_

+ 65
- 0
third_party/fwkacllib/inc/inc/cce/optimizer/fusion_engine.h View File

@@ -0,0 +1,65 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef FUSION_ENGINE_HPP_
#define FUSION_ENGINE_HPP_

#include "cce/cce.h"
#include "graph/compute_graph.h"
#include "proto/task.pb.h"

#include <map>
#include <vector>

using namespace domi;
using namespace std;

namespace fusion {
// Status codes of the fusion module (unnamed enum, so the values are plain integer constants
// in namespace fusion).
enum {
FUSION_STATUS_SUCCESS = 0,
FUSION_STATUS_FAIL = 1,
};

// Model resource description passed to FusionTaskBuild: sizes and base pointers of the
// weight and data memory, plus two feature switches.
typedef struct {
uint64_t weightSize;
uint64_t memorySize;
uint8_t *dataMemBase;
uint8_t *weightMemBase;
// NOTE(review): the original comments on the two switches started with a bare "1", which may
// have meant "default 1" - confirm before relying on a default.
uint32_t l2Enable; // 1 - enable l2 buffer allocation, 0 - disable l2 buffer allocation
uint32_t fusionEnable; // 1 - enable buffer fusion, 0 - disable buffer fusion
} ModelRes;

// Attribute-name strings used by the fusion module.
// These are `static` objects defined in a header, so every translation unit that includes
// this file gets its own copy of each string.
static const std::string SCOPE_ID_ATTR = "fusion_scope";
static const std::string L2FUSION_DYNAMIC_CONVERGE_OP = "l2fusion_dynamic_converge_op";
static const std::string L2FUSION_DYNAMIC_SPLIT_NUM = "l2fusion_dynamic_split_num";
static const std::string FUSION_VIRTUAL_OP = "fusion_virtual_op";
// NOTE(review): "bathc" looks like a misspelling of "batch". The literal is runtime data, so it
// must not be changed unless every producer/consumer of this attribute name changes with it.
static const std::string FUSION_MULTI_BATCH_STRIDE = "fusion_multi_bathc_stride";

// NOTE(review): TVM_TYPE is a plain preprocessor constant; its consumers are not visible here.
#define TVM_TYPE 1

// Nodes grouped by an int64_t key, and the matching element type.
// presumably the key is the fusion scope id (cf. SCOPE_ID_ATTR) - TODO confirm
typedef std::map<int64_t, std::vector<ge::NodePtr>> kScopeNodeMap_t;
typedef std::pair<int64_t, std::vector<ge::NodePtr>> kScopeNodePair_t;

// Fusion entry points. All graph arguments are taken by value as ge::ComputeGraphPtr.
// NOTE(review): the uint32_t results presumably carry the FUSION_STATUS_* codes declared in
// this header - confirm against the implementation.

// Buffer fusion from origGraph into fusionGraph; enable_l2dynamic defaults to true.
uint32_t BufferFusion(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph, bool enable_l2dynamic = true);
// Training-flavoured variants take the same graph pair without the l2dynamic switch.
uint32_t BufferFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph);
uint32_t GraphFusion(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph);
// Task build for fusionGraph; buffer, modelRes and task_def_list_ are non-const references.
// TaskDef is unqualified here and is presumably found through the `using namespace domi;`
// directive earlier in this header - TODO confirm.
uint32_t FusionTaskBuild(cce::ccHandle_t ccHandle, ge::ComputeGraphPtr fusionGraph, ge::Buffer &buffer,
ModelRes &modelRes, std::vector<TaskDef> &task_def_list_);
// Takes the handle list by value (the vector is copied at the call site).
void FusionTaskBuildComplete(std::vector<cce::ccHandle_t> cchandleList);
uint32_t GraphFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph);
} // namespace fusion

#endif // FUSION_ENGINE_HPP_

+ 54
- 0
third_party/fwkacllib/inc/inc/cce/taskdown_api.h View File

@@ -0,0 +1,54 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef TASKDOWN_API_H_
#define TASKDOWN_API_H_

#include <map>
#include <vector>
#include "cce/cce.h"
#include "l2fusion_struct.hpp"
#include "taskdown_common.hpp"

namespace cce {

#define CC_FUSION_OP_MAX 32

// One address entry: a location (addrPos) paired with an integer address value (addrData).
struct tagOpAddrsInfo {
  void *addrPos;
  uintptr_t addrData;
};
typedef tagOpAddrsInfo ccOpAddrsInfo;

// Kernel-argument helpers. All of them report their outcome as ccStatus_t.
#ifdef __cplusplus
extern "C" {
#endif

// Declared with C linkage, but the first parameter is a C++ reference, so this can only be
// called from C++ code.
// Takes the op context, three base addresses (data, weight, variable), the argument buffer
// with its size, and the L2 control block address.
ccStatus_t ccUpdateKernelArgs(ccOpContext &opContext, uint64_t dataBaseAddr, uint64_t weightBaseAddr,
uint64_t variableBaseAddr, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr);

#ifdef __cplusplus
}
#endif

// opAddrsInfo is a non-const reference; presumably filled with the address entries found in
// the argument buffer - TODO confirm against the implementation.
ccStatus_t ccGetKernelArgsAddrs(ccOpContext &opContext, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr,
std::vector<ccOpAddrsInfo> &opAddrsInfo);

// NOTE(review): the parameter name `dateInfo` is probably meant to be `dataInfo`.
ccStatus_t ccSetKernelArgs(std::vector<ccOpAddrsInfo> &dateInfo);

// kernelType is an output reference. The same declaration also appears in
// taskdown_common.hpp, which this header includes.
ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType);

} // namespace cce
#endif // TASKDOWN_API_H_

+ 107
- 0
third_party/fwkacllib/inc/inc/cce/taskdown_common.hpp View File

@@ -0,0 +1,107 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef TASKDOWN_COMMON_H_
#define TASKDOWN_COMMON_H_

#include <map>
#include "cce/cce_def.hpp"
#include "common/attr_list.hpp"
#include "l2fusion_struct.hpp"

namespace cce {

#define CC_FUSION_OP_MAX 32

// Kernel category of an operator.
enum tagccKernelType {
  CCE_AI_CORE = 0,  // cce aicore
  CCE_AI_CPU = 1,   // cce aicpu
  TE = 2,           // te operator
  CUSTOMIZED = 3,   // customized operator
  TE_AI_CORE = 4,   // te aicore operator
  TE_AI_CPU = 5,    // te aicpu operator
  AI_CPU = 6,       // aicpu
  CUST_AI_CPU = 7,  // custom aicpu
  INVALID = 8,      // unknown kernel type
};
typedef tagccKernelType ccKernelType;

// Per-operator context exchanged through ccGetKernelContext / ccUpdateKernelArgs.
// NOTE(review): field meanings below are taken from the member names only; confirm against
// the implementation before relying on them.
typedef struct tagOpContext {
ccKernelType kernelType;
uint32_t opId;
uint32_t kernelFuncId;
uint32_t opIndex;
uint32_t opCount;
uint32_t opIndex2[CC_FUSION_OP_MAX]; // fixed capacity of CC_FUSION_OP_MAX entries
bool isFlowtable;
uint16_t *argsOffset; // presumably an array of argsCount offsets - TODO confirm
uint32_t argsCount;
// Base address / size pairs for the data, weight and variable regions.
uint64_t genDataBaseAddr;
uint64_t genDataBaseSize;
uint64_t genWeightBaseAddr;
uint64_t genWeightBaseSize;
uint64_t genVariableBaseAddr;
uint64_t genVariableBaseSize;
uint64_t l2ctrlSize;
} ccOpContext;

// Read-count information: an enable switch plus a uint64_t -> uint32_t table.
struct tagOpReadCount {
  bool isEnable;
  std::map<uint64_t, uint32_t> tensorRc;
};
typedef tagOpReadCount ccOpReadCount;

// Kernel id modes for task-down; values are consecutive starting at 0.
typedef enum tagTaskDownKernelIdMode {
  CC_TASKDOWN_RESERVED = 0,
  CC_TASKDOWN_ROIPOOLING = 1,
  CC_TASKDOWN_ROIPOOLING_PERF = 2,
  CC_TASKDOWN_ROIALIGN = 3,
  CC_TASKDOWN_ROIALIGN_PERF = 4,
  CC_TASKDOWN_FC = 5,
  CC_TASKDOWN_FC_COMPRESS = 6,
  CC_TASKDOWN_SOFTMAX_LOWEST = 7,
  CC_TASKDOWN_ROIALIGN_FP16 = 8,
  CC_TASKDOWN_RESIZE_NEAREST_NEIGHBOR = 9,
  CC_TASKDOWN_RESIZE_NEAREST_NEIGHBOR_COMMON = 10,
} ccTaskDownKernelIdMode_t;

// Task-down bookkeeping API. Every function reports its outcome as ccStatus_t.
// Functions taking ccHandle_t are addressed by handle; functions taking rtStream_t are
// addressed by stream. Reference and pointer-to-pointer parameters on the Get* functions are
// outputs.
// NOTE(review): the exact semantics of each call are not visible in this header; the notes
// below follow the names and signatures only.

// Writes the stream associated with `handle` through streamId.
ccStatus_t GetStream(ccHandle_t handle, rtStream_t *streamId);

ccStatus_t ccClearOpMap(ccHandle_t handle);

ccStatus_t ccSetKernelOpMap(ccHandle_t handle);

ccStatus_t ccSetKernelContext(ccHandle_t handle, uint32_t opId, AttrList &attrList, bool isFlowtable,
ccKernelType kernelType, void *pgraph);

ccStatus_t ccGetKernelContext(rtStream_t streamId, ccOpContext &opContext);

ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType);

// l2AllocRes is keyed by nodeId (see fusion::TaskL2InfoMap_t).
ccStatus_t ccSetStreamL2Map(ccHandle_t handle, fusion::TaskL2InfoMap_t &l2AllocRes);

// l2Data is a reference to a pointer: the callee sets the caller's pointer.
// NOTE(review): ownership/lifetime of the pointed-to TaskL2Info_t is not stated - confirm.
ccStatus_t ccGetStreamL2Map(rtStream_t streamId, uint32_t opIndex, fusion::TaskL2Info_t *&l2Data);

ccStatus_t ccSetOpIndex(ccHandle_t handle, uint32_t opIndex);

ccStatus_t ccGetOpIndex(ccHandle_t handle, uint32_t &opIndex);

ccStatus_t ccGetOpIndexByStream(rtStream_t streamId, uint32_t &opIndex);

ccStatus_t ccClearStreamL2Map(ccHandle_t handle);

ccStatus_t ccGetKernelReadCount(rtStream_t streamId, ccOpReadCount &rc);

} // namespace cce
#endif // TASKDOWN_COMMON_H_

+ 129
- 0
third_party/fwkacllib/inc/inc/hccl/base.h View File

@@ -0,0 +1,129 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @file base.h
* @brief HCOM data type definition
*
*/

#ifndef HCCL_BASE_H_
#define HCCL_BASE_H_
#include <hccl/hccl_types.h>
#include <string>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

// Short integer aliases used by the HCCL/HCOM headers.
// NOTE(review): the names suggest 8/16/32/64-bit widths, but the underlying types are the
// built-in char/short/int/long long, whose widths are platform-defined - confirm on any new
// target.
typedef signed char s8;
typedef signed short s16;
typedef signed int s32;
typedef signed long long s64;
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;

/**
* @brief Horovod reduction operation
*/
typedef enum {
  HOROVOD_REDUCE_AVERAGE = 0,  /**< average */
  HOROVOD_REDUCE_SUM = 1,      /**< sum */
  HOROVOD_REDUCE_ADASUM = 2,   /**< adasum */
  HOROVOD_REDUCE_MIN = 3,      /**< min */
  HOROVOD_REDUCE_MAX = 4,      /**< max */
  HOROVOD_REDUCE_PROD = 5,     /**< prod */
  HOROVOD_REDUCE_RESERVED = 6  /**< reserved */
} HorovodReduceOp;

const u32 HCCL_MAX_SEGMENT_NUM = 8; // The max number of gradient segments.

/**
* @brief the feature of the model
*
* gradient_size and gradient_time are raw pointers; presumably each refers to an array of
* gradient_num entries - TODO confirm. Ownership of the pointed-to memory is not stated here.
*/
struct model_feature {
const char *model_name; /**< The model name */
u32 gradient_num; /**< The number of gradients */
float *gradient_size; /**< The size of each gradient */
float *gradient_time; /**< The BP computation time of each gradient */
};

/**
* @brief Memory Register Address Struct for Remote Access
*
* One memory region described by its start address and its length.
*/
struct MemRegisterAddr {
u64 addr; /**< region start address */
u64 length; /**< region length (presumably in bytes - TODO confirm) */
};
/**
* @brief The max number of memory register address for remote access.
*/
const u32 HCCL_MAX_MEM_REGISTER_NUM = 32;

/** Forced gradient-split mode. */
enum GradSplitForceMode {
  FORCE_NONE = 0,     /**< no force */
  FORCE_SIZE = 1,     /**< force split gradient by size */
  FORCE_RESERVED = 2  /**< reserved */
};

/** Shape category of the original graph. */
enum OriginalGraphShapeType {
  KNOWN_SHAPE = 0,
  UNKNOWN_SHAPE = 1,
  SHAPE_RESERVED = 2 /**< reserved */
};

/**
* @brief stream handle.
*
* Opaque pointer type. NOTE(review): other headers on this page use rtStream_t without
* defining it, so this typedef presumably has to stay identical to the runtime's own
* definition - confirm.
*/
typedef void *rtStream_t;

/**
* @brief model handle.
*
* Opaque pointer type.
*/
typedef void *rtModel_t;

struct HcomOperation {
std::string hcclType;
void *inputPtr;
void *outputPtr;
u64 count;
HcclDataType dataType;
HcclReduceOp opType;
u32 root;

HcomOperation()
{
inputPtr = nullptr;
outputPtr = nullptr;
count = 0;
dataType = HCCL_DATA_TYPE_RESERVED;
opType = HCCL_REDUCE_RESERVED;
root = 0;
}
};

/**
* @brief One remote-access transfer: the peer rank plus the remote and local addresses and
* the number of bytes involved.
*/
struct HcomRemoteAccessAddrInfo {
// NOTE(review): "remotet" looks like a typo for "remote"; the name is part of the public
// struct, so renaming it would break existing users.
u32 remotetRankID;
u64 remoteAddr; // host embedding table address
u64 localAddr; // device HBM address
u64 length; // Memory Length in Bytes
};

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_BASE_H_

+ 179
- 0
third_party/fwkacllib/inc/inc/hccl/hcom.h View File

@@ -0,0 +1,179 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* @file hcom.h
* @brief HCOM API
*/

#ifndef HCOM_H_
#define HCOM_H_

#include <hccl/base.h>
#include <hccl/hccl_types.h>
#include <functional>
#include <vector>

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus



/**
* @brief Get the rank number in the group.
*
* @param group A string identifying the group name.
* @param rankSize A pointer identifying the rank number.
* @return HcclResult
*/
HcclResult HcomGetRankSize(const char *group, u32 *rankSize);

/**
* @brief Get the rank number of this rank's server within the group.
*
* @param group A string identifying the group name.
* @param localRankSize A pointer identifying the rank number.
* @return HcclResult
*/
HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize);

/**
* @brief Get the rank id of this rank.
*
* @param group A string identifying the group name.
* @param rankId A pointer identifying the rank id.
* @return HcclResult
*/
HcclResult HcomGetRankId(const char *group, u32 *rankId);

/**
* @brief Get the local rank id of this rank's server within the group.
*
* @param group A string identifying the group name.
* @param localRankId A pointer identifying the local rank id.
* @return HcclResult
*/
HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId);

/**
* @brief Get the world rank id according to the group rank id.
*
* @param group A string identifying the group name.
* @param groupRank An integer(u32) identifying the group rank id.
* @param worldRank A pointer identifying the world rank id.
* @return HcclResult
*/
HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank);

/**
* @brief Get the group rank id according to the world rank id.
*
* @param worldRank An integer(u32) identifying the world rank id.
* @param group A string identifying the group name.
* @param groupRank A pointer identifying the group rank id.
* @return HcclResult
*/
HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank);

/**
* @brief Create group.
*
* @param group A string identifying the group name.
* @param rankNum An integer(u32) identifying the number of ranks in the group.
* @param rankIds A list identifying the ranks in the group.
* @return HcclResult
*/
HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds);

/**
* @brief Destroy group
*
* @param group A string identifying the group name.
* @return HcclResult
*/
HcclResult HcomDestroyGroup(const char *group);

/**
* @brief Set the gradient split strategy within the group, according to gradient index.
*
* @param group A string identifying the group name.
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param IdxList A list identifying the index of end gradient in each segment.
* @return HcclResult
*/
extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList);

/**
* @brief Set the gradient split strategy within the group, according to gradient data size.
*
* @param group A string identifying the group name.
* @param segmentNum An integer(u32) identifying the segments number of gradients.
* @param sizeList A list identifying the percent of each segment.
* @return HcclResult
*/
extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList);

/**
* @brief Initialize hcom executor.
*
* @param void
* @return HcclResult
*/
HcclResult HcomExecInitialize();

/**
* @brief Finalize hcom executor.
*
* @param void
* @return HcclResult
*/
HcclResult HcomExecFinalize();

/**
* @brief Put collective communication operation into hcom executor.
*
* Declared inside this header's extern "C" block but takes C++ types (HcomOperation holds a
* std::string, and the callback is a std::function), so it is callable from C++ only.
*
* @param opInfo information about collective communication operation (passed by value).
* @param callback callback after collective communication operation; receives the HcclResult status.
* @return HcclResult
*/
HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback);

/**
* @brief Put remote access operation into hcom executor.
*
* Declared inside this header's extern "C" block but takes C++ standard-library types, so it is
* callable from C++ only.
*
* @param remoteAccessType operation type (read or write).
* @param addrInfos address information about the remote access operation.
* @param callback callback after the remote access operation; receives the HcclResult status.
* @return HcclResult
*/
HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType,
const std::vector<HcomRemoteAccessAddrInfo>& addrInfos,
std::function<void(HcclResult status)> callback);

/**
* @brief Register memories and init resources for remote access.
*
* @param addrList memory addresses for remote access (array of MemRegisterAddr).
* @param count number of remote memory addresses.
* presumably bounded by HCCL_MAX_MEM_REGISTER_NUM - TODO confirm.
* @return HcclResult
*/
extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count);

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCOM_H_

+ 142
- 0
third_party/fwkacllib/inc/inc/mmpa/mmpa_api.h View File

@@ -0,0 +1,142 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef _MMPA_API_H_
#define _MMPA_API_H_

#define LINUX 0
#define WIN 1

#if(OS_TYPE == LINUX) //lint !e553

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#ifdef FUNC_VISIBILITY
#define MMPA_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define MMPA_FUNC_VISIBILITY
#endif

#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <time.h>
#include <unistd.h>
#include <semaphore.h>
#include <fcntl.h>
#include <dlfcn.h>
#include <signal.h>
#include <pthread.h>
#include <syslog.h>
#include <dirent.h>
#include <arpa/inet.h>
#include <stdlib.h>
#include <string.h>
#include <poll.h>
#include <net/if.h>
#include <stdarg.h>
#include <limits.h>
#include <ctype.h>
#include <stddef.h>
#include <dirent.h>
#include <getopt.h>
#include <libgen.h>

#include <linux/types.h>
#include <linux/hdreg.h>
#include <linux/fs.h>
#include <linux/limits.h>

#include <sys/stat.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/shm.h>
#include <sys/un.h>
#include <sys/utsname.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/shm.h>
#include <sys/msg.h>
#include <sys/wait.h>
#include <sys/statvfs.h>
#include <sys/prctl.h>
#include <sys/inotify.h>

#include "securec.h"

#include "./sub_inc/mmpa_typedef_linux.h"
#include "./sub_inc/mmpa_linux.h"

#endif


#if(OS_TYPE == WIN) //lint !e553

#ifdef FUNC_VISIBILITY
#define MMPA_FUNC_VISIBILITY _declspec(dllexport)
#else
#define MMPA_FUNC_VISIBILITY
#endif

#include <winsock2.h>
#include <winsock.h>
#include "Windows.h"
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <io.h>
#include <stdio.h>
#include <ws2tcpip.h>
#include <winioctl.h>
#include <WinBase.h>
#include <mswsock.h>
#include <strsafe.h>
#include <signal.h>
#include <time.h>
#include <stdarg.h>
#include "shlwapi.h"
#include <direct.h>
#include <VersionHelpers.h>
#include <processthreadsapi.h>
#include <Wbemidl.h>
#include <iphlpapi.h>
#include <synchapi.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "securec.h"

#include "sub_inc/mmpa_typedef_win.h"
#include "sub_inc/mmpa_win.h"

#pragma comment(lib, "ws2_32.lib")
#pragma comment(lib, "mswsock.lib")
#pragma comment(lib, "Kernel32.lib")
#pragma comment(lib, "shlwapi.lib")
#pragma comment(lib, "wbemuuid.lib")
#pragma comment(lib, "Iphlpapi.lib")
#endif

#endif // _MMPA_API_H_


+ 561
- 0
third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_linux.h View File

@@ -0,0 +1,561 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MMPA_LINUX_MMPA_LINUX_H
#define MMPA_LINUX_MMPA_LINUX_H

// Open an extern "C" block when compiled as C++ (closed at the end of the header).
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif // __cplusplus
#endif // __cplusplus

// Buffer sizes for mmMacInfo::addr and the mmCpuDesc string fields.
// 18 = 17 characters of "aa-bb-cc-dd-ee-ff" plus the terminating NUL.
#define MMPA_MACINFO_DEFAULT_SIZE 18
#define MMPA_CPUDESC_DEFAULT_SIZE 64

// Linux mappings of the portable mm* handle types: pthread objects map to their pthread
// types, descriptor-like handles map to int.
typedef pthread_t mmThread;
typedef pthread_mutex_t mmMutex_t;
typedef pthread_cond_t mmCond;
typedef pthread_mutex_t mmMutexFC;
typedef pthread_rwlock_t mmRWLock_t;
typedef signed int mmProcess;
typedef int mmPollHandle;
typedef int mmPipeHandle;
typedef int mmFileHandle;
typedef int mmComPletionKey;
typedef int mmCompletionHandle;
typedef int mmErrorMsg;
typedef int mmFd_t;

typedef VOID *mmExitCode;
typedef key_t mmKey_t;
typedef int mmMsgid;
// mmDirent and mmDirent2 are both `struct dirent` on Linux; the filter/sort callback types
// below therefore come in identical pairs.
typedef struct dirent mmDirent;
typedef struct dirent mmDirent2;
typedef struct shmid_ds mmshmId_ds;
typedef int (*mmFilter)(const mmDirent *entry);
typedef int (*mmFilter2)(const mmDirent2 *entry);
typedef int (*mmSort)(const mmDirent **a, const mmDirent **b);
typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b);
typedef size_t mmSize_t; //lint !e410 !e1051
typedef off_t mmOfft_t;
typedef pid_t mmPid_t;
typedef long MM_LONG;

// Callback signature: takes one untyped argument and returns an untyped pointer
// (the same shape as a pthread start routine).
typedef VOID *(*userProcFunc)(VOID *pulArg);

// A callback bundled with the argument it is to be called with; passed to mmCreateTask,
// mmCreateTaskWithAttr and mmCreateAndSetTimer.
typedef struct {
userProcFunc procFunc; // Callback function pointer
VOID *pulArg; // Callback function parameters
} mmUserBlock_t;

// Result record for mmDladdr.
// NOTE(review): the first four members carry the same names as glibc's Dl_info; the remaining
// three are additions - confirm how mmDladdr fills them.
typedef struct {
const char *dli_fname;
void *dli_fbase;
const char *dli_sname;
void *dli_saddr;
size_t dli_size; /* ELF only */
int dli_bind; /* ELF only */
int dli_type;
} mmDlInfo;

// Broken-down calendar time filled by mmGetLocalTime / mmGetSystemTime.
// NOTE(review): whether wMonth/wYear are stored with struct tm offsets (month-1, year-1900) or
// as the human-readable values documented below is decided by the implementation - confirm.
typedef struct {
int wSecond; // Seconds. [0-60] (1 leap second)
int wMinute; // Minutes. [0-59]
int wHour; // Hours. [0-23]
int wDay; // Day. [1-31]
int wMonth; // Month. [1-12]
int wYear; // Year
int wDayOfWeek; // Day of week. [0-6]
int tm_yday; // Days in year.[0-365]
int tm_isdst; // DST. [-1/0/1]
long int wMilliseconds; // milliseconds
} mmSystemTime_t;

// Linux mappings of the portable semaphore, socket, timer and thread-key types.
typedef sem_t mmSem_t;
typedef struct sockaddr mmSockAddr;
typedef socklen_t mmSocklen_t;
typedef int mmSockHandle;
typedef timer_t mmTimer;
typedef pthread_key_t mmThreadKey;

typedef int mmOverLap;

typedef ssize_t mmSsize_t;
typedef size_t mmSize; // size

typedef struct {
UINT32 createFlag;
INT32 oaFlag;
} mmCreateFlag;

// One buffer segment for mmWritev: start pointer and length.
typedef struct {
VOID *sendBuf;
INT32 sendLen;
} mmIovSegment;
typedef struct in_addr mmInAddr;

// Input and output buffers (with their lengths) for mmIoctl.
typedef struct {
VOID *inbuf;
INT32 inbufLen;
VOID *outbuf;
INT32 outbufLen;
mmOverLap *oa;
} mmIoctlBuf;

typedef int mmAtomicType;
// NOTE(review): despite the "64" in its name this is a plain int, i.e. the same type as
// mmAtomicType. Changing it would alter the ABI of every function using it - confirm intent.
typedef int mmAtomicType64;

typedef enum {
pollTypeRead = 1, // pipe read
pollTypeRecv, // socket recv
pollTypeIoctl, // ioctl
} mmPollType;

// One entry to be polled: which handle, and which kind of operation to wait for.
typedef struct {
mmPollHandle handle; // The file descriptor or handle of poll is required
mmPollType pollType; // Operation type requiring poll
// read or recv or ioctl
INT32 ioctlCode; // IOCTL operation code, dedicated to IOCTL
mmComPletionKey completionKey; // The default value is blank, which is used in windows
// The data used to receive the difference between which handle is readable
} mmPollfd;

// Data delivered for a handle that became ready; passed to the mmPollBack callback.
typedef struct {
VOID *priv; // User defined private content
mmPollHandle bufHandle; // Value of handle corresponding to buf
mmPollType bufType; // Data types polled to
VOID *buf; // Data used in poll
UINT32 bufLen; // Data length used in poll
UINT32 bufRes; // Actual return length
} mmPollData, *pmmPollData;

// Callback invoked with the poll result record.
typedef VOID (*mmPollBack)(pmmPollData);

// Time-zone description (same member names as struct timezone).
typedef struct {
INT32 tz_minuteswest; // How many minutes is it different from Greenwich
INT32 tz_dsttime; // type of DST correction
} mmTimezone;

// Seconds + microseconds.
// NOTE(review): this struct uses LONG while mmTimespec below uses MM_LONG; LONG is declared
// outside this header, so whether the two have the same width cannot be checked here - confirm.
typedef struct {
LONG tv_sec;
LONG tv_usec;
} mmTimeval;

// Seconds + nanoseconds.
typedef struct {
MM_LONG tv_sec;
MM_LONG tv_nsec;
} mmTimespec;

// Disk capacity figures (units are not stated here; presumably bytes - TODO confirm).
typedef struct {
ULONGLONG totalSize;
ULONGLONG freeSize;
ULONGLONG availSize;
} mmDiskSize;

// Thread-local storage qualifier (GCC/Clang `__thread`).
#define mmTLS __thread
typedef struct stat mmStat_t;
typedef struct stat64 mmStat64_t;
typedef mode_t mmMode_t;

// `struct option` as used with getopt_long-style parsing.
typedef struct option mmStructOption;

// MAC address as a NUL-terminated text string.
typedef struct {
char addr[MMPA_MACINFO_DEFAULT_SIZE]; // ex:aa-bb-cc-dd-ee-ff\0
} mmMacInfo;

// Argument and environment vectors together with their element counts.
typedef struct {
char **argv;
INT32 argvCount;
char **envp;
INT32 envpCount;
} mmArgvEnv;

// CPU description; the three text fields are fixed-size buffers of
// MMPA_CPUDESC_DEFAULT_SIZE bytes each.
typedef struct {
char arch[MMPA_CPUDESC_DEFAULT_SIZE];
char manufacturer[MMPA_CPUDESC_DEFAULT_SIZE]; // vendor
char version[MMPA_CPUDESC_DEFAULT_SIZE]; // modelname
INT32 frequency; // cpu frequency
INT32 maxFrequency; // max speed
INT32 ncores; // cpu cores
INT32 nthreads; // cpu thread count
INT32 ncounts; // logical cpu nums
} mmCpuDesc;

typedef mode_t MODE;

// Thread creation attributes. Each *Flag member is a 0/1 switch saying whether the
// corresponding setting is to be applied.
typedef struct {
// NOTE(review): the original wording of the detachFlag comment was ambiguous; the reading
// below follows the 0 = no / 1 = yes pattern of the other flags - confirm.
INT32 detachFlag; // Whether to create the thread detached: 0 - do not detach, 1 - detach
INT32 priorityFlag; // Whether to set the priority: 0 - do not set, 1 - set
INT32 priority; // Priority value range to be set 1-99
INT32 policyFlag; // Whether to set the scheduling policy: 0 - do not set, 1 - set
INT32 policy; // Scheduling policy value value
// MMPA_THREAD_SCHED_RR
// MMPA_THREAD_SCHED_OTHER
// MMPA_THREAD_SCHED_FIFO
INT32 stackFlag; // Whether to set the stack size: 0 - do not set, 1 - set
UINT32 stackSize; // The stack size unit bytes to be set cannot be less than MMPA_THREAD_STACK_MIN
} mmThreadAttr;

#ifdef __ANDROID__
#define S_IREAD S_IRUSR
#define S_IWRITE S_IWUSR
#endif

#define mm_no_argument no_argument
#define mm_required_argument required_argument
#define mm_optional_argument optional_argument

#define M_FILE_RDONLY O_RDONLY
#define M_FILE_WRONLY O_WRONLY
#define M_FILE_RDWR O_RDWR
#define M_FILE_CREAT O_CREAT

#define M_RDONLY O_RDONLY
#define M_WRONLY O_WRONLY
#define M_RDWR O_RDWR
#define M_CREAT O_CREAT
#define M_BINARY O_RDONLY
#define M_TRUNC O_TRUNC
#define M_IRWXU S_IRWXU
#define M_APPEND O_APPEND

#define M_IN_CREATE IN_CREATE
#define M_IN_CLOSE_WRITE IN_CLOSE_WRITE
#define M_IN_IGNORED IN_IGNORED

#define M_OUT_CREATE IN_CREATE
#define M_OUT_CLOSE_WRITE IN_CLOSE_WRITE
#define M_OUT_IGNORED IN_IGNORED
#define M_OUT_ISDIR IN_ISDIR

#define M_IREAD S_IREAD
#define M_IRUSR S_IRUSR
#define M_IWRITE S_IWRITE
#define M_IWUSR S_IWUSR
#define M_IXUSR S_IXUSR
#define FDSIZE 64
#define M_MSG_CREAT IPC_CREAT
#define M_MSG_EXCL (IPC_CREAT | IPC_EXCL)
#define M_MSG_NOWAIT IPC_NOWAIT

#define M_WAIT_NOHANG WNOHANG // Non blocking waiting
#define M_WAIT_UNTRACED \
WUNTRACED // If the subprocess enters the suspended state, it will return immediately
// But the end state of the subprocess is ignored
#define M_UMASK_USRREAD S_IRUSR
#define M_UMASK_GRPREAD S_IRGRP
#define M_UMASK_OTHREAD S_IROTH

#define M_UMASK_USRWRITE S_IWUSR
#define M_UMASK_GRPWRITE S_IWGRP
#define M_UMASK_OTHWRITE S_IWOTH

#define M_UMASK_USREXEC S_IXUSR
#define M_UMASK_GRPEXEC S_IXGRP
#define M_UMASK_OTHEXEC S_IXOTH

#define mmConstructor(x) __attribute__((constructor)) VOID x()
#define mmDestructor(x) __attribute__((destructor)) VOID x()

#define MMPA_NO_ARGUMENT 0
#define MMPA_REQUIRED_ARGUMENT 1
#define MMPA_OPTIONAL_ARGUMENT 2

#define MMPA_MAX_PATH PATH_MAX
// NOTE(review): MAX_FNAME is not a POSIX/Linux macro (the POSIX name is NAME_MAX). Unless it is
// defined by one of the project headers included before this file, any use of M_NAME_MAX will
// fail to compile - confirm.
#define M_NAME_MAX MAX_FNAME

#define M_F_OK F_OK
#define M_X_OK X_OK
#define M_W_OK W_OK
#define M_R_OK R_OK


#define MM_DT_DIR DT_DIR
#define MM_DT_REG DT_REG

#define MMPA_STDIN STDIN_FILENO
#define MMPA_STDOUT STDOUT_FILENO
#define MMPA_STDERR STDERR_FILENO

#define MMPA_RTLD_NOW RTLD_NOW
#define MMPA_RTLD_GLOBAL RTLD_GLOBAL
#define MMPA_RTLD_LAZY RTLD_LAZY
#define MMPA_RTLD_NODELETE RTLD_NODELETE

#define MMPA_DL_EXT_NAME ".so"

// ---- Threads, mutexes, condition variables, read/write locks, ids and time ----
// NOTE(review): return-value conventions (success/error codes) are defined outside this
// header - confirm before checking results against literal values.

// Thread creation/join; funcBlock carries the entry function and its argument.
MMPA_FUNC_VISIBILITY INT32 mmCreateTask(mmThread *threadHandle, mmUserBlock_t *funcBlock);
MMPA_FUNC_VISIBILITY INT32 mmJoinTask(mmThread *threadHandle);
// Mutex operations on mmMutex_t (pthread_mutex_t on Linux).
MMPA_FUNC_VISIBILITY INT32 mmMutexInit(mmMutex_t *mutex);
MMPA_FUNC_VISIBILITY INT32 mmMutexLock(mmMutex_t *mutex);
MMPA_FUNC_VISIBILITY INT32 mmMutexTryLock(mmMutex_t *mutex);
MMPA_FUNC_VISIBILITY INT32 mmMutexUnLock(mmMutex_t *mutex);
MMPA_FUNC_VISIBILITY INT32 mmMutexDestroy(mmMutex_t *mutex);
// Condition variable plus the mmMutexFC lock that is passed to mmCondWait/mmCondTimedWait.
MMPA_FUNC_VISIBILITY INT32 mmCondInit(mmCond *cond);
MMPA_FUNC_VISIBILITY INT32 mmCondLockInit(mmMutexFC *mutex);
MMPA_FUNC_VISIBILITY INT32 mmCondLock(mmMutexFC *mutex);
MMPA_FUNC_VISIBILITY INT32 mmCondUnLock(mmMutexFC *mutex);
MMPA_FUNC_VISIBILITY INT32 mmCondLockDestroy(mmMutexFC *mutex);
// Read/write lock operations; note the separate unlock calls for read and write locks.
MMPA_FUNC_VISIBILITY INT32 mmRWLockInit(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRWLockRDLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRWLockTryRDLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRWLockWRLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRWLockTryWRLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRDLockUnLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmWRLockUnLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRWLockDestroy(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmCondWait(mmCond *cond, mmMutexFC *mutex);
// The timeout is given in milliseconds.
MMPA_FUNC_VISIBILITY INT32 mmCondTimedWait(mmCond *cond, mmMutexFC *mutex, UINT32 milliSecond);
MMPA_FUNC_VISIBILITY INT32 mmCondNotify(mmCond *cond);
MMPA_FUNC_VISIBILITY INT32 mmCondNotifyAll(mmCond *cond);
MMPA_FUNC_VISIBILITY INT32 mmCondDestroy(mmCond *cond);
// Process / thread identification.
MMPA_FUNC_VISIBILITY INT32 mmGetPid();
MMPA_FUNC_VISIBILITY INT32 mmGetTid();
MMPA_FUNC_VISIBILITY INT32 mmGetPidHandle(mmProcess *processHandle);
// Current time as a broken-down mmSystemTime_t.
MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTime);
MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTime);

// ---- Semaphores, file descriptors, pipes to processes and sockets ----
// NOTE(review): names mirror the corresponding POSIX calls; that each function is a thin
// wrapper around its namesake is an assumption - confirm in the implementation.

// Semaphore operations on mmSem_t (sem_t on Linux).
MMPA_FUNC_VISIBILITY INT32 mmSemInit(mmSem_t *sem, UINT32 value);
MMPA_FUNC_VISIBILITY INT32 mmSemWait(mmSem_t *sem);
MMPA_FUNC_VISIBILITY INT32 mmSemPost(mmSem_t *sem);
MMPA_FUNC_VISIBILITY INT32 mmSemDestroy(mmSem_t *sem);
// File open/close and read/write on integer descriptors; mmOpen2 additionally takes a mode.
MMPA_FUNC_VISIBILITY INT32 mmOpen(const CHAR *pathName, INT32 flags);
MMPA_FUNC_VISIBILITY INT32 mmOpen2(const CHAR *pathName, INT32 flags, MODE mode);
// Both string parameters are non-const CHAR *, so string literals cannot be passed directly
// from C++ without a cast.
MMPA_FUNC_VISIBILITY FILE *mmPopen(CHAR *command, CHAR *type);
MMPA_FUNC_VISIBILITY INT32 mmClose(INT32 fd);
MMPA_FUNC_VISIBILITY INT32 mmPclose(FILE *stream);
// Buffer lengths are UINT32; the result is a signed mmSsize_t.
MMPA_FUNC_VISIBILITY mmSsize_t mmWrite(INT32 fd, VOID *buf, UINT32 bufLen);
MMPA_FUNC_VISIBILITY mmSsize_t mmRead(INT32 fd, VOID *buf, UINT32 bufLen);
// Socket lifecycle.
MMPA_FUNC_VISIBILITY mmSockHandle mmSocket(INT32 sockFamily, INT32 type, INT32 protocol);
MMPA_FUNC_VISIBILITY INT32 mmBind(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen);
MMPA_FUNC_VISIBILITY INT32 mmListen(mmSockHandle sockFd, INT32 backLog);
MMPA_FUNC_VISIBILITY mmSockHandle mmAccept(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t *addrLen);
MMPA_FUNC_VISIBILITY INT32 mmConnect(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen);
MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd);
// Socket data transfer. Note the differing length/flag/return types between the
// Send/Recv pair (INT32) and the SendTo/RecvFrom pair (UINT32 flags, mmSize length on RecvFrom).
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag);
MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd,
VOID *sendMsg,
INT32 sendLen,
UINT32 sendFlag,
const mmSockAddr* addr,
INT32 tolen);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd,
VOID *recvBuf,
mmSize recvLen,
UINT32 recvFlag,
mmSockAddr* addr,
mmSocklen_t *FromLen);
// presumably the counterparts of WSAStartup/WSACleanup on Windows - TODO confirm what they
// do on Linux.
MMPA_FUNC_VISIBILITY INT32 mmSAStartup();
MMPA_FUNC_VISIBILITY INT32 mmSACleanup();
// Dynamic-library wrappers: open by file name, resolve by symbol name, query address info, close, fetch error text.
MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode);
MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info);
MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName);
MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle);
MMPA_FUNC_VISIBILITY CHAR *mmDlerror();
// Timer creation/deletion; timerBlock carries the callback and its argument (see mmUserBlock_t).
// Both milliSecond and period are plain UINT values - units for `period` are not stated here.
MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle,
mmUserBlock_t *timerBlock,
UINT milliSecond,
UINT period);
MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle);
// File-status queries by path (32/64-bit stat structures) and by descriptor.
MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer);
MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer);
MMPA_FUNC_VISIBILITY INT32 mmFStatGet(INT32 fd, mmStat_t *buffer);
MMPA_FUNC_VISIBILITY INT32 mmMkdir(const CHAR *pathName, mmMode_t mode);
// Sleep duration is expressed in milliseconds, per the parameter name.
MMPA_FUNC_VISIBILITY INT32 mmSleep(UINT32 milliSecond);

// Thread creation with attributes, process/thread priority accessors, and path access checks.
MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithAttr(mmThread *threadHandle, mmUserBlock_t *funcBlock);
MMPA_FUNC_VISIBILITY INT32 mmGetProcessPrio(mmProcess pid);
MMPA_FUNC_VISIBILITY INT32 mmSetProcessPrio(mmProcess pid, INT32 processPrio);
MMPA_FUNC_VISIBILITY INT32 mmGetThreadPrio(mmThread *threadHandle);
MMPA_FUNC_VISIBILITY INT32 mmSetThreadPrio(mmThread *threadHandle, INT32 threadPrio);
// mmAccess2 is the variant of mmAccess that takes an explicit mode argument.
MMPA_FUNC_VISIBILITY INT32 mmAccess(const CHAR *pathName);
MMPA_FUNC_VISIBILITY INT32 mmAccess2(const CHAR *pathName, INT32 mode);
MMPA_FUNC_VISIBILITY INT32 mmRmdir(const CHAR *pathName);

// Device control, timed semaphore wait, vectored write, memory barrier and address parsing.
// NOTE(review): the unit of `timeout` in mmSemTimedWait is not stated in this header.
MMPA_FUNC_VISIBILITY INT32 mmIoctl(mmProcess fd, INT32 ioctlCode, mmIoctlBuf *bufPtr);
MMPA_FUNC_VISIBILITY INT32 mmSemTimedWait(mmSem_t *sem, INT32 timeout);
MMPA_FUNC_VISIBILITY mmSsize_t mmWritev(mmProcess fd, mmIovSegment *iov, INT32 iovcnt);
MMPA_FUNC_VISIBILITY VOID mmMb();
MMPA_FUNC_VISIBILITY INT32 mmInetAton(const CHAR *addrStr, mmInAddr *addr);

// Handle-based file I/O: the file is identified by an mmProcess value rather than an INT32 descriptor.
MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag);
MMPA_FUNC_VISIBILITY mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len);
MMPA_FUNC_VISIBILITY mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len);
MMPA_FUNC_VISIBILITY INT32 mmCloseFile(mmProcess fileId);

// Operations on mmAtomicType / mmAtomicType64 values: set, increment by `value`, subtract `value`.
// NOTE(review): whether the returned value is the old or the new content is not stated here - confirm in the implementation.
MMPA_FUNC_VISIBILITY mmAtomicType mmSetData(mmAtomicType *ptr, mmAtomicType value);
MMPA_FUNC_VISIBILITY mmAtomicType mmValueInc(mmAtomicType *ptr, mmAtomicType value);
MMPA_FUNC_VISIBILITY mmAtomicType mmValueSub(mmAtomicType *ptr, mmAtomicType value);
MMPA_FUNC_VISIBILITY mmAtomicType64 mmSetData64(mmAtomicType64 *ptr, mmAtomicType64 value);
MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueInc64(mmAtomicType64 *ptr, mmAtomicType64 value);
MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueSub64(mmAtomicType64 *ptr, mmAtomicType64 value);
MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock);

// Deprecated: the following 3 interfaces are scheduled for removal; they take no element count,
// unlike the mmCreatePipe / mmOpenPipe / mmClosePipe replacements declared right after them.
MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode);
MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode);
MMPA_FUNC_VISIBILITY VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]);

// Pipe interfaces with an explicit pipeCount giving the number of entries in pipe[] / pipeName[].
MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode);
MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode);
MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount);

// Poll-related interfaces.
// mmPoll receives the descriptor array and its length, a timeout, a completion handle,
// a buffer describing the polled data, and a callback of type mmPollBack.
MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort();
MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle);
MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds,
INT32 fdCount,
INT32 timeout,
mmCompletionHandle handleIOCP,
pmmPollData polledData,
mmPollBack pollBack);
// Error-code retrieval and formatting into a caller-supplied buffer of `size` bytes.
MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode();
MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size);
// Time queries.
MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone);
MMPA_FUNC_VISIBILITY mmTimespec mmGetTickCount();
// Path canonicalisation. mmGetRealPath takes no output length; mmRealPath is the variant that
// receives the capacity of realPath explicitly.
MMPA_FUNC_VISIBILITY INT32 mmGetRealPath(CHAR *path, CHAR *realPath);
MMPA_FUNC_VISIBILITY INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen);

// Descriptor duplication.
MMPA_FUNC_VISIBILITY INT32 mmDup2(INT32 oldFd, INT32 newFd);

MMPA_FUNC_VISIBILITY INT32 mmDup(INT32 fd);

// File removal and permission change by name.
MMPA_FUNC_VISIBILITY INT32 mmUnlink(const CHAR *filename);

MMPA_FUNC_VISIBILITY INT32 mmChmod(const CHAR *filename, INT32 mode);

// Descriptor number backing a FILE* stream.
MMPA_FUNC_VISIBILITY INT32 mmFileno(FILE *stream);

// Directory scanning. The *2 variants are the same interfaces expressed with the
// mmDirent2 / mmFilter2 / mmSort2 types instead of mmDirent / mmFilter / mmSort.
MMPA_FUNC_VISIBILITY INT32 mmScandir(const CHAR *path, mmDirent ***entryList, mmFilter filterFunc, mmSort sort);
MMPA_FUNC_VISIBILITY INT32 mmScandir2(const CHAR *path, mmDirent2 ***entryList, mmFilter2 filterFunc, mmSort2 sort);

// Release an entry list of `count` elements obtained from the matching scan function.
MMPA_FUNC_VISIBILITY VOID mmScandirFree(mmDirent **entryList, INT32 count);
MMPA_FUNC_VISIBILITY VOID mmScandirFree2(mmDirent2 **entryList, INT32 count);

// Message-queue wrappers keyed by mmKey_t and identified by mmMsgid.
MMPA_FUNC_VISIBILITY mmMsgid mmMsgCreate(mmKey_t key, INT32 msgFlag);

MMPA_FUNC_VISIBILITY mmMsgid mmMsgOpen(mmKey_t key, INT32 msgFlag);

MMPA_FUNC_VISIBILITY INT32 mmMsgSnd(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag);

MMPA_FUNC_VISIBILITY INT32 mmMsgRcv(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag);

MMPA_FUNC_VISIBILITY INT32 mmMsgClose(mmMsgid msqid);

// Conversion of a time_t into a caller-supplied struct tm.
MMPA_FUNC_VISIBILITY INT32 mmLocalTimeR(const time_t *timep, struct tm *result);

// Accessors for the option-parser state (error flag, index, current option, argument).
// NOTE(review): the setter paired with mmGetOptOpt is spelled mmSetOpOpt (not mmSetOptOpt);
// the name is part of the exported interface, so it is left unchanged here.
MMPA_FUNC_VISIBILITY INT32 mmGetOptErr();
MMPA_FUNC_VISIBILITY VOID mmSetOptErr(INT32 mmOptErr);
MMPA_FUNC_VISIBILITY INT32 mmGetOptInd();
MMPA_FUNC_VISIBILITY VOID mmSetOptInd(INT32 mmOptInd);
MMPA_FUNC_VISIBILITY INT32 mmGetOptOpt();
MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt);
MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg();
MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg);
// Command-line option parsing; the long form additionally takes an mmStructOption table
// and an output index.
MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts);
MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc,
char *const *argv,
const char *opts,
const mmStructOption *longOpts,
INT32 *longIndex);

// File positioning and truncation.
// NOTE(review): mmLseek takes an INT64 offset but returns LONG, and mmFtruncate takes a UINT32 length;
// confirm the behaviour for offsets/lengths that do not fit those types.
MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag);
MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length);

// Thread-local storage keyed by mmThreadKey; mmTlsCreate accepts an optional-looking destructor callback.
MMPA_FUNC_VISIBILITY INT32 mmTlsCreate(mmThreadKey *key, VOID (*destructor)(VOID *));
MMPA_FUNC_VISIBILITY INT32 mmTlsSet(mmThreadKey key, const VOID *value);
MMPA_FUNC_VISIBILITY VOID *mmTlsGet(mmThreadKey key);
MMPA_FUNC_VISIBILITY INT32 mmTlsDelete(mmThreadKey key);
MMPA_FUNC_VISIBILITY INT32 mmGetOsType();

// Flush, working directory, umask, thread signalling and child-process wait.
// mmFsync takes an mmProcess handle; mmFsync2 is the variant that takes an INT32 descriptor.
MMPA_FUNC_VISIBILITY INT32 mmFsync(mmProcess fd);
MMPA_FUNC_VISIBILITY INT32 mmFsync2(INT32 fd);
MMPA_FUNC_VISIBILITY INT32 mmChdir(const CHAR *path);
MMPA_FUNC_VISIBILITY INT32 mmUmask(INT32 pmode);
MMPA_FUNC_VISIBILITY INT32 mmThreadKill(mmThread id);
MMPA_FUNC_VISIBILITY INT32 mmWaitPid(mmProcess pid, INT32 *status, INT32 options);

// Working directory, environment variables, tokenising and path splitting.
// Output buffers (buffer, value) are caller-supplied with an explicit capacity.
MMPA_FUNC_VISIBILITY INT32 mmGetCwd(CHAR *buffer, INT32 maxLen);
MMPA_FUNC_VISIBILITY INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len);
MMPA_FUNC_VISIBILITY INT32 mmSetEnv(const CHAR *name, const CHAR *value, INT32 overwrite);
MMPA_FUNC_VISIBILITY CHAR *mmStrTokR(CHAR *str, const CHAR *delim, CHAR **saveptr);
// NOTE(review): mmDirName / mmBaseName take a non-const path - presumably they may modify it; confirm.
MMPA_FUNC_VISIBILITY CHAR *mmDirName(CHAR *path);
MMPA_FUNC_VISIBILITY CHAR *mmBaseName(CHAR *path);
MMPA_FUNC_VISIBILITY INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize);

/*
 * Function: set the name of a thread created by mmCreateTask
 * Input:  threadHandle: handle of the thread to rename
 *         name: thread name; its actual length must be < MMPA_THREADNAME_SIZE
 * Return: EN_INVALID_PARAM on invalid input, EN_OK on success, EN_ERROR on failure
 */
MMPA_FUNC_VISIBILITY INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name);

/*
 * Function: get the name of a thread
 * Input:  threadHandle: handle of the thread to query
 *         size: capacity of the name buffer
 * Output: name: user-allocated buffer receiving the thread name; its capacity must be >= MMPA_THREADNAME_SIZE
 * Return: EN_INVALID_PARAM on invalid input, EN_OK on success, EN_ERROR on failure
 */
MMPA_FUNC_VISIBILITY INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, INT32 size);
/*
 * Function: set the name of the currently executing thread (call from inside the thread body)
 * Input:  name: thread name to set
 * Return: EN_INVALID_PARAM on invalid input, EN_OK on success, EN_ERROR on failure
 */
MMPA_FUNC_VISIBILITY INT32 mmSetCurrentThreadName(const CHAR *name);
/*
 * Function: get the name of the currently executing thread (call from inside the thread body)
 * Input:  size: capacity of the name buffer, must be >= MMPA_THREADNAME_SIZE
 * Output: name: user-allocated buffer receiving the thread name
 * Return: EN_INVALID_PARAM on invalid input, EN_OK on success, EN_ERROR on failure
 */
MMPA_FUNC_VISIBILITY INT32 mmGetCurrentThreadName(CHAR *name, INT32 size);
// File size / directory test by name.
MMPA_FUNC_VISIBILITY INT32 mmGetFileSize(const CHAR *fileName, ULONGLONG *length);
MMPA_FUNC_VISIBILITY INT32 mmIsDir(const CHAR *fileName);
// OS name / version written into caller-supplied buffers of the given capacity
// (see MMPA_MIN_OS_NAME_SIZE / MMPA_MIN_OS_VERSION_SIZE for the related size constants).
MMPA_FUNC_VISIBILITY INT32 mmGetOsName(CHAR *name, INT32 nameSize);
MMPA_FUNC_VISIBILITY INT32 mmGetOsVersion(CHAR *versionInfo, INT32 versionLength);
// MAC-address and CPU-description queries. Each Get function hands back a list plus its element count;
// the matching *Free function takes that list and count.
MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count);
MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count);
MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count);
MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count);
// Process creation: program file, argv/env bundle, a file name for stdout redirection, and the
// resulting process id as an out-parameter.
MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName,
const mmArgvEnv *env,
const char *stdoutRedirectFile,
mmProcess *id);

// Thread creation with an explicit mmThreadAttr description.
MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle,
const mmUserBlock_t *funcBlock,
const mmThreadAttr *threadAttr);
// Shared-memory object open/unlink and memory mapping.
// NOTE(review): the role of the `extra` out-parameter of mmMmap / mmMunMap is not described here - confirm.
MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode);
MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name);
MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags);
MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra);
// Defined empty in this header, so any use of MMPA_DLL_API expands to nothing.
#define MMPA_DLL_API

// Close the extern "C" linkage block when compiled as C++.
#ifdef __cplusplus
#if __cplusplus
}
#endif /* __cplusplus */
#endif // __cplusplus

#endif // MMPA_LINUX_MMPA_LINUX_H_

+ 98
- 0
third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_linux.h View File

@@ -0,0 +1,98 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Include guard for the Linux basic-type and constant definitions.
#ifndef MMPA_TYPEDEF_LINUX_H
#define MMPA_TYPEDEF_LINUX_H

// Give the contents C linkage when compiled as C++.
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif // __cplusplus
#endif // __cplusplus

// Boolean constants, defined only if no earlier header provided them.
#ifndef FALSE
#define FALSE 0
#endif

#ifndef TRUE
#define TRUE 1
#endif

// Upper-case aliases for the built-in scalar types.
// The numeric suffix names the intended width; the actual width is whatever the
// underlying built-in type has on the target (e.g. `int` for UINT32 / INT32).
typedef unsigned char UINT8;
typedef signed char INT8;
typedef unsigned short UINT16;
typedef signed short INT16;
typedef unsigned int UINT32;
typedef signed int INT32;
typedef unsigned long long UINT64;
typedef signed long long INT64;
typedef float FLOAT;
typedef double DOUBLE;
typedef void VOID;
typedef unsigned char UCHAR;
typedef char CHAR;
typedef unsigned short USHORT;
typedef short SHORT;
typedef unsigned int UINT;
typedef int INT;
typedef unsigned long ULONG;
typedef unsigned long long ULONGLONG;

// LONG follows the platform's `long`, so its width is not fixed by this header.
typedef long LONG;

// General limits and sentinel values.
#define HANDLE_INVALID_VALUE (-1)
#define MMPA_MEM_MAX_LEN (0x7fffffff)
#define MMPA_PROCESS_ERROR (0x7fffffff)
#define PATH_SIZE 256
#define MAX_IOVEC_SIZE 32
// 4294967 equals 0xFFFFFFFF / 1000 (integer division); presumably the largest millisecond count
// that still fits in 32 bits once converted to microseconds - TODO confirm against mmSleep.
#define MMPA_MAX_SLEEP_MILLSECOND 4294967
#define MAX_PIPE_COUNT 2
#define MMPA_PIPE_COUNT 2
// Buffer size referenced by the thread-name interfaces.
#define MMPA_THREADNAME_SIZE 16
#define MMPA_MIN_OS_NAME_SIZE 64
#define MMPA_MIN_OS_VERSION_SIZE 128

// Numeric helpers and thread-priority bounds.
#define MMPA_ONE_THOUSAND 1000
#define MMPA_ONE_BILLION 1000000000
#define MMPA_COMPUTER_BEGIN_YEAR 1900
#define MMPA_ZERO 0
#define MMPA_MAX_THREAD_PIO 99
#define MMPA_MIN_THREAD_PIO 1
// Octal literal: read/write/execute for user, group and others.
#define MMPA_DEFAULT_PIPE_PERMISSION 0777
#define MMPA_DEFAULT_MSG_TYPE 1

// Scheduling policies and minimum stack size forwarded to the platform's own macros.
#define MMPA_THREAD_SCHED_RR SCHED_RR
#define MMPA_THREAD_SCHED_FIFO SCHED_FIFO
#define MMPA_THREAD_SCHED_OTHER SCHED_OTHER
#define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN

// Static mutex initializer forwarded to the pthread one.
#define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER

// Nice-value bounds.
#define MMPA_MAX_NI 19
#define MMPA_MIN_NI (-20)

// Status codes. NOTE: EN_ERR (1) and EN_ERROR (-1) are distinct values.
#define EN_OK 0
#define EN_ERR 1
#define EN_ERROR (-1)
#define EN_INVALID_PARAM (-2)
#define EN_TIMEOUT (-3)

// Close the extern "C" linkage block when compiled as C++.
#ifdef __cplusplus
#if __cplusplus
}
#endif // __cplusplus
#endif // __cplusplus
#endif // MMPA_TYPEDEF_LINUX_H

+ 83
- 0
third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_typedef_win.h View File

@@ -0,0 +1,83 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Include guard for the Windows constant definitions.
#ifndef MMPA_TYPEDEF_WIN_H
#define MMPA_TYPEDEF_WIN_H
// Give the contents C linkage when compiled as C++.
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif // __cplusplus
#endif // __cplusplus
// Boolean constants, defined only if no earlier header provided them.
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
// Status codes. NOTE: EN_ERR (1) and EN_ERROR (-1) are distinct values.
#define EN_OK 0
#define EN_ERR 1
#define EN_ERROR (-1)
#define EN_INVALID_PARAM (-2)
#define EN_TIMEOUT (-3)
// Sentinels and general limits.
#define HANDLE_INVALID_VALUE (-1)
#define INVALID_SOCKET_HANDLE INVALID_SOCKET
#define MMPA_MEM_MAX_LEN (0x7fffffff)
#define MMPA_PROCESS_ERROR (0x7fffffff)
#define MMPA_ONE_THOUSAND 1000
#define MMPA_COMPUTER_BEGIN_YEAR 1900
#define SUMMER_TIME_OR_NOT (-1)
#define MMPA_ZERO 0
#define MMPA_VALUE_ONE 1
// Presumably the major/minor Winsock version requested at start-up (2.0) - TODO confirm at the use site.
#define MMPA_SOCKET_MAIN_EDITION 2
#define MMPA_SOCKET_SECOND_EDITION 0
#define MMPA_PIPE_BUF_SIZE 1024
#define MMPA_MAX_SCANDIR_COUNT 1024
#define MAX_IOVEC_SIZE 32
#define MMPA_PIPE_COUNT 2
// Buffer size referenced by the thread-name interfaces.
#define MMPA_THREADNAME_SIZE 16
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1)
#define MMPA_MIN_OS_VERSION_SIZE 64
// Nice-value bounds and intermediate levels.
#define MMPA_MAX_NI 19
#define MMPA_MIDDLE_NI 5
#define MMPA_LOW_NI (-5)
#define MMPA_MIN_NI (-20)
#define MMPA_MAX_FILE 128
// Thread-priority bounds and intermediate levels.
#define MMPA_MAX_THREAD_PIO 99
#define MMPA_MIDDLE_THREAD_PIO 66
#define MMPA_LOW_THREAD_PIO 33
#define MMPA_MIN_THREAD_PIO 1
// Scheduling policy and minimum stack size are all defined as 0 in this header.
#define MMPA_THREAD_SCHED_RR 0
#define MMPA_THREAD_SCHED_FIFO 0
#define MMPA_THREAD_SCHED_OTHER 0
#define MMPA_THREAD_MIN_STACK_SIZE 0
#define MM_MUTEX_INITIALIZER NULL
// Close the extern "C" linkage block when compiled as C++.
#ifdef __cplusplus
#if __cplusplus
}
#endif // __cplusplus
#endif // __cplusplus
#endif // MMPA_TYPEDEF_WIN_H

+ 566
- 0
third_party/fwkacllib/inc/inc/mmpa/sub_inc/mmpa_win.h View File

@@ -0,0 +1,566 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Include guard for the Windows type, macro and prototype definitions.
#ifndef MMPA_WIN_MMPA_WIN_H
#define MMPA_WIN_MMPA_WIN_H
// Give the contents C linkage when compiled as C++.
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif // __cplusplus
#endif // __cplusplus
// Export symbols when building the library itself (MMPA_DLL defined), import them otherwise.
#ifdef MMPA_DLL
#define MMPA_DLL_API __declspec(dllexport)
#else
#define MMPA_DLL_API __declspec(dllimport)
#endif

// Array sizes used by mmMacInfo::addr and the mmCpuDesc string fields.
#define MMPA_MACINFO_DEFAULT_SIZE 18
#define MMPA_CPUDESC_DEFAULT_SIZE 64

// Declare the sections that mmConstructor / mmDestructor allocate their function pointers into.
#pragma section(".CRT$XCU", long, read)
#pragma section(".CRT$XPU", long, read)

// Handle-like types: all of these are aliases of the Win32 HANDLE type.
typedef HANDLE mmMutex_t;
typedef HANDLE mmThread;
typedef HANDLE mmProcess;
typedef HANDLE mmPollHandle;
typedef HANDLE mmPipeHandle;
typedef HANDLE mmFileHandle;
typedef HANDLE mmCompletionHandle;
typedef HANDLE mmFd_t;
// Lock and condition-variable types used by the mmCond* interfaces.
typedef CRITICAL_SECTION mmMutexFC;
typedef CONDITION_VARIABLE mmCond;

// Thread / timer entry point together with the argument passed to it.
typedef VOID *(*userProcFunc)(VOID *pulArg);
typedef struct {
userProcFunc procFunc;
VOID *pulArg;
} mmUserBlock_t;

typedef DWORD mmThreadKey;
typedef SYSTEMTIME mmSystemTime_t;

// Synchronisation, socket and miscellaneous scalar aliases.
typedef HANDLE mmSem_t;
typedef SOCKET mmSockHandle;
typedef SRWLOCK mmRWLock_t;
typedef struct sockaddr mmSockAddr;
typedef int mmSocklen_t;
typedef int mmSemTimeout_t;
typedef long mmAtomicType;
typedef long long mmAtomicType64;
typedef DWORD mmExitCode;
typedef DWORD mmErrorMsg;
typedef int mmKey_t;
typedef HANDLE mmMsgid;
typedef long int mmOfft_t;
typedef int mmPid_t;

// Size types. NOTE: mmSsize_t and mmSize are int-based here, while mmSize_t is size_t.
typedef INT32 mmSsize_t;
typedef int mmSize; // signed int size, distinct from mmSize_t below
typedef size_t mmSize_t;
typedef VOID mmshmId_ds;
typedef long long MM_LONG;

// Directory-entry type tag; DT_DIR takes the value of FILE_ATTRIBUTE_DIRECTORY.
typedef enum {
DT_DIR = FILE_ATTRIBUTE_DIRECTORY,
} mmDtype;

// Directory entry with an 8-bit type field.
typedef struct {
unsigned char d_type;
char d_name[MAX_PATH]; // file name
} mmDirent;

// Directory entry with an `unsigned long` type field; otherwise the same layout idea as mmDirent.
typedef struct {
unsigned long d_type;
char d_name[MAX_PATH]; // file name
} mmDirent2;

// Filter and comparison callbacks accepted by mmScandir / mmScandir2.
typedef int (*mmFilter)(const mmDirent *entry);
typedef int (*mmFilter2)(const mmDirent2 *entry);
typedef int (*mmSort)(const mmDirent **a, const mmDirent **b);
typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b);

// One segment of a vectored write (see mmWritev): buffer pointer plus its length.
typedef struct {
VOID *sendBuf;
INT32 sendLen;
} mmIovSegment;
typedef PVOID mmInAddr;

// Kind of operation being polled.
typedef enum {
pollTypeRead = 1, // pipeline reading
pollTypeRecv, // socket receive
pollTypeIoctl, // ioctl read
} mmPollType;

// Per-handle key: completion handle, operation kind and the OVERLAPPED structure for the request.
typedef struct {
HANDLE completionHandle;
mmPollType overlapType;
OVERLAPPED oa;
} mmComPletionKey, *pmmComPletionKey;

// Description of the data delivered by a poll.
typedef struct {
VOID *priv; // User defined private content
mmPollHandle bufHandle; // Value of handle corresponding to buf
mmPollType bufType; // Data types polled to
VOID *buf; // Data buffer
UINT32 bufLen; // Length associated with buf
UINT32 bufRes; // NOTE(review): meaning not stated here - presumably the resulting/received length; confirm
} mmPollData, *pmmPollData;

// Callback invoked with the polled data.
typedef VOID (*mmPollBack)(pmmPollData);
// One entry of the array passed to mmPoll.
typedef struct {
mmPollHandle handle; // The file descriptor or handle of poll is required
mmPollType pollType; // Operation type requiring poll,read or recv or ioctl
INT32 ioctlCode; // IOCTL operation code, dedicated to IOCTL
mmComPletionKey completionKey; // The default value is blank, which will be used in windows to receive the data with
// different handle
} mmPollfd;

// Per-I/O bookkeeping: OVERLAPPED structure, completion handle and a WSABUF data descriptor.
typedef struct {
OVERLAPPED oa;
HANDLE completionHandle;
WSABUF DataBuf;
} PRE_IO_DATA, *PPRE_IO_DATA;

typedef OVERLAPPED mmOverLap;

// Flags passed to mmOpenFile.
typedef struct {
UINT32 createFlag;
INT32 oaFlag; // Overlap operation is supported if it is not 0
} mmCreateFlag;

// Input/output buffers for mmIoctl plus an optional overlapped descriptor.
typedef struct {
VOID *inbuf;
INT32 inbufLen;
VOID *outbuf;
INT32 outbufLen;
mmOverLap *oa;
} mmIoctlBuf;

// Timer handle pair: the owning timer queue and the timer itself.
typedef struct {
HANDLE timerQueue;
HANDLE timerHandle;
} mmTimerHandle;

// Seconds + microseconds, as the field names indicate.
typedef struct {
LONG tv_sec;
LONG tv_usec;
} mmTimeval;

typedef struct {
INT32 tz_minuteswest; // How many minutes is it different from Greenwich
INT32 tz_dsttime; // DST correction type
} mmTimezone;

// Seconds + nanoseconds, as the field names indicate; both fields are 64-bit (MM_LONG).
typedef struct {
MM_LONG tv_sec;
MM_LONG tv_nsec;
} mmTimespec;

typedef mmTimerHandle mmTimer;

// Thread-local storage class specifier.
#define mmTLS __declspec(thread)

// File-status and mode aliases.
typedef struct stat mmStat_t;
typedef struct _stat64 mmStat64_t;
typedef int mmMode_t;

typedef int MODE;

// Long-option descriptor consumed by mmGetOptLong.
typedef struct {
const char *name;
int has_arg;
int *flag;
int val;
} mmStructOption;

// Result of mmGetDiskFreeSpace.
typedef struct {
ULONGLONG totalSize;
ULONGLONG freeSize;
ULONGLONG availSize;
} mmDiskSize;

// Result of mmDladdr.
typedef struct {
const char *dli_fname;
void *dli_fbase;
const char *dli_sname;
void *dli_saddr;
size_t dli_size; /* ELF only */
int dli_bind; /* ELF only */
int dli_type;
} mmDlInfo;

// One MAC address as a NUL-terminated string.
typedef struct {
char addr[MMPA_MACINFO_DEFAULT_SIZE]; // ex:aa-bb-cc-dd-ee-ff\0
} mmMacInfo;

// CPU description returned by mmGetCpuInfo.
typedef struct {
char arch[MMPA_CPUDESC_DEFAULT_SIZE];
char manufacturer[MMPA_CPUDESC_DEFAULT_SIZE]; // vendor
char version[MMPA_CPUDESC_DEFAULT_SIZE]; // modelname
INT32 frequency; // cpu frequency
INT32 maxFrequency; // max speed
INT32 ncores; // cpu cores
INT32 nthreads; // cpu thread count
INT32 ncounts; // logical cpu nums
} mmCpuDesc;

// Argument and environment vectors (with their element counts) for mmCreateProcess.
typedef struct {
char **argv;
INT32 argvCount;
char **envp;
INT32 envpCount;
} mmArgvEnv;

// Windows currently does not support properties other than thread separation properties
typedef struct {
INT32 detachFlag; // Thread detach property: 0 do not detach 1 detach
INT32 priorityFlag;
INT32 priority;
INT32 policyFlag;
INT32 policy;
INT32 stackFlag;
UINT32 stackSize;
} mmThreadAttr;

// Parameterless function pointer type used by mmConstructor / mmDestructor.
typedef VOID (*mmPf)(VOID);

// Values for mmStructOption::has_arg.
#define mm_no_argument 0
#define mm_required_argument 1
#define mm_optional_argument 2

// Access / creation flags expressed with Win32 constants (GENERIC_*, OPEN_ALWAYS).
#define M_FILE_RDONLY GENERIC_READ
#define M_FILE_WRONLY GENERIC_WRITE
#define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE)
#define M_FILE_CREAT OPEN_ALWAYS

// Open flags expressed with the CRT _O_* constants.
// NOTE(review): M_IRWXU maps to the open flag _O_RDWR rather than to a permission bit - confirm this is intended.
#define M_RDONLY _O_RDONLY
#define M_WRONLY _O_WRONLY
#define M_RDWR _O_RDWR
#define M_IRWXU _O_RDWR
#define M_CREAT _O_CREAT
#define M_BINARY _O_BINARY
#define M_TRUNC _O_TRUNC
#define M_APPEND _O_APPEND

// Permission bits expressed with the CRT _S_* constants; the execute bit is defined as 0.
#define M_IREAD _S_IREAD
#define M_IRUSR _S_IREAD
#define M_IWRITE _S_IWRITE
#define M_IWUSR _S_IWRITE
#define M_IXUSR 0

// Change-notification filters built from the FILE_NOTIFY_CHANGE_* constants.
// Each replacement list is parenthesised so that the OR-ed value stays a single operand when the
// macro is used inside a larger expression (e.g. `mask & M_IN_CREATE`); without the parentheses
// the higher precedence of `&`, `==`, etc. would bind to the last constant only.
// M_IN_CREATE and M_IN_IGNORED intentionally expand to the same filter value.
#define M_IN_CREATE (FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME)
#define M_IN_CLOSE_WRITE (FILE_NOTIFY_CHANGE_LAST_WRITE)
#define M_IN_IGNORED (FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME)

// Notification result bits (single-bit masks).
#define M_OUT_CREATE 0x00000100
#define M_OUT_CLOSE_WRITE 0x00000008
#define M_OUT_IGNORED 0x00008000
#define M_OUT_ISDIR 0x40000000

// Message-queue flags. NOTE: these are the sequential values 1, 2, 3, not independent bit masks.
#define M_MSG_CREAT 1
#define M_MSG_EXCL 2
#define M_MSG_NOWAIT 3

// Options for mmWaitPid.
#define M_WAIT_NOHANG 1
#define M_WAIT_UNTRACED 2

// umask bits: user, group and other all map to the same CRT read / write constant.
#define M_UMASK_USRREAD _S_IREAD
#define M_UMASK_GRPREAD _S_IREAD
#define M_UMASK_OTHREAD _S_IREAD

#define M_UMASK_USRWRITE _S_IWRITE
#define M_UMASK_GRPWRITE _S_IWRITE
#define M_UMASK_OTHWRITE _S_IWRITE

// Execute bits are defined as 0 in this header.
#define M_UMASK_USREXEC 0
#define M_UMASK_GRPEXEC 0
#define M_UMASK_OTHEXEC 0

// Directory-entry type codes. DT_DIR itself is provided by the mmDtype enum, not by a macro.
#define DT_UNKNOWN 0
#define DT_FIFO 1
#define DT_CHR 2
#define DT_BLK 6
#define DT_REG 8
#define DT_LNK 10
#define DT_SOCK 12
#define DT_WHT 14
// NOTE(review): 16 and 32 presumably mirror FILE_ATTRIBUTE_DIRECTORY / FILE_ATTRIBUTE_ARCHIVE - confirm.
#define MM_DT_DIR 16
#define MM_DT_REG 32

// Place a function pointer into the .CRT$XCU / .CRT$XPU sections declared at the top of this header.
// NOTE(review): the variables are always named `con` / `de`, so using either macro twice in one
// translation unit at the same scope would redefine the same name.
#define mmConstructor(x) __declspec(allocate(".CRT$XCU")) mmPf con = x
#define mmDestructor(x) __declspec(allocate(".CRT$XPU")) mmPf de = x

// Helpers for the option parser. MMPA_PRINT_ERROR and MMPA_BADARG reference the identifiers
// `opterr` and `options`, which must be in scope wherever the macros are expanded.
#define MMPA_PRINT_ERROR ((opterr) && (*options != ':'))
#define MMPA_FLAG_PERMUTE 0x01 // permute non-options to the end of argv
#define MMPA_FLAG_ALLARGS 0x02 // treat non-options as args to option "-1"
#define MMPA_FLAG_LONGONLY 0x04 // operate as getopt_long_only
// return values
#define MMPA_BADCH (INT32)'?'
#define MMPA_BADARG ((*options == ':') ? (INT32)':' : (INT32)'?')
#define MMPA_INORDER (INT32)1

// Same values as the lower-case mm_*_argument macros.
#define MMPA_NO_ARGUMENT 0
#define MMPA_REQUIRED_ARGUMENT 1
#define MMPA_OPTIONAL_ARGUMENT 2

#define MMPA_EMSG ""
#define MMPA_MAX_PATH MAX_PATH
#define M_NAME_MAX _MAX_FNAME

// Access-check modes for mmAccess2.
#define M_F_OK 0
#define M_X_OK 1
#define M_W_OK 2
#define M_R_OK 4

#define MMPA_STDIN stdin
#define MMPA_STDOUT stdout
#define MMPA_STDERR stderr

// Library-loading mode flags are all defined as 0 in this header.
#define MMPA_RTLD_NOW 0
#define MMPA_RTLD_GLOBAL 0
#define MMPA_RTLD_LAZY 0
#define MMPA_RTLD_NODELETE 0

#define MMPA_DL_EXT_NAME ".dll"

// Make every __attribute__((...)) annotation expand to nothing.
#define __attribute__(v)

// Thread creation / join. funcBlock carries the entry point and its argument (see mmUserBlock_t).
MMPA_FUNC_VISIBILITY INT32 mmCreateTask(mmThread *threadHandle, mmUserBlock_t *funcBlock);
MMPA_FUNC_VISIBILITY INT32 mmJoinTask(mmThread *threadHandle);
// Mutex wrappers on mmMutex_t.
MMPA_FUNC_VISIBILITY INT32 mmMutexInit(mmMutex_t *mutex);
MMPA_FUNC_VISIBILITY INT32 mmMutexLock(mmMutex_t *mutex);
MMPA_FUNC_VISIBILITY INT32 mmMutexTryLock(mmMutex_t *mutex);
MMPA_FUNC_VISIBILITY INT32 mmMutexUnLock(mmMutex_t *mutex);
MMPA_FUNC_VISIBILITY INT32 mmMutexDestroy(mmMutex_t *mutex);
// Condition variable plus the dedicated lock type (mmMutexFC) that mmCondWait / mmCondTimedWait take.
MMPA_FUNC_VISIBILITY INT32 mmCondInit(mmCond *cond);
MMPA_FUNC_VISIBILITY INT32 mmCondLockInit(mmMutexFC *mutex);
MMPA_FUNC_VISIBILITY INT32 mmCondLock(mmMutexFC *mutex);
MMPA_FUNC_VISIBILITY INT32 mmCondUnLock(mmMutexFC *mutex);
MMPA_FUNC_VISIBILITY INT32 mmCondLockDestroy(mmMutexFC *mutex);
// Reader/writer lock wrappers; read and write sides have separate unlock functions.
MMPA_FUNC_VISIBILITY INT32 mmRWLockInit(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRWLockRDLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRWLockTryRDLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRWLockWRLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRWLockTryWRLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRDLockUnLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmWRLockUnLock(mmRWLock_t *rwLock);
MMPA_FUNC_VISIBILITY INT32 mmRWLockDestroy(mmRWLock_t *rwLock);
// Waits on a condition variable; the timed variant takes its limit in milliseconds (per the parameter name).
MMPA_FUNC_VISIBILITY INT32 mmCondWait(mmCond *cond, mmMutexFC *mutex);
MMPA_FUNC_VISIBILITY INT32 mmCondTimedWait(mmCond *cond, mmMutexFC *mutex, UINT32 milliSecond);

// Condition-variable notification/teardown plus process, thread and clock queries.
MMPA_FUNC_VISIBILITY INT32 mmCondNotify(mmCond *cond);
MMPA_FUNC_VISIBILITY INT32 mmCondNotifyAll(mmCond *cond);
MMPA_FUNC_VISIBILITY INT32 mmCondDestroy(mmCond *cond);
MMPA_FUNC_VISIBILITY INT32 mmGetPid(VOID);
MMPA_FUNC_VISIBILITY INT32 mmGetTid(VOID);
MMPA_FUNC_VISIBILITY INT32 mmGetPidHandle(mmProcess *processHandle);
MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTime);
MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTime);
// Semaphore wrappers operating on a caller-provided mmSem_t.
MMPA_FUNC_VISIBILITY INT32 mmSemInit(mmSem_t *sem, UINT32 value);
MMPA_FUNC_VISIBILITY INT32 mmSemWait(mmSem_t *sem);
MMPA_FUNC_VISIBILITY INT32 mmSemPost(mmSem_t *sem);
MMPA_FUNC_VISIBILITY INT32 mmSemDestroy(mmSem_t *sem);
// File-descriptor (INT32 fd) and FILE*-stream wrappers.
// mmOpen2 is the variant of mmOpen that additionally takes a MODE argument.
MMPA_FUNC_VISIBILITY INT32 mmOpen(const CHAR *pathName, INT32 flags);
MMPA_FUNC_VISIBILITY INT32 mmOpen2(const CHAR *pathName, INT32 flags, MODE mode);
MMPA_FUNC_VISIBILITY FILE *mmPopen(CHAR *command, CHAR *type);
MMPA_FUNC_VISIBILITY INT32 mmClose(INT32 fd);
MMPA_FUNC_VISIBILITY INT32 mmPclose(FILE *stream);
// NOTE(review): bufLen is UINT32 while mmSsize_t is a 32-bit signed type in this header;
// confirm how lengths above the signed range are handled.
MMPA_FUNC_VISIBILITY mmSsize_t mmWrite(INT32 fd, VOID *buf, UINT32 bufLen);
MMPA_FUNC_VISIBILITY mmSsize_t mmRead(INT32 fd, VOID *buf, UINT32 bufLen);
// Socket wrappers expressed in terms of mmSockHandle / mmSockAddr / mmSocklen_t.
MMPA_FUNC_VISIBILITY mmSockHandle mmSocket(INT32 sockFamily, INT32 type, INT32 protocol);
MMPA_FUNC_VISIBILITY INT32 mmBind(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen);
MMPA_FUNC_VISIBILITY INT32 mmListen(mmSockHandle sockFd, INT32 backLog);
MMPA_FUNC_VISIBILITY mmSockHandle mmAccept(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t *addrLen);
MMPA_FUNC_VISIBILITY INT32 mmConnect(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen);
MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag);
// Variants that take an explicit peer address.
// NOTE(review): SendTo returns INT32 and takes `INT32 tolen`, whereas RecvFrom returns mmSsize_t and takes
// `mmSocklen_t *FromLen`; the asymmetry is present in the declarations as written.
MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd,
VOID *sendMsg,
INT32 sendLen,
UINT32 sendFlag,
const mmSockAddr* addr,
INT32 tolen);
MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd,
VOID *recvBuf,
mmSize recvLen,
UINT32 recvFlag,
mmSockAddr* addr,
mmSocklen_t *FromLen);
// Socket-subsystem start-up / clean-up pair.
MMPA_FUNC_VISIBILITY INT32 mmSAStartup(VOID);
MMPA_FUNC_VISIBILITY INT32 mmSACleanup(VOID);
// Dynamic-library wrappers: open by file name, query address info, resolve a symbol by name,
// close the library, and fetch the last error text.
// The second parameter of mmDlsym is the symbol (function) name to resolve inside `handle`;
// it is named funcName to match the Linux declaration of the same interface.
MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT mode);
MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info);
MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName);
MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle);
MMPA_FUNC_VISIBILITY CHAR *mmDlerror(VOID);
// Timer creation/deletion; timerBlock carries the callback and its argument (see mmUserBlock_t).
MMPA_FUNC_VISIBILITY INT32
mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period);
MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle);
// File-status queries by path (32/64-bit stat structures) and by descriptor.
MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer);
MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer);
MMPA_FUNC_VISIBILITY INT32 mmFStatGet(INT32 fd, mmStat_t *buffer);
MMPA_FUNC_VISIBILITY INT32 mmMkdir(const CHAR *pathName, mmMode_t mode);
// Sleep duration is expressed in milliseconds, per the parameter name.
MMPA_FUNC_VISIBILITY INT32 mmSleep(UINT32 milliSecond);
// Thread creation with attributes and process/thread priority accessors.
MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithAttr(mmThread *threadHandle, mmUserBlock_t *funcBlock);
MMPA_FUNC_VISIBILITY INT32 mmGetProcessPrio(mmProcess pid);
MMPA_FUNC_VISIBILITY INT32 mmSetProcessPrio(mmProcess pid, INT32 processPrio);
MMPA_FUNC_VISIBILITY INT32 mmGetThreadPrio(mmThread *threadHandle);
MMPA_FUNC_VISIBILITY INT32 mmSetThreadPrio(mmThread *threadHandle, INT32 threadPrio);
// Path access checks (mmAccess2 takes an explicit mode, see M_F_OK / M_X_OK / M_W_OK / M_R_OK) and directory removal.
MMPA_FUNC_VISIBILITY INT32 mmAccess(const CHAR *pathName);
MMPA_FUNC_VISIBILITY INT32 mmAccess2(const CHAR *pathName, INT32 mode);
MMPA_FUNC_VISIBILITY INT32 mmRmdir(const CHAR *pathName);

// Device control, timed semaphore wait, vectored write, memory barrier and address parsing.
// mmWritev takes a socket handle in this header and writes the `iovcnt` segments described by `iov`.
// mmMb is declared with an explicit (VOID) parameter list, like mmGetPid / mmGetTid in this header,
// so that C callers get a real prototype instead of an unspecified-argument declaration.
// NOTE(review): the unit of `timeout` in mmSemTimedWait is not stated in this header.
MMPA_FUNC_VISIBILITY INT32 mmIoctl(mmProcess fd, INT32 ioctlCode, mmIoctlBuf *bufPtr);
MMPA_FUNC_VISIBILITY INT32 mmSemTimedWait(mmSem_t *sem, INT32 timeout);
MMPA_FUNC_VISIBILITY mmSsize_t mmWritev(mmSockHandle fd, mmIovSegment *iov, INT32 iovcnt);
MMPA_FUNC_VISIBILITY VOID mmMb(VOID);
MMPA_FUNC_VISIBILITY INT32 mmInetAton(const CHAR *addrStr, mmInAddr *addr);

// Handle-based file I/O: the file is identified by an mmProcess value rather than an INT32 descriptor.
MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag);
MMPA_FUNC_VISIBILITY mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len);
MMPA_FUNC_VISIBILITY mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len);
MMPA_FUNC_VISIBILITY INT32 mmCloseFile(mmProcess fileId);

// Operations on mmAtomicType / mmAtomicType64 values: set, increment by `value`, subtract `value`.
// NOTE(review): whether the returned value is the old or the new content is not stated here - confirm in the implementation.
MMPA_FUNC_VISIBILITY mmAtomicType mmSetData(mmAtomicType *ptr, mmAtomicType value);
MMPA_FUNC_VISIBILITY mmAtomicType mmValueInc(mmAtomicType *ptr, mmAtomicType value);
MMPA_FUNC_VISIBILITY mmAtomicType mmValueSub(mmAtomicType *ptr, mmAtomicType value);
MMPA_FUNC_VISIBILITY mmAtomicType64 mmSetData64(mmAtomicType64 *ptr, mmAtomicType64 value);
MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueInc64(mmAtomicType64 *ptr, mmAtomicType64 value);
MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueSub64(mmAtomicType64 *ptr, mmAtomicType64 value);
MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock);

// Named-pipe interfaces without an element count.
MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode);
MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode);
MMPA_FUNC_VISIBILITY VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]);

// Pipe interfaces with an explicit pipeCount giving the number of entries in pipe[] / pipeName[].
MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode);
MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode);
MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount);

// Poll-related interfaces.
// mmPoll receives the descriptor array and its length, a timeout, a completion handle,
// a buffer describing the polled data, and a callback of type mmPollBack.
// Parameterless functions are declared with an explicit (VOID) list, like mmGetPid / mmGetTid in this
// header, so that C callers get a real prototype instead of an unspecified-argument declaration.
MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(VOID);
MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle);
MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP,
pmmPollData polledData, mmPollBack pollBack);

// Error-code retrieval and formatting into a caller-supplied buffer of `size` bytes.
MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(VOID);
MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size);
// Time queries.
MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone);
MMPA_FUNC_VISIBILITY mmTimespec mmGetTickCount(VOID);
// Path canonicalisation. mmGetRealPath takes no output length; mmRealPath is the variant that
// receives the capacity of realPath explicitly.
MMPA_FUNC_VISIBILITY INT32 mmGetRealPath(CHAR *path, CHAR *realPath);

MMPA_FUNC_VISIBILITY INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen);

// Descriptor duplication, file removal, permission change and FILE* -> descriptor lookup.
MMPA_FUNC_VISIBILITY INT32 mmDup2(INT32 oldFd, INT32 newFd);
MMPA_FUNC_VISIBILITY INT32 mmDup(INT32 fd);
MMPA_FUNC_VISIBILITY INT32 mmUnlink(const CHAR *filename);
MMPA_FUNC_VISIBILITY INT32 mmChmod(const CHAR *filename, INT32 mode);
MMPA_FUNC_VISIBILITY INT32 mmFileno(FILE *stream);
// Directory scanning. The *2 variants are the same interfaces expressed with the
// mmDirent2 / mmFilter2 / mmSort2 types; the *Free functions release a list of `count` entries.
MMPA_FUNC_VISIBILITY INT32 mmScandir(const CHAR *path, mmDirent ***entryList, mmFilter filterFunc, mmSort sort);
MMPA_FUNC_VISIBILITY INT32 mmScandir2(const CHAR *path, mmDirent2 ***entryList, mmFilter2 filterFunc, mmSort2 sort);
MMPA_FUNC_VISIBILITY VOID mmScandirFree(mmDirent **entryList, INT32 count);
MMPA_FUNC_VISIBILITY VOID mmScandirFree2(mmDirent2 **entryList, INT32 count);

// Message-queue wrappers keyed by mmKey_t and identified by mmMsgid.
MMPA_FUNC_VISIBILITY mmMsgid mmMsgCreate(mmKey_t key, INT32 msgFlag);
MMPA_FUNC_VISIBILITY mmMsgid mmMsgOpen(mmKey_t key, INT32 msgFlag);
MMPA_FUNC_VISIBILITY INT32 mmMsgRcv(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag);
MMPA_FUNC_VISIBILITY INT32 mmMsgSnd(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag);

MMPA_FUNC_VISIBILITY INT32 mmMsgClose(mmMsgid msqid);

/* Converts *timep into the caller-supplied struct tm pointed to by result. */
MMPA_FUNC_VISIBILITY INT32 mmLocalTimeR(const time_t *timep, struct tm *result);

/*
 * Accessors for the option-parsing state (error flag, index, option character, argument), followed by the
 * option parsers themselves.
 * NOTE(review): presumably these mirror the getopt variables opterr / optind / optopt / optarg - confirm.
 * NOTE(review): the setter for the "OptOpt" value is spelled mmSetOpOpt (not mmSetOptOpt), unlike its getter
 * mmGetOptOpt and the other mmSetOpt* setters. It is part of the exported interface, so callers must use this
 * spelling.
 */
MMPA_FUNC_VISIBILITY INT32 mmGetOptErr();
MMPA_FUNC_VISIBILITY VOID mmSetOptErr(INT32 mmOptErr);
MMPA_FUNC_VISIBILITY INT32 mmGetOptInd();
MMPA_FUNC_VISIBILITY VOID mmSetOptInd(INT32 mmOptInd);
MMPA_FUNC_VISIBILITY INT32 mmGetOptOpt();
MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt);
MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg();
MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg);
/* NOTE(review): this prototype uses plain char while mmGetOptLong uses the CHAR alias - presumably equivalent. */
MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts);
MMPA_FUNC_VISIBILITY INT32
mmGetOptLong(INT32 argc, CHAR *const *argv, const CHAR *opts, const mmStructOption *longopts, INT32 *longindex);

/*
 * File positioning and truncation.
 * NOTE(review): mmLseek accepts an INT64 offset but returns LONG; if LONG is the 32-bit Windows type, positions
 * beyond 2 GiB cannot be reported through the return value - confirm.
 * NOTE(review): mmFtruncate takes a UINT32 length, which presumably limits the target size to below 4 GiB - confirm.
 */
MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag);
MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length);

/*
 * Thread-local storage keyed by mmThreadKey: create (with a destructor callback), set, get and delete.
 * NOTE(review): when the destructor callback runs is not visible in this header - confirm.
 */
MMPA_FUNC_VISIBILITY INT32 mmTlsCreate(mmThreadKey *key, VOID (*destructor)(VOID *));
MMPA_FUNC_VISIBILITY INT32 mmTlsSet(mmThreadKey key, const VOID *value);
MMPA_FUNC_VISIBILITY VOID *mmTlsGet(mmThreadKey key);
MMPA_FUNC_VISIBILITY INT32 mmTlsDelete(mmThreadKey key);
MMPA_FUNC_VISIBILITY INT32 mmGetOsType();

/*
 * Sync, working-directory, umask and process-wait helpers.
 * mmFsync takes an mmProcess-typed handle, whereas mmFsync2 takes an INT32 descriptor.
 */
MMPA_FUNC_VISIBILITY INT32 mmFsync(mmProcess fd);
MMPA_FUNC_VISIBILITY INT32 mmFsync2(INT32 fd);
MMPA_FUNC_VISIBILITY INT32 mmChdir(const CHAR *path);
MMPA_FUNC_VISIBILITY INT32 mmUmask(INT32 pmode);
MMPA_FUNC_VISIBILITY INT32 mmWaitPid(mmProcess pid, INT32 *status, INT32 options);

/*
 * Current-directory query into a caller-supplied buffer of maxLen, and a tokenizer that keeps its position in
 * the caller-supplied saveptr.
 */
MMPA_FUNC_VISIBILITY INT32 mmGetCwd(CHAR *buffer, INT32 maxLen);
MMPA_FUNC_VISIBILITY CHAR *mmStrTokR(CHAR *str, const CHAR *delim, CHAR **saveptr);

/*
 * Environment, path-component and disk-space helpers.
 * mmGetEnv writes into the caller-supplied buffer value of length len.
 * NOTE(review): mmDirName / mmBaseName take a non-const path; whether they modify it in place is not visible
 * here - pass a writable copy until confirmed.
 */
MMPA_FUNC_VISIBILITY INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len);
MMPA_FUNC_VISIBILITY INT32 mmSetEnv(const CHAR *name, const CHAR *value, INT32 overwrite);
MMPA_FUNC_VISIBILITY CHAR *mmDirName(CHAR *path);
MMPA_FUNC_VISIBILITY CHAR *mmBaseName(CHAR *path);
MMPA_FUNC_VISIBILITY INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize);

/* Set / get the name of the thread identified by threadHandle; the getter fills the caller-supplied buffer. */
MMPA_FUNC_VISIBILITY INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name);
MMPA_FUNC_VISIBILITY INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, INT32 size);

/*
 * Function: set the name of the currently executing thread. Intended to be called from inside the thread itself.
 *           Not supported on Windows for the time being; the Windows implementation is empty.
 * Input: name: the thread name to set
 * Return: EN_INVALID_PARAM if the input parameter is invalid, EN_OK on success, EN_ERROR on failure
 */
MMPA_FUNC_VISIBILITY INT32 mmSetCurrentThreadName(const CHAR *name);

/*
 * Function: get the name of the currently executing thread. Intended to be called from the thread body.
 *           Not supported on Windows; the Windows implementation is empty.
 * Input: name: buffer that receives the thread name; it is allocated by the caller
 *        size: size of the buffer, which must be >= MMPA_THREADNAME_SIZE
 * Return: EN_INVALID_PARAM if an input parameter is invalid, EN_OK on success, EN_ERROR on failure
 */
MMPA_FUNC_VISIBILITY INT32 mmGetCurrentThreadName(CHAR *name, INT32 size);

/*
 * File, OS and hardware information queries.
 * mmGetMac / mmGetCpuInfo return a list and its element count through out-parameters; mmGetMacFree /
 * mmCpuInfoFree take such a list together with a count.
 * NOTE(review): presumably the lists are allocated by the library and must be released with the matching *Free
 * call - confirm against the implementation.
 */
MMPA_FUNC_VISIBILITY INT32 mmGetFileSize(const CHAR *fileName, ULONGLONG *length);
MMPA_FUNC_VISIBILITY INT32 mmIsDir(const CHAR *fileName);
MMPA_FUNC_VISIBILITY INT32 mmGetOsName(CHAR *name, INT32 nameSize);
MMPA_FUNC_VISIBILITY INT32 mmGetOsVersion(CHAR *versionInfo, INT32 versionLength);
MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count);
MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count);
MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count);
MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count);
/* Starts fileName with the given argv/env description; the new process id is returned through id. */
MMPA_FUNC_VISIBILITY INT32
mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile, mmProcess *id);

/* Thread creation with explicit attributes; the created thread is returned through threadHandle. */
MMPA_FUNC_VISIBILITY INT32
mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, const mmThreadAttr *threadAttr);
/*
 * Shared-memory object and memory-mapping helpers.
 * NOTE(review): mmMmap and mmMunMap both take an "extra" mmFd_t pointer; presumably the value filled in by mmMmap
 * must be handed back to mmMunMap - confirm.
 */
MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode);
MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name);
MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags);
MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra);
/* Closing brace emitted only for C++ builds; presumably it ends the extern "C" block opened earlier in this header. */
#ifdef __cplusplus
#if __cplusplus
}
#endif /* __cplusplus */
#endif // __cplusplus

#endif // MMPA_WIN_MMPA_WIN_H_

+ 78
- 0
third_party/fwkacllib/inc/inc/ops/aipp.h View File

@@ -0,0 +1,78 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file aipp.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_
#define OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_

#include "graph/operator_reg.h"

namespace ge {
/**
*@brief Performs AI pre-processing (AIPP) on images, including color space conversion (CSC),
* image normalization (by subtracting the mean value or multiplying by a factor), image cropping
* (by specifying the crop start and cropping the image to the size required by the neural network), and more. \n

*@par Inputs:
*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer.
*@li params: An optional tensor of type uint8, holding the dynamic AIPP configuration parameters. \n

*@par Attributes:
*aipp_config_path: An optional string, specifying the path of the AIPP configuration file.
* Defaults to "./aipp.cfg". \n

*@par Outputs:
*features: The AIPP-processed output tensor of type float16 or uint8. \n

*@par Third-party framework compatibility
*It is a custom operator. It has no corresponding operator in Caffe. \n

*@par Restrictions:
*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel.
* Please do not use it directly.
*/
REG_OP(Aipp)
.INPUT(images, TensorType{DT_UINT8})
.OPTIONAL_INPUT(params, TensorType{DT_UINT8})
.OUTPUT(features, TensorType({DT_FLOAT16, DT_UINT8}))
.ATTR(aipp_config_path, String, "./aipp.cfg")
.OP_END_FACTORY_REG(Aipp)

/**
*@brief This operator is used for dynamic AIPP. If aipp-mode is set to dynamic in the AIPP configuration file,
* the framework automatically adds one input node to the end of the graph. \n

*@par Inputs:
*data: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer.
* Note that the registration itself declares TensorType::ALL() for this input. \n

*@par Attributes:
*index: An optional int, specifying the AIPP serial number. Defaults to "0". \n

*@par Outputs:
*out: The AIPP-processed output tensor of all types. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator AippData. \n

*@par Restrictions:
*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel.
* Please do not use it directly.
*/
REG_OP(AippData)
.INPUT(data, TensorType::ALL())
.OUTPUT(out, TensorType::ALL())
.ATTR(index, Int, 0)
.OP_END_FACTORY_REG(AippData)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_

+ 80
- 0
third_party/fwkacllib/inc/inc/ops/all_ops.h View File

@@ -0,0 +1,80 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file all_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_

#include "aipp.h"
#include "array_ops.h"
#include "audio_ops.h"
#include "batch_ops.h"
#include "bitwise_ops.h"
#include "boosted_trees_ops.h"
#include "candidate_sampling_ops.h"
#include "control_flow_ops.h"
#include "ctc_ops.h"
#include "data_flow_ops.h"
#include "elewise_calculation_ops.h"
#include "functional_ops.h"
#include "get_data_ops.h"
#include "hcom_ops.h"
#include "hvd_ops.h"
#include "image_ops.h"
#include "internal_ops.h"
#include "linalg_ops.h"
#include "list_ops.h"
#include "logging_ops.h"
#include "lookup_ops.h"
#include "math_ops.h"
#include "matrix_calculation_ops.h"
#include "nn_batch_norm_ops.h"
#include "nn_calculation_ops.h"
#include "nn_detect_ops.h"
#include "nn_norm_ops.h"
#include "nn_ops.h"
#include "nn_pooling_ops.h"
#include "nn_training_ops.h"
#include "nonlinear_fuc_ops.h"
#include "no_op.h"
#include "npu_loss_scale_ops.h"
#include "outfeed_ops.h"
#include "pad_ops.h"
#include "parsing_ops.h"
#include "quantize_ops.h"
#include "ragged_conversion_ops.h"
#include "random_ops.h"
#include "reduce_ops.h"
#include "resource_variable_ops.h"
#include "rnn.h"
#include "rpn_ops.h"
#include "save_ops.h"
#include "selection_ops.h"
#include "set_ops.h"
#include "sparse_ops.h"
#include "split_combination_ops.h"
#include "stateful_random_ops.h"
#include "stateless_random_ops.h"
#include "state_ops.h"
#include "string_ops.h"
#include "swap_co_ops.h"
#include "transformation_ops.h"
#include "condtake_ops.h"
#include "warp_perspective_ops.h"
#endif // OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_

+ 1231
- 0
third_party/fwkacllib/inc/inc/ops/array_ops.h
File diff suppressed because it is too large
View File


+ 162
- 0
third_party/fwkacllib/inc/inc/ops/audio_ops.h View File

@@ -0,0 +1,162 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file audio_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Mel-Frequency Cepstral Coefficient (MFCC) calculation, which consists of
* taking the DCT-II of a log-magnitude mel-scale spectrogram. \n

*@par Inputs:
*Input "spectrogram" is a 3D tensor. Input "sample_rate" is a scalar.
*@li spectrogram: A 3D tensor of type float32.
*@li sample_rate: A tensor of type int32. The MFCC sample rate. \n

*@par Attributes:
*@li upper_frequency_limit: An optional float. The highest frequency for calculation. Defaults to "4000".
*@li lower_frequency_limit: An optional float. The lowest frequency for calculation. Defaults to "20".
*@li filterbank_channel_count: An optional int. Resolution of the Mel bank. Defaults to "40".
*@li dct_coefficient_count: An optional int. Number of output channels to produce
* per time slice. Defaults to "13". \n

*@par Outputs:
*y: A Tensor of type float32. \n

*@attention Constraints:
*Mfcc runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Mfcc. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(Mfcc)
.INPUT(spectrogram, TensorType({DT_FLOAT}))
.INPUT(sample_rate, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT}))
.ATTR(upper_frequency_limit, Float, 4000)
.ATTR(lower_frequency_limit, Float, 20)
.ATTR(filterbank_channel_count, Int, 40)
.ATTR(dct_coefficient_count, Int, 13)
.OP_END_FACTORY_REG(Mfcc)

/**
*@brief Generates a spectrogram from a float tensor of WAV audio data. \n

*@par Inputs:
*Input "x" is a 2D matrix.
*x: A tensor of type float32. Float representation of audio data. \n

*@par Attributes:
*@li window_size: A required int. Size of the spectrogram window.
*@li stride: A required int. Size of the spectrogram stride.
*@li magnitude_squared: An optional bool. If true, uses squared magnitude. Defaults to "false". \n

*@par Outputs:
*spectrogram: A 3D Tensor of type float32. \n

*@attention Constraints:
*AudioSpectrogram runs on the Ascend AI CPU, which delivers
* poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator AudioSpectrogram. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(AudioSpectrogram)
.INPUT(x, TensorType({DT_FLOAT}))
.OUTPUT(spectrogram, TensorType({DT_FLOAT}))
.REQUIRED_ATTR(window_size, Int)
.REQUIRED_ATTR(stride, Int)
.ATTR(magnitude_squared, Bool, false)
.OP_END_FACTORY_REG(AudioSpectrogram)

/**
*@brief Decodes a 16-bit WAV file into a float tensor. \n

*@par Inputs:
*contents: A Tensor of type string. The WAV-encoded audio, usually from a file. \n

*@par Attributes:
*@li desired_channels: An optional int. Defaults to "-1".
* Number of sample channels wanted.
*@li desired_samples: An optional int. Defaults to "-1".
* Length of audio requested. \n

*@par Outputs:
*@li audio: A Tensor of type float32.
*@li sample_rate: A Tensor of type int32. \n

*@attention Constraints:
*DecodeWav runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator DecodeWav. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(DecodeWav)
.INPUT(contents, TensorType({DT_STRING}))
.OUTPUT(audio, TensorType({DT_FLOAT}))
.OUTPUT(sample_rate, TensorType({DT_INT32}))
.ATTR(desired_channels, Int, -1)
.ATTR(desired_samples, Int, -1)
.OP_END_FACTORY_REG(DecodeWav)

/**
*@brief Encodes audio data using the WAV file format. \n

*@par Inputs:
*Including:
*@li audio: A Tensor of type DT_FLOAT.
*@li sample_rate: A Tensor of type DT_INT32. \n

*@par Outputs:
*contents: A Tensor of type DT_STRING. \n

*@attention Constraints:
*EncodeWav runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator EncodeWav. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(EncodeWav)
.INPUT(audio, TensorType({DT_FLOAT}))
.INPUT(sample_rate, TensorType({DT_INT32}))
.OUTPUT(contents, TensorType({DT_STRING}))
.OP_END_FACTORY_REG(EncodeWav)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_

+ 166
- 0
third_party/fwkacllib/inc/inc/ops/batch_ops.h View File

@@ -0,0 +1,166 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file batch_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Creates batches of tensors in "x_tensors". \n

*@par Inputs:
*Input "x_tensors" is a list or a dictionary of tensors.
*x_tensors: The list or dictionary of tensors to enqueue.
* It's a dynamic input. \n

*@par Attributes:
*@li num_batch_threads: A required int. The number of threads enqueuing "x_tensors".
* The batching will be nondeterministic if "num_batch_threads" > 1.
*@li max_batch_size: A required int. The maximum batch size pulled from the queue.
*@li max_enqueued_batches: An optional int. The maximum number of batches pulled from the queue.
* Defaults to "10".
*@li batch_timeout_micros: A required int. The batch processing timeout, in microseconds.
*@li allowed_batch_sizes: An optional list of ints. The allowed batch size pulled from the queue.
* Defaults to an empty list.
*@li grad_timeout_micros: A required int. The gradient batch processing timeout,
* in microseconds.
*@li container: An optional string. If non-empty, this queue is placed in the given container.
* Otherwise, a default container is used. Defaults to "".
*@li shared_name: An optional string. If set, this queue will be shared under the given name
* across multiple sessions. Defaults to "".
*@li batching_queue: An optional string. The queue resource container. Defaults to "". \n

*@par Outputs:
*@li y_index: A Tensor of type int64. The index of a BatchTensor. Must be in row-major order.
*@li y_id: A Tensor of type int64. The ID of a BatchTensor. Must be in row-major order.
*@li y_tensors: A list or dictionary of tensors with
* the same types as "x_tensors". It's a dynamic output. \n

*@attention Constraints:
*Batch runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Batch.
*/

REG_OP(Batch)
.DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \
DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE}))
.OUTPUT(y_index, TensorType({ DT_INT64 }))
.OUTPUT(y_id, TensorType({ DT_INT64 }))
.DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \
DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL}))
.REQUIRED_ATTR(num_batch_threads, Int)
.REQUIRED_ATTR(max_batch_size, Int)
.ATTR(max_enqueued_batches, Int, 10)
.REQUIRED_ATTR(batch_timeout_micros, Int)
.ATTR(allowed_batch_sizes, ListInt, {})
.REQUIRED_ATTR(grad_timeout_micros, Int)
.ATTR(container, String, "")
.ATTR(shared_name, String, "")
.ATTR(batching_queue, String, "")
.OP_END_FACTORY_REG(Batch)

/**
*@brief Reverses the operation of Batch for a single output Tensor. \n

*@par Inputs:
*Input "x_tensor" is a list or a dictionary of tensors.
*@li x_tensor: The list or dictionary of tensors to enqueue.
*@li index: A Tensor of type int64. The matching "batch_index" obtained from Batch.
*@li id: A Tensor of type int64. The "id" scalar emitted by Batch. \n

*@par Attributes:
*@li timeout_micros: A required int. The unbatch processing timeout, in microseconds.
*@li container: An optional string. If non-empty, this queue is placed in the given container.
* Otherwise, a default container is used. Defaults to "".
*@li shared_name: An optional string. If set, this queue will be shared under the given name
* across multiple sessions. Defaults to "". \n

*@par Outputs:
*y_tensor: A list or dictionary of tensors with the same types as "x_tensor". \n

*@attention Constraints:
*Unbatch runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Unbatch.
*/

REG_OP(Unbatch)
.INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(index, TensorType({DT_INT64}))
.INPUT(id, TensorType({DT_INT64}))
.OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.REQUIRED_ATTR(timeout_micros, Int)
.ATTR(container, String, "")
.ATTR(shared_name, String, "")
.OP_END_FACTORY_REG(Unbatch)

/**
*@brief Acts like Batch but using the given "batch_index" index of batching
* things as they become available. \n

*@par Inputs:
*Input "x_input" is a list or a dictionary of tensors.
*The inputs are listed in the order in which they are registered:
*@li x_input: The input to the Unbatch operation.
*@li index: A Tensor of type int64. The batch_index given to the Unbatch operation.
*@li grad: The downstream gradient.
*@li id: A Tensor of type int64. The "id" scalar emitted by Batch. \n

*@par Attributes:
*@li container: An optional string. If non-empty, this queue is placed in the given container.
* Otherwise, a default container is used. Defaults to "".
*@li shared_name: An optional string. If set, this queue will be shared under the given name
* across multiple sessions. Defaults to "". \n

*@par Outputs:
*y_grad: The return value, either an empty tensor or the batched gradient. \n

*@attention Constraints:
*UnbatchGrad runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator UnbatchGrad.
*/

REG_OP(UnbatchGrad)
.INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(index, TensorType({DT_INT64}))
.INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(id, TensorType({DT_INT64}))
.OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(container, String, "")
.ATTR(shared_name, String, "")
.OP_END_FACTORY_REG(UnbatchGrad)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_

+ 59
- 0
third_party/fwkacllib/inc/inc/ops/bitwise_ops.h View File

@@ -0,0 +1,59 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file bitwise_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Element-wise computes the bitwise right-shift of x and y. \n

*@par Inputs:
*Input "x" is a k-dimensional tensor.
*@li x: A Tensor. Must be one of the following types: int8, int16, int32,
* int64, uint8, uint16, uint32, uint64.
*@li y: A Tensor. Has the same type as "x". \n

*@par Outputs:
*z: A Tensor. Has the same type as "x". \n

*@attention Constraints:
*RightShift runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator RightShift.
*/

REG_OP(RightShift)
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
.INPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
.OUTPUT(z, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
.OP_END_FACTORY_REG(RightShift)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_

+ 64
- 0
third_party/fwkacllib/inc/inc/ops/boosted_trees_ops.h View File

@@ -0,0 +1,64 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file boosted_trees_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Bucketizes each feature based on bucket boundaries. \n

*@par Inputs:
*Inputs "float_values" and "bucket_boundaries" are lists of 1D tensors. Both are dynamic inputs.
*@li float_values: A list of rank 1 tensors of type float32, each containing float
* values for a single feature.
*@li bucket_boundaries: A list of rank 1 tensors of type float32, each containing
* the bucket boundaries for a single feature. \n

*@par Attributes:
*num_features: A required int. Number of features. \n

*@par Outputs:
*y: A list of rank 1 tensors of type int32, each containing the bucketized values for
* a single feature. It's a dynamic output. \n

*@attention Constraints:
*BoostedTreesBucketize runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator BoostedTreesBucketize. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(BoostedTreesBucketize)
.DYNAMIC_INPUT(float_values, TensorType({DT_FLOAT}))
.DYNAMIC_INPUT(bucket_boundaries, TensorType({DT_FLOAT}))
.DYNAMIC_OUTPUT(y, TensorType({DT_INT32}))
.REQUIRED_ATTR(num_features, Int)
.OP_END_FACTORY_REG(BoostedTreesBucketize)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_

+ 415
- 0
third_party/fwkacllib/inc/inc/ops/candidate_sampling_ops.h View File

@@ -0,0 +1,415 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file candidate_sampling_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Generates labels for candidate sampling with
a learned unigram distribution. \n

*@par Inputs:
*true_classes: A 2D "batch_size * num_true" matrix of type int64, in which
each row contains the IDs of the "num_true" "target_classes" in the
corresponding original label. \n

*@par Attributes:
*@li num_true: A required int. Number of true labels per context.
*@li num_sampled: A required int. Number of candidates to randomly sample.
*@li unique: A required bool. If "unique" is true, samples with rejection,
so that all sampled candidates in a batch are unique.
This requires some approximation to estimate the post-rejection
sampling probabilities.
*@li range_max: A required int. The sampler will sample integers from the
interval [0, range_max).
*@li seed: An optional int, defaults to 0. If either "seed" or "seed2" is
set to be non-zero, the random number generator is seeded by the given seed;
otherwise it is seeded by a random seed (as documented for the compatible
TensorFlow operator).
*@li seed2: An optional int, defaults to 0. A second seed to avoid seed
collision. \n

*@par Outputs:
*@li sampled_candidates: A vector of length "num_sampled" and type int64,
in which each element is the ID of a sampled candidate.
*@li true_expected_count: A "batch_size * num_true" matrix of type float32,
representing the number of times each candidate is expected to occur in a
batch of sampled candidates. If "unique" is true, then this is a probability.
*@li sampled_expected_count: A vector of length "num_sampled" and type
float32, representing, for each sampled candidate, the number of times the
candidate is expected to occur in a batch of sampled candidates.
If "unique" is true, then this is a probability. \n

*@attention Constraints:
*ThreadUnsafeUnigramCandidateSampler runs on the Ascend AI CPU,
which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ThreadUnsafeUnigramCandidateSampler. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ThreadUnsafeUnigramCandidateSampler)
.INPUT(true_classes, TensorType({ DT_INT64 }))
.OUTPUT(sampled_candidates, TensorType({ DT_INT64 }))
.OUTPUT(true_expected_count, TensorType({ DT_FLOAT }))
.OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT }))
.REQUIRED_ATTR(num_true, Int)
.REQUIRED_ATTR(num_sampled, Int)
.REQUIRED_ATTR(unique, Bool)
.REQUIRED_ATTR(range_max, Int)
.ATTR(seed, Int, 0)
.ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(ThreadUnsafeUnigramCandidateSampler)

/**
*@brief Generates labels for candidate sampling with a uniform
distribution. \n

*@par Inputs:
*true_classes: A 2D "batch_size * num_true" matrix of type int64, in which
each row contains the IDs of the "num_true" "target_classes" in the
corresponding original label. \n

*@par Attributes:
*@li num_true: A required int. Number of true labels per context.
*@li num_sampled: A required int. Number of candidates to randomly sample.
*@li unique: A required bool. If "unique" is true, samples with rejection,
so that all sampled candidates in a batch are unique.
This requires some approximation to estimate the post-rejection
sampling probabilities.
*@li range_max: A required int. The sampler will sample integers from the
interval [0, range_max).
*@li seed: An optional int, defaults to 0. If either "seed" or "seed2" is
set to be non-zero, the random number generator is seeded by the given seed;
otherwise it is seeded by a random seed (as documented for the compatible
TensorFlow operator).
*@li seed2: An optional int, defaults to 0. A second seed to avoid seed
collision. \n

*@par Outputs:
*@li sampled_candidates: A vector of length "num_sampled" and type int64,
in which each element is the ID of a sampled candidate.
*@li true_expected_count: A "batch_size * num_true" matrix of type float32,
representing the number of times each candidate is expected to occur
in a batch of sampled candidates.
If "unique" is true, then this is a probability.
*@li sampled_expected_count: A vector of length "num_sampled" and type
float32, representing, for each sampled candidate, the number of times
the candidate is expected to occur in a batch of sampled candidates.
If "unique" is true, then this is a probability. \n

*@attention Constraints:
*UniformCandidateSampler runs on the Ascend AI CPU,
which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator UniformCandidateSampler. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(UniformCandidateSampler)
.INPUT(true_classes, TensorType({ DT_INT64 }))
.OUTPUT(sampled_candidates, TensorType({ DT_INT64 }))
.OUTPUT(true_expected_count, TensorType({ DT_FLOAT }))
.OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT }))
.REQUIRED_ATTR(num_true, Int)
.REQUIRED_ATTR(num_sampled, Int)
.REQUIRED_ATTR(unique, Bool)
.REQUIRED_ATTR(range_max, Int)
.ATTR(seed, Int, 0)
.ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(UniformCandidateSampler)

/**
*@brief Generates labels for candidate sampling with a learned
unigram distribution. \n

*@par Inputs:
*true_classes: A 2D "batch_size * num_true" matrix of type int64, in which
each row contains the IDs of the "num_true" "target_classes" in the
corresponding original label. \n

*@par Attributes:
*@li num_true: An optional int, defaults to 0. Number of true labels per
context.
*@li num_sampled: An optional int, defaults to 0. Number of candidates to
randomly sample.
*@li unique: An optional bool, defaults to false. If "unique" is true,
samples with rejection, so that all sampled candidates in a batch are unique.
This requires some approximation to estimate the post-rejection sampling
probabilities.
*@li range_max: An optional int, defaults to 0. The sampler will sample
integers from the interval [0, range_max).
*@li vocab_file: An optional string, defaults to "". Each valid line in this
file (which should have a CSV-like format) corresponds to a valid word ID.
IDs are in sequential order, starting from num_reserved_ids.
*@li distortion: An optional float, defaults to 1.0. The distortion is used to
skew the unigram probability distribution. Each weight is first raised to the
distortion's power before adding to the internal unigram distribution.
*@li num_reserved_ids: An optional int, defaults to 0. Optionally some
reserved IDs can be added in the range [0, ..., num_reserved_ids) by the users.
One use case is that a special unknown word token is used as ID 0.
*@li num_shards: An optional int, defaults to 1. A sampler can be used to
sample from a subset of the original range in order to speed up the whole
computation through parallelism. Per the compatible TensorFlow operator, this
is the number of partitions used in the overall computation.
*@li shard: An optional int, defaults to 0. A sampler can be used to sample
from a subset of the original range in order to speed up the whole computation
through parallelism. Per the compatible TensorFlow operator, this is the
partition number handled by this sampler.
*@li unigrams: A required list of floats. A list of unigram counts or
probabilities, one per ID in sequential order.
*@li seed: An optional int, defaults to 0. If either "seed" or "seed2" is
set to be non-zero, the random number generator is seeded by the given seed;
otherwise it is seeded by a random seed (as documented for the compatible
TensorFlow operator).
*@li seed2: An optional int, defaults to 0. A second seed to avoid seed
collision. \n

*@par Outputs:
*@li sampled_candidates: A vector of length "num_sampled" and type int64,
in which each element is the ID of a sampled candidate.
*@li true_expected_count: A "batch_size * num_true" matrix of type float32,
representing the number of times each candidate is expected to occur in a
batch of sampled candidates. If "unique" is true, then this is a probability.
*@li sampled_expected_count: A vector of length "num_sampled" and type
float32, representing, for each sampled candidate, the number of times the
candidate is expected to occur in a batch of sampled candidates.
If "unique" is true, then this is a probability. \n

*@attention Constraints:
*FixedUnigramCandidateSampler runs on the Ascend AI CPU,
which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator FixedUnigramCandidateSampler. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(FixedUnigramCandidateSampler)
.INPUT(true_classes, TensorType({ DT_INT64 }))
.OUTPUT(sampled_candidates, TensorType({ DT_INT64 }))
.OUTPUT(true_expected_count, TensorType({ DT_FLOAT }))
.OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT }))
.ATTR(num_true, Int, 0)
.ATTR(num_sampled, Int, 0)
.ATTR(unique, Bool, false)
.ATTR(range_max, Int, 0)
.ATTR(vocab_file, String, "")
.ATTR(distortion, Float, 1.0)
.ATTR(num_reserved_ids, Int, 0)
.ATTR(num_shards, Int, 1)
.ATTR(shard, Int, 0)
.REQUIRED_ATTR(unigrams, ListFloat)
.ATTR(seed, Int, 0)
.ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(FixedUnigramCandidateSampler)

/**
*@brief Generates labels for candidate sampling with a learned
unigram distribution. \n

*@par Inputs:
*true_classes: A 2D "batch_size * num_true" matrix of type int64, in which
each row contains the IDs of the "num_true" "target_classes" in the
corresponding original label. \n

*@par Attributes:
*@li num_true: A required int. Number of true labels per context.
*@li num_sampled: A required int. Number of candidates to randomly sample.
*@li unique: A required bool. If "unique" is true, samples with rejection,
so that all sampled candidates in a batch are unique.
This requires some approximation to estimate the post-rejection
sampling probabilities.
*@li range_max: A required int. The sampler will sample integers from the
interval [0, range_max).
*@li seed: An optional int, defaults to 0. If either "seed" or "seed2" is
set to be non-zero, the random number generator is seeded by the given seed;
otherwise it is seeded by a random seed (as documented for the compatible
TensorFlow operator).
*@li seed2: An optional int, defaults to 0. A second seed to avoid seed
collision. \n

*@par Outputs:
*@li sampled_candidates: A vector of length "num_sampled" and type int64,
in which each element is the ID of a sampled candidate.
*@li true_expected_count: A "batch_size * num_true" matrix of type float32,
representing the number of times each candidate is expected to occur in a
batch of sampled candidates.
If "unique" is true, then this is a probability.
*@li sampled_expected_count: A vector of length "num_sampled" and type
float32, representing, for each sampled candidate, the number of times the
candidate is expected to occur in a batch of sampled candidates.
If "unique" is true, then this is a probability. \n

*@attention Constraints:
*LearnedUnigramCandidateSampler runs on the Ascend AI CPU, which delivers
poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator LearnedUnigramCandidateSampler. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(LearnedUnigramCandidateSampler)
.INPUT(true_classes, TensorType({ DT_INT64 }))
.OUTPUT(sampled_candidates, TensorType({ DT_INT64 }))
.OUTPUT(true_expected_count, TensorType({ DT_FLOAT }))
.OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT }))
.REQUIRED_ATTR(num_true, Int)
.REQUIRED_ATTR(num_sampled, Int)
.REQUIRED_ATTR(unique, Bool)
.REQUIRED_ATTR(range_max, Int)
.ATTR(seed, Int, 0)
.ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(LearnedUnigramCandidateSampler)

/**
*@brief Generates labels for candidate sampling with a log-uniform
distribution. \n

*@par Inputs:
*true_classes: A 2D "batch_size * num_true" matrix of type int64, in which
each row contains the IDs of the "num_true" "target_classes" in the
corresponding original label. \n

*@par Attributes:
*@li num_true: A required int. Number of true labels per context.
*@li num_sampled: A required int. Number of candidates to randomly sample.
*@li unique: A required bool. If "unique" is true, samples with rejection,
so that all sampled candidates in a batch are unique. This requires some
approximation to estimate the post-rejection sampling probabilities.
*@li range_max: A required int. The sampler will sample integers from the
interval [0, range_max).
*@li seed: An optional int, defaults to 0. If either "seed" or "seed2" is
set to be non-zero, the random number generator is seeded by the given seed;
otherwise it is seeded by a random seed (as documented for the compatible
TensorFlow operator).
*@li seed2: An optional int, defaults to 0. A second seed to avoid seed
collision. \n

*@par Outputs:
*@li sampled_candidates: A vector of length "num_sampled" and type int64,
in which each element is the ID of a sampled candidate.
*@li true_expected_count: A "batch_size * num_true" matrix of type float32,
representing the number of times each candidate is expected to occur in a
batch of sampled candidates. If "unique" is true, then this is a probability.
*@li sampled_expected_count: A vector of length "num_sampled" and type
float32, representing, for each sampled candidate, the number of times the
candidate is expected to occur in a batch of sampled candidates.
If "unique" is true, then this is a probability. \n

*@attention Constraints:
*LogUniformCandidateSampler runs on the Ascend AI CPU, which delivers
poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator LogUniformCandidateSampler. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(LogUniformCandidateSampler)
.INPUT(true_classes, TensorType({ DT_INT64 }))
.OUTPUT(sampled_candidates, TensorType({ DT_INT64 }))
.OUTPUT(true_expected_count, TensorType({ DT_FLOAT }))
.OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT }))
.REQUIRED_ATTR(num_true, Int)
.REQUIRED_ATTR(num_sampled, Int)
.REQUIRED_ATTR(unique, Bool)
.REQUIRED_ATTR(range_max, Int)
.ATTR(seed, Int, 0)
.ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(LogUniformCandidateSampler)

/**
*@brief Generates labels for candidate sampling with a learned
unigram distribution. \n

*@par Inputs:
*true_classes: A 2D "batch_size * num_true" matrix of type int64, in which
each row contains the IDs of the "num_true" "target_classes" in the
corresponding original label. \n

*@par Attributes:
*@li num_true: A required int. Number of true labels per context.
*@li num_sampled: A required int. Number of candidates to randomly sample.
*@li unique: A required bool. If "unique" is true, samples with rejection,
so that all sampled candidates in a batch are unique. This requires some
approximation to estimate the post-rejection sampling probabilities.
*@li seed: An optional int, defaults to 0. If either "seed" or "seed2" is
set to be non-zero, the random number generator is seeded by the given seed;
otherwise it is seeded by a random seed (as documented for the compatible
TensorFlow operator).
*@li seed2: An optional int, defaults to 0. A second seed to avoid seed
collision. \n

*@par Outputs:
*@li sampled_candidates: A vector of length "num_sampled" and type int64,
in which each element is the ID of a sampled candidate.
*@li true_expected_count: A "batch_size * num_true" matrix of type float32,
representing the number of times each candidate is expected to occur in a
batch of sampled candidates.
If "unique" is true, then this is a probability.
*@li sampled_expected_count: A vector of length "num_sampled" and type
float32, representing, for each sampled candidate, the number of times the
candidate is expected to occur in a batch of sampled candidates.
If "unique" is true, then this is a probability. \n

*@attention Constraints:
*AllCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator AllCandidateSampler. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(AllCandidateSampler)
.INPUT(true_classes, TensorType({ DT_INT64 }))
.OUTPUT(sampled_candidates, TensorType({ DT_INT64 }))
.OUTPUT(true_expected_count, TensorType({ DT_FLOAT }))
.OUTPUT(sampled_expected_count, TensorType({ DT_FLOAT }))
.REQUIRED_ATTR(num_true, Int)
.REQUIRED_ATTR(num_sampled, Int)
.REQUIRED_ATTR(unique, Bool)
.ATTR(seed, Int, 0)
.ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(AllCandidateSampler)

/**
*@brief Computes the "ids" of the positions in "sampled_candidates" that
match the true labels given in "true_classes". \n

*@par Inputs:
* @li true_classes: A 2D matrix of type int64. The "true_classes" output of
UnpackSparseLabels.
* @li sampled_candidates: A tensor of type int64. The "sampled_candidates"
output of CandidateSampler. \n

*@par Attributes:
*@li num_true: A required int. Number of true labels per context.
*@li seed: An optional int, defaults to 0. If either "seed" or "seed2" is
set to be non-zero, the random number generator is seeded by the given seed;
otherwise it is seeded by a random seed (as documented for the compatible
TensorFlow operator).
*@li seed2: An optional int, defaults to 0. A second seed to avoid seed
collision. \n

*@par Outputs:
* @li indices: A vector of type int32, holding indices corresponding to rows
of "true_classes".
* @li ids: A vector of type int64, holding the IDs of positions in
"sampled_candidates" that match a true label for the row with the
corresponding index in "indices".
* @li weights: A vector of type float32 and of the same length as "indices"
and "ids", in which each element is -FLOAT_MAX. \n

*@attention Constraints:
*ComputeAccidentalHits runs on the Ascend AI CPU, which delivers poor performance. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ComputeAccidentalHits. \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ComputeAccidentalHits)
.INPUT(true_classes, TensorType({ DT_INT64 }))
.INPUT(sampled_candidates, TensorType({ DT_INT64 }))
.OUTPUT(indices, TensorType({ DT_INT32 }))
.OUTPUT(ids, TensorType({ DT_INT64 }))
.OUTPUT(weights, TensorType({ DT_FLOAT }))
.REQUIRED_ATTR(num_true, Int)
.ATTR(seed, Int, 0)
.ATTR(seed2, Int, 0)
.OP_END_FACTORY_REG(ComputeAccidentalHits)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_

+ 59
- 0
third_party/fwkacllib/inc/inc/ops/condtake_ops.h View File

@@ -0,0 +1,59 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file condtake_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {
/**
*@brief Takes elements from "data" where a specific condition is satisfied
on "mask". \n

*@par Inputs:
*@li data: A tensor of type float32 from which to take elements.
A high-dimensional input is first flattened.
*@li mask: A tensor of type float32 holding the condition parameter.
Must have the same shape as "data". \n

*@par Attributes:
*@li mode: A required string selecting the condition mode.
NOTE(review): the set of supported mode strings is not visible here - confirm
against the kernel implementation.
*@li val: A required float. NOTE(review): presumably the value "mask" is
compared against under "mode" - confirm.
*@li eps: An optional float, defaults to 1e-06. NOTE(review): presumably the
tolerance used by the comparison - confirm. \n

*@par Outputs:
*@li out_data: A tensor of type float32 holding the elements taken.
*@li out_index: A tensor of type int32 holding the indices corresponding to
those elements.
*@li valid_num: A tensor of type int32. The elements of "out_data" and
"out_index" from index zero up to "valid_num" are valid. \n
*/

REG_OP(CondTake)
.INPUT(data, TensorType({DT_FLOAT}))
.INPUT(mask, TensorType({DT_FLOAT}))
.OUTPUT(out_data, TensorType({DT_FLOAT}))
.OUTPUT(out_index, TensorType({DT_INT32}))
.OUTPUT(valid_num, TensorType({DT_INT32}))
.REQUIRED_ATTR(mode, String)
.REQUIRED_ATTR(val, Float)
.ATTR(eps, Float, 1e-06)
.OP_END_FACTORY_REG(CondTake)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_

+ 407
- 0
third_party/fwkacllib/inc/inc/ops/control_flow_ops.h View File

@@ -0,0 +1,407 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file control_flow_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {

/**
*@brief Forwards the value of an available tensor from input "x" to output "y".
* Merge waits for at least one of the input tensors to become available.
* It is usually combined with Switch to implement branching.
* Merge forwards the first tensor to become available to output "y",
* and sets "value_index" to the index of that tensor in the inputs. \n

*@par Inputs:
*x: The input tensors, one of which will become available. It's a dynamic input.
* Must be one of the following types: float16, float32, float64, int8,
* int16, int32, int64, uint8, uint16, uint32, uint64, bool. \n

*@par Outputs:
*@li y: The available tensor. Has the same type as "x".
*@li value_index: A scalar of type int32, for the index of the chosen input
* tensor. \n

*@see Switch()

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Merge.
*/
REG_OP(Merge)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(value_index, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(Merge)

/**
*@brief Forwards the value of an available tensor from input "x" to output "y".
* RefMerge waits for at least one of the input tensors to become available.
* It is usually combined with Switch to implement branching.
* RefMerge forwards the first tensor to become available to output "y",
* and sets "value_index" to the index of that tensor in the inputs. \n

*@par Inputs:
*x: The input tensors, one of which will become available. It's a dynamic input.
* Must be one of the following types: float16, float32, float64, int8,
* int16, int32, int64, uint8, uint16, uint32, uint64, bool. \n

*@par Outputs:
*@li y: The available tensor. Has the same type as "x".
*@li value_index: A scalar of type int32, for the index of the chosen input
* tensor. \n

*@see Switch() | Merge()

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator RefMerge.
*/
REG_OP(RefMerge)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(value_index, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(RefMerge)

/**
*@brief Forwards "data" to the output port determined by "pred".
* If "pred" is "true", the data input is forwarded to "output_true".
* Otherwise, the data is forwarded to "output_false". \n

*@par Inputs:
*@li data: The tensor to be forwarded.
* Must be one of the following types: float16, float32, float64,
* int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool.
*@li pred: A boolean scalar that selects the output port that will receive
* the data. \n

*@par Outputs:
*@li output_false: If "pred" is "false", data will be forwarded to this output.
* Has the same type as "data".
*@li output_true: If "pred" is "true", data will be forwarded to this output.
* Has the same type as "data". \n

*@see Merge()

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Switch.
*/
REG_OP(Switch)
.INPUT(data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.INPUT(pred, TensorType({DT_BOOL}))
.OUTPUT(output_false, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(output_true, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OP_END_FACTORY_REG(Switch)

/**
*@brief Forwards the ref tensor "data" to the output port determined by "pred".
* If "pred" is "true", the data input is forwarded to "output_true".
* Otherwise, the data is forwarded to "output_false". \n

*@par Inputs:
*@li data: The ref tensor to be forwarded.
* Must be one of the following types: float16, float32, float64,
* int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool.
*@li pred: A boolean scalar that selects the output port that will receive
* the data. \n

*@par Outputs:
*@li output_false: If "pred" is "false", data will be forwarded to this output.
* Has the same type as "data".
*@li output_true: If "pred" is "true", data will be forwarded to this output.
* Has the same type as "data". \n

*@see Merge() | Switch()

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator RefSwitch.
*/
REG_OP(RefSwitch)
.INPUT(data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.INPUT(pred, TensorType({DT_BOOL}))
.OUTPUT(output_false, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(output_true, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OP_END_FACTORY_REG(RefSwitch)

/**
*@brief Forwards "data" to the output port determined by "pred_value". \n

*@par Inputs:
*@li data: The tensor to be forwarded.
* Must be one of the following types: float16, float32, float64,
* int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool.
*@li pred_value: An int64 tensor which determines the output port that will
* receive the data. \n

*@par Outputs:
*output: The output tensors, one of which will become available.
* Has the same type as "data". It's a dynamic output.
*/
REG_OP(SwitchN)
.INPUT(data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.INPUT(pred_value, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OP_END_FACTORY_REG(SwitchN)

/**
*@brief Creates or finds a child frame, and makes "x" available to the child
* frame. This op is used together with Exit to create loops in the graph.
* The Executor uses the unique "frame_name" to identify frames.
* If "is_constant" is "true", output "y" is a constant in the child
* frame; otherwise it may be changed in the child frame. \n

*@par Inputs:
*x: The tensor to be made available to the child frame.
* Must be one of the following types: float16, float32, float64, int8,
* int16, int32, int64, uint8, uint16, uint32, uint64, bool. \n

*@par Attributes:
*@li frame_name: A required string. The name of the child frame.
*@li is_constant: A required bool. If true, the output is constant in
* the child frame. \n

*@par Outputs:
*y: A tensor. Has the same type as "x". \n

*@see Exit()

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Enter.
*/
REG_OP(Enter)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.REQUIRED_ATTR(frame_name, String)
.REQUIRED_ATTR(is_constant, Bool)
.OP_END_FACTORY_REG(Enter)

/**
*@brief Creates or finds a child frame, and makes "x" available to the child
* frame. This op is used together with Exit to create loops in the graph.
* The Executor uses the unique "frame_name" to identify frames.
* If "is_constant" is "true", output "y" is a constant in the child
* frame; otherwise it may be changed in the child frame. \n

*@par Inputs:
*x: The tensor to be made available to the child frame.
* Must be one of the following types: float16, float32, float64, int8,
* int16, int32, int64, uint8, uint16, uint32, uint64, bool. \n

*@par Attributes:
*@li frame_name: A required string. The name of the child frame.
*@li is_constant: A required bool. If true, the output is constant in
* the child frame. \n

*@par Outputs:
*y: A tensor. Has the same type as "x". \n

*@see Exit() | Enter()

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator RefEnter.
*/
REG_OP(RefEnter)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.REQUIRED_ATTR(frame_name, String)
.REQUIRED_ATTR(is_constant, Bool)
.OP_END_FACTORY_REG(RefEnter)

/**
*@brief Forwards the input to the output. This op represents the loop
* termination condition. \n

*@par Inputs:
*x: A boolean scalar. The condition of the Switch op. \n

*@par Outputs:
*y: A boolean tensor. The same tensor as "x". \n

*@see Switch()

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator LoopCond.
*/
REG_OP(LoopCond)
.INPUT(x, TensorType({DT_BOOL}))
.OUTPUT(y, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(LoopCond)

/**
*@brief Makes the input available to the next iteration. \n

*@par Inputs:
*x: The tensor to be made available to the next iteration.
* Must be one of the following types: float16, float32, float64, int8,
* int16, int32, int64, uint8, uint16, uint32, uint64, bool. \n

*@par Outputs:
*y: A tensor. Has the same type as "x". \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator NextIteration.
*/
REG_OP(NextIteration)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OP_END_FACTORY_REG(NextIteration)

/**
*@brief Makes the input available to the next iteration. \n

*@par Inputs:
*x: The tensor to be made available to the next iteration.
* Must be one of the following types: float16, float32, float64, int8,
* int16, int32, int64, uint8, uint16, uint32, uint64, bool. \n

*@par Outputs:
*y: A tensor. Has the same type as "x". \n

*@see NextIteration()

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator RefNextIteration.
*/
REG_OP(RefNextIteration)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OP_END_FACTORY_REG(RefNextIteration)

/**
*@brief Exits the current frame to its parent frame. \n

*@par Inputs:
*x: The tensor to be made available to the parent frame.
* Must be one of the following types: float16, float32, float64, int8,
* int16, int32, int64, uint8, uint16, uint32, uint64, bool. \n

*@par Outputs:
*y: A tensor. Has the same type as "x". \n

*@see Enter()

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Exit.
*/
REG_OP(Exit)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OP_END_FACTORY_REG(Exit)

/**
*@brief Exits the current frame to its parent frame. \n

*@par Inputs:
*x: The tensor to be made available to the parent frame.
* Must be one of the following types: float16, float32, float64, int8,
* int16, int32, int64, uint8, uint16, uint32, uint64, bool. \n

*@par Outputs:
*y: A tensor. Has the same type as "x". \n

*@see Enter() | Exit()

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator RefExit.
*/
REG_OP(RefExit)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE,
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, DT_UINT16, DT_UINT32,
DT_UINT64, DT_BOOL}))
.OP_END_FACTORY_REG(RefExit)

/**
*@brief Only useful as a placeholder for control edges.
* It is similar to a no-op that always produces a live control output
* even when some control inputs are dead. The op declares no data inputs,
* outputs or attributes. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator ControlTrigger.
*/
REG_OP(ControlTrigger)
.OP_END_FACTORY_REG(ControlTrigger)

/**
*@brief Returns index of shape in the map. \n

*@par Inputs:
* Three inputs, including:
*@li x: A one-dimensional tensor of type int32, specifying the queried shape. The maximum size is 8.
*@li data_seq: A one-dimensional tensor of type int32, specifying the mapped table to be queried.
*@li level_index: An optional one-dimensional tensor of type int32, specifying the secondary index. \n

*@par Outputs:
*y: A tensor with shape [batch, 8], of type int32, specifying the index of the shape in the map. \n

*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(MapIndex)
.INPUT(x, TensorType({DT_INT32}))
.INPUT(data_seq, TensorType({DT_INT32}))
.OPTIONAL_INPUT(level_index, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(MapIndex)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_

+ 142
- 0
third_party/fwkacllib/inc/inc/ops/ctc_ops.h View File

@@ -0,0 +1,142 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file ctc_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_

#include "graph/operator.h"
#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Calculates the CTC Loss (log probability) for each batch entry.
Also calculates the gradient. \n

*@par Inputs:
*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
Must be one of the following types: float32, float64.
*@li labels_indices: The indices of a `SparseTensor<int32, 2>`, of type int64.
`labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for
`(batch b, time t)`.
*@li labels_values: The values (labels) associated with the given batch and time,
of type int32.
*@li sequence_length: A vector containing sequence lengths (batch), of type int32. \n

*@par Outputs:
*@li loss: A vector (batch) containing log-probabilities.
Must be one of the following types: float32, float64.
*@li gradient: The gradient of `loss`. 3-D, shape: `(max_time x
batch_size x num_classes)`. Must be one of the following types: float32, float64. \n

*@par Attributes:
*@li preprocess_collapse_repeated: Scalar, if true then repeated labels are collapsed prior to
the CTC calculation. If not specified, defaults to false.
*@li ctc_merge_repeated: Scalar. If set to false, *during* CTC calculation
repeated non-blank labels will not be merged and are interpreted as
individual labels. This is a simplified version of CTC.
If not specified, defaults to true.
*@li ignore_longer_outputs_than_inputs: Scalar. If not specified, defaults to false.
NOTE(review): presumably, as in TensorFlow's CTCLoss, setting it to true skips
items whose output sequence is longer than the input sequence instead of
treating them as an error - confirm against the kernel. \n

*@par Third-party framework compatibility
* Compatible with TensorFlow CTCLoss operator.
*/
REG_OP(CTCLoss)
.INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(labels_indices, TensorType({DT_INT64}))
.INPUT(labels_values, TensorType({DT_INT32}))
.INPUT(sequence_length, TensorType({DT_INT32}))
.OUTPUT(loss, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(gradient, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(preprocess_collapse_repeated, Bool, false)
.ATTR(ctc_merge_repeated, Bool, true)
.ATTR(ignore_longer_outputs_than_inputs, Bool, false)
.OP_END_FACTORY_REG(CTCLoss)

/**
*@brief Performs greedy decoding on the logits given in inputs. \n

*@par Inputs:
*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
Must be one of the following types: float32, float64.
*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`,
of type int32. \n

*@par Attributes:
*@li merge_repeated: If True, merge repeated classes in output.
If not specified, defaults to false. \n

*@par Outputs:
*@li decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`,
of a `SparseTensor<int64, 2>`. The rows store: [batch, time]. Of type int64.
*@li decoded_values: Values vector, size: `(total_decoded_outputs)`,
of a `SparseTensor<int64, 2>`. The vector stores the decoded classes. Of type int64.
*@li decoded_shape: Shape vector, size `(2)`, of the decoded SparseTensor.
Values are: `[batch_size, max_decoded_length]`. Of type int64.
*@li log_probability: Matrix, size `(batch_size x 1)`, containing sequence
log-probabilities. Must be one of the following types: float32, float64. \n

*@par Third-party framework compatibility
* Compatible with TensorFlow CTCGreedyDecoder operator.
*/
REG_OP(CTCGreedyDecoder)
.INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(sequence_length, TensorType({DT_INT32}))
.ATTR(merge_repeated, Bool, false)
.OUTPUT(decoded_indices, TensorType({DT_INT64}))
.OUTPUT(decoded_values, TensorType({DT_INT64}))
.OUTPUT(decoded_shape, TensorType({DT_INT64}))
.OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(CTCGreedyDecoder)

/**
*@brief Performs beam search decoding on the logits given in input. \n

*@par Inputs:
*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
Must be one of the following types: float32, float64.
*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`,
of type int32. \n

*@par Attributes:
*@li beam_width: A required int. NOTE(review): presumably the beam width used
by the search, as in TensorFlow's CTCBeamSearchDecoder - confirm.
*@li top_paths: A required int. It is the length of the "decoded_indices",
"decoded_values" and "decoded_shape" output lists and the second dimension
of "log_probability".
*@li merge_repeated: If True, merge repeated classes in output.
If not specified, defaults to true. \n

*@par Outputs:
*@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j,
size `(total_decoded_outputs[j] x 2)`, has indices of a
`SparseTensor<int64, 2>`. The rows store: [batch, time]. It's a dynamic output.
*@li decoded_values: A list (length: top_paths) of values vectors. Vector j,
size `(length total_decoded_outputs[j])`, has the values of a
`SparseTensor<int64, 2>`. The vector stores the decoded classes for beam j.
It's a dynamic output.
*@li decoded_shape: A list (length: top_paths) of shape vector. Vector j,
size `(2)`, stores the shape of the decoded `SparseTensor[j]`.
Its values are: `[batch_size, max_decoded_length[j]]`. It's a dynamic output.
*@li log_probability: A matrix, shaped: `(batch_size x top_paths)`. The
sequence log-probabilities. Must be one of the following types: float32, float64. \n

*@par Third-party framework compatibility
* Compatible with TensorFlow CTCBeamSearchDecoder operator.
*/
REG_OP(CTCBeamSearchDecoder)
.INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(sequence_length, TensorType({DT_INT32}))
.REQUIRED_ATTR(beam_width, Int)
.REQUIRED_ATTR(top_paths, Int)
.ATTR(merge_repeated, Bool, true)
.DYNAMIC_OUTPUT(decoded_indices, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(decoded_values, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(decoded_shape, TensorType({DT_INT64}))
.OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(CTCBeamSearchDecoder)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_

+ 2344
- 0
third_party/fwkacllib/inc/inc/ops/data_flow_ops.h
File diff suppressed because it is too large
View File


+ 3735
- 0
third_party/fwkacllib/inc/inc/ops/elewise_calculation_ops.h
File diff suppressed because it is too large
View File


+ 333
- 0
third_party/fwkacllib/inc/inc/ops/functional_ops.h View File

@@ -0,0 +1,333 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file functional_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {
/**
*@brief Select one of the subgraphs to pass the input tensors and return the output tensors.
* If "cond" means True, the selected subgraph is "then_branch".
* Otherwise, the selected subgraph is "else_branch" . \n

*@par Inputs:
*@li cond: A Tensor. If "cond" is not a scalar of boolean type,
* it will be converted to a boolean according to the following rule:
* if "cond" is a numerical scalar, non-zero means True and zero means False;
* if "cond" is a string scalar, non-empty means True and empty means False;
* if "cond" is not a scalar, non-empty means True and empty means False.
*@li input: The input tensors, which will be passed to the selected subgraph.
* It's a dynamic input. \n

*@par Graphs:
*@li then_branch: A subgraph takes 'input' and returns a list of tensors,
* whose types are the same as what else_branch returns.
*@li else_branch: A subgraph takes 'input' and returns a list of tensors,
* whose types are the same as what then_branch returns . \n

*@par Outputs:
*output: The output tensors returned by either then_branch(input) or else_branch(input).
* It's a dynamic output. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator _If.
*/
REG_OP(_If)
.INPUT(cond, TensorType::ALL())
.DYNAMIC_INPUT(input, TensorType::ALL())
.DYNAMIC_OUTPUT(output, TensorType::ALL())
.GRAPH(then_branch)
.GRAPH(else_branch)
.OP_END_FACTORY_REG(_If)

/**
*@brief Select one of the subgraphs to pass the input tensors and return the output tensors.
* If "cond" means True, the selected subgraph is "then_branch".
* Otherwise, the selected subgraph is "else_branch" . \n

*@par Inputs:
*@li cond: A Tensor. If "cond" is not a scalar of boolean type,
* it will be converted to a boolean according to the following rule:
* if "cond" is a numerical scalar, non-zero means True and zero means False;
* if "cond" is a string scalar, non-empty means True and empty means False;
* if "cond" is not a scalar, non-empty means True and empty means False.
*@li input: The input tensors, which will be passed to the selected subgraph.
* It's a dynamic input. \n

*@par Graphs:
*@li then_branch: A subgraph takes 'input' and returns a list of tensors,
* whose types are the same as what else_branch returns.
*@li else_branch: A subgraph takes 'input' and returns a list of tensors,
* whose types are the same as what then_branch returns . \n

*@par Outputs:
*output: The output tensors returned by either then_branch(input) or else_branch(input).
* It's a dynamic output. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator StatelessIf.
*/
REG_OP(StatelessIf)
.INPUT(cond, TensorType::ALL())
.DYNAMIC_INPUT(input, TensorType::ALL())
.DYNAMIC_OUTPUT(output, TensorType::ALL())
.GRAPH(then_branch)
.GRAPH(else_branch)
.OP_END_FACTORY_REG(StatelessIf)

/**
*@brief Select one of the subgraphs to pass the input tensors and return the output tensors.
* If "cond" means True, the selected subgraph is "then_branch".
* Otherwise, the selected subgraph is "else_branch" . \n

*@par Inputs:
*@li cond: A Tensor. If "cond" is not a scalar of boolean type,
* it will be converted to a boolean according to the following rule:
* if "cond" is a numerical scalar, non-zero means True and zero means False;
* if "cond" is a string scalar, non-empty means True and empty means False;
* if "cond" is not a scalar, non-empty means True and empty means False.
*@li input: The input tensors, which will be passed to the selected subgraph.
* It's a dynamic input. \n

*@par Graphs:
*@li then_branch: A subgraph takes 'input' and returns a list of tensors,
* whose types are the same as what else_branch returns.
*@li else_branch: A subgraph takes 'input' and returns a list of tensors,
* whose types are the same as what then_branch returns . \n

*@par Outputs:
*output: The output tensors returned by either then_branch(input) or else_branch(input).
* It's a dynamic output. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator If.
*/
REG_OP(If)
.INPUT(cond, TensorType::ALL())
.DYNAMIC_INPUT(input, TensorType::ALL())
.DYNAMIC_OUTPUT(output, TensorType::ALL())
.GRAPH(then_branch)
.GRAPH(else_branch)
.OP_END_FACTORY_REG(If)

/**
*@brief Select one of the subgraphs to pass the input tensors and return the output tensors . \n

*@par Inputs:
*@li branch_index: An int32 scalar which determines the selected subgraph.
*@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n

*@par Graphs:
*branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors,
* whose types are the same as what every other subgraph returns. It's a dynamic graph list . \n

*@par Outputs:
*output: The output tensors returned by one of branches . It's a dynamic output. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Case.
*/
// NOTE(review): "branch_index" is declared with a bare DT_INT32 rather than the
// TensorType({DT_INT32}) form used by most other registrations in these headers;
// confirm that the INPUT macro accepts both forms as intended.
REG_OP(Case)
.INPUT(branch_index, DT_INT32)
.DYNAMIC_INPUT(input, TensorType::ALL())
.DYNAMIC_OUTPUT(output, TensorType::ALL())
.DYNAMIC_GRAPH(branches)
.OP_END_FACTORY_REG(Case)

/**
*@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n

*@par Inputs:
*input: The input tensors . It's a dynamic input. \n

*@par Graphs:
*@li cond: A subgraph takes 'input' and returns a tensor.
* If the tensor is not a scalar of boolean type,
* it will be converted to a boolean according to the following rule:
* if it is a numerical scalar, non-zero means True and zero means False;
* if it is a string scalar, non-empty means True and empty means False;
* if it is not a scalar, non-empty means True and empty means False.
*@li body: A subgraph takes 'input' and returns another list of tensors . \n

*@par Attributes:
* None registered.
* NOTE(review): unlike While and StatelessWhile, this registration does not
* declare the "parallel_iterations" attribute - confirm whether that is intended . \n

*@par Outputs:
*output: The output tensors returned by "body". Has the same type as "input" .
* It's a dynamic output. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator _While.
*/
REG_OP(_While)
.DYNAMIC_INPUT(input, TensorType::ALL())
.DYNAMIC_OUTPUT(output, TensorType::ALL())
.GRAPH(cond)
.GRAPH(body)
.OP_END_FACTORY_REG(_While)

/**
*@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n

*@par Inputs:
*input: The input tensors . It's a dynamic input. \n

*@par Graphs:
*@li cond: A subgraph takes 'input' and returns a tensor.
* If the tensor is not a scalar of boolean type,
* it will be converted to a boolean according to the following rule:
* if it is a numerical scalar, non-zero means True and zero means False;
* if it is a string scalar, non-empty means True and empty means False;
* if it is not a scalar, non-empty means True and empty means False.
*@li body: A subgraph takes 'input' and returns another list of tensors . \n

*@par Attributes:
*parallel_iterations: An optional int, defaults to 10 . \n

*@par Outputs:
*output: The output tensors returned by "body". Has the same type as "input" . It's a dynamic output. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator While.
*/
REG_OP(While)
.DYNAMIC_INPUT(input, TensorType::ALL())
.DYNAMIC_OUTPUT(output, TensorType::ALL())
.GRAPH(cond)
.GRAPH(body)
.ATTR(parallel_iterations, Int, 10)
.OP_END_FACTORY_REG(While)

/**
*@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n

*@par Inputs:
*input: The input tensors . It's a dynamic input. \n

*@par Graphs:
*@li cond: A subgraph takes 'input' and returns a tensor.
* If the tensor is not a scalar of boolean type,
* it will be converted to a boolean according to the following rule:
* if it is a numerical scalar, non-zero means True and zero means False;
* if it is a string scalar, non-empty means True and empty means False;
* if it is not a scalar, non-empty means True and empty means False.
*@li body: A subgraph takes 'input' and returns another list of tensors . \n

*@par Attributes:
*parallel_iterations: An optional int, defaults to 10 . \n

*@par Outputs:
*output: The output tensors returned by "body". Has the same type as "input" . It's a dynamic output. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator StatelessWhile.
*/
REG_OP(StatelessWhile)
.DYNAMIC_INPUT(input, TensorType::ALL())
.DYNAMIC_OUTPUT(output, TensorType::ALL())
.GRAPH(cond)
.GRAPH(body)
.ATTR(parallel_iterations, Int, 10)
.OP_END_FACTORY_REG(StatelessWhile)

/**
*@brief Cyclic execute the "body" subgraph until the first input of For op exceeds the upper bound . \n

*@par Inputs:
*@li start: An int32 scalar. The lower bound.
*@li limit: An int32 scalar. The upper bound.
*@li delta: An int32 scalar. The step size.
*@li input: The input tensors, which will be passed to "body" . It's a dynamic input. \n

*@par Graphs:
*body: A subgraph takes 'input' and returns another list of tensors . \n

*@par Outputs:
*output: The output tensors returned by "body". Has the same type as "input" . It's a dynamic output. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator For.
*/
// NOTE(review): "start", "limit" and "delta" are declared with a bare DT_INT32
// rather than the TensorType({DT_INT32}) form used by most other registrations
// in these headers; confirm that the INPUT macro accepts both forms as intended.
REG_OP(For)
.INPUT(start, DT_INT32)
.INPUT(limit, DT_INT32)
.INPUT(delta, DT_INT32)
.DYNAMIC_INPUT(input, TensorType::ALL())
.DYNAMIC_OUTPUT(output, TensorType::ALL())
.GRAPH(body)
.OP_END_FACTORY_REG(For)

/**
*@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n

*@par Inputs:
*args: The input tensors, which will be passed to "f" . It's a dynamic input. \n

*@par Graphs:
*f: A subgraph takes 'args' and returns another list of tensors . \n

*@par Attributes:
*@li config: An optional string, defaults to "".
*@li config_proto: An optional string, defaults to "".
*@li executor_type: An optional string, defaults to "" . \n

*@par Outputs:
*output: The output tensors returned by "f" . It's a dynamic output. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator PartitionedCall.
*/
REG_OP(PartitionedCall)
.DYNAMIC_INPUT(args, TensorType::ALL())
.DYNAMIC_OUTPUT(output, TensorType::ALL())
.GRAPH(f)
.ATTR(config, String, "")
.ATTR(config_proto, String, "")
.ATTR(executor_type, String, "")
.OP_END_FACTORY_REG(PartitionedCall)

/**
*@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n

*@par Inputs:
*args: The input tensors, which will be passed to "f" . It's a dynamic input. \n

*@par Graphs:
*f: A subgraph takes 'args' and returns another list of tensors . \n

*@par Attributes:
*@li config: An optional string, defaults to "".
*@li config_proto: An optional string, defaults to "".
*@li executor_type: An optional string, defaults to "" . \n

*@par Outputs:
*output: The output tensors returned by "f" . It's a dynamic output. \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator StatefulPartitionedCall.
*/
REG_OP(StatefulPartitionedCall)
.DYNAMIC_INPUT(args, TensorType::ALL())
.DYNAMIC_OUTPUT(output, TensorType::ALL())
.GRAPH(f)
.ATTR(config, String, "")
.ATTR(config_proto, String, "")
.ATTR(executor_type, String, "")
.OP_END_FACTORY_REG(StatefulPartitionedCall)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_

+ 103
- 0
third_party/fwkacllib/inc/inc/ops/get_data_ops.h View File

@@ -0,0 +1,103 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file get_data_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Binding dataset and GetNext
*@par Attributes:
*_kernel: An optional string, defaults to "dp".
*@par Inputs:
*x, x1: Tensors of any type. NOTE(review): presumably the Dataset and the
* GetNext operator respectively - confirm the order against the caller.
*@par Outputs: None
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(MakeIterator)
.INPUT(x, TensorType::ALL())
.INPUT(x1, TensorType::ALL())
.ATTR(_kernel, String, "dp")
.OP_END_FACTORY_REG(MakeIterator)

/**
*@brief Dataset iterator
*@par Attributes:
*@li output_types: Data type of output. An optional list of ints, defaults to {}.
*@li output_shapes: Shapes of output. An optional list of int lists, defaults to {{}, {}}.
*@li container: Iterator container name. An optional string, defaults to "".
*@li shared_name: Iterator id. An optional string, defaults to "".
*@par Inputs: None
*@par Outputs:
*y: Dataset. A tensor of any type.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(IteratorV2)
.OUTPUT(y, TensorType::ALL())
.ATTR(output_types, ListInt, {})
.ATTR(output_shapes,ListListInt, {{}, {}})
.ATTR(container, String, "")
.ATTR(shared_name, String, "")
.OP_END_FACTORY_REG(IteratorV2)

/**
*@brief Dataset GetNext iterator
*@par Attributes:
*@li output_types: Data type of output. An optional list of ints, defaults to {}.
*@li output_shapes: Shapes of output. An optional list of int lists, defaults to {{}, {}}.
*@li output_num: Num of output. An optional int, defaults to 1.
*@li _kernel: An optional string, defaults to "dp".
*@par Inputs:
*x: Queue data. A tensor of any type.
*@par Outputs:
*y: Input of computer graph. Tensors of any type. It's a dynamic output.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(IteratorGetNext)
.INPUT(x, TensorType::ALL())
.DYNAMIC_OUTPUT(y, TensorType::ALL())
.ATTR(output_types, ListInt, {})
.ATTR(output_shapes, ListListInt, {{},{}})
.ATTR(output_num, Int, 1)
.ATTR(_kernel, String, "dp")
.OP_END_FACTORY_REG(IteratorGetNext)

/**
*@brief Device queue data area.
*@par Attributes:
*@li output_types: Data type of output. An optional list of ints, defaults to {}.
*@li output_shapes: Shapes of output. An optional list of int lists, defaults to {{}, {}}.
*@li channel_name: Channel ID corresponding to TDT. An optional string, defaults to "".
*@li _iterator_name: An optional string, defaults to "IteratorV2".
*@par Inputs: None
*@par Outputs:
*y: Dataset GetNext iterator. A tensor of any type.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DeviceQueueDataset)
.OUTPUT(y, TensorType::ALL())
.ATTR(output_types, ListInt, {})
.ATTR(output_shapes, ListListInt, {{},{}})
.ATTR(channel_name, String, "")
.ATTR(_iterator_name, String, "IteratorV2")
.OP_END_FACTORY_REG(DeviceQueueDataset)

} // namespace ge


#endif // OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_

+ 284
- 0
third_party/fwkacllib/inc/inc/ops/hcom_ops.h View File

@@ -0,0 +1,284 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file hcom_ops.h
* \brief huawei collective communication library ops.
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_

#include "graph/operator_reg.h"

namespace ge {
/**
* @brief Outputs a tensor gathering all input tensors.
* @par Inputs:
* x: A tensor. Must be one of the following types: int8, int16, int32, int64,
uint64, float16, float32.
* @par Attributes:
* @li rank_size: A required integer identifying the number of ranks
participating in the op.
* @li group: A required string identifying the group name of ranks
participating in the op.
* @par Outputs:
* y: A Tensor. Has the same type as "x".
* @attention Constraints:
"group" is limited to 128 characters. Use "hccl_world_group"
as the name of a world group.
*/
REG_OP(HcomAllGather)
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.REQUIRED_ATTR(rank_size, Int)
.REQUIRED_ATTR(group, String)
.OP_END_FACTORY_REG(HcomAllGather)

/**
* @brief Outputs a tensor containing the reduction across all input tensors
passed to op.
* @par Inputs:
* x: A tensor. Must be one of the following types: int8, int16, int32, float16,
float32.
* @par Attributes:
* @li reduction: A required string identifying the reduction operation to
perform. The supported operations are: "sum", "max", "min", "prod".
* @li group: A required string identifying the group name of ranks
participating in the op.
* @li fusion: An optional integer identifying the fusion flag of the op.
0: no fusion; 1 (default): fusion; 2: fuse the ops by fusion id.
* @li fusion_id: An optional integer identifying the fusion id of the op,
defaults to -1.
* The HcomAllReduce ops with the same fusion id will be fused.
* @par Outputs:
* y: A Tensor. Has the same type as "x".
* @attention Constraints:
*"group" is limited to 128 characters. Use "hccl_world_group"
as the name of a world group.
*/
REG_OP(HcomAllReduce)
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
.REQUIRED_ATTR(reduction, String)
.REQUIRED_ATTR(group, String)
.ATTR(fusion, Int, 1)
.ATTR(fusion_id, Int, -1)
.OP_END_FACTORY_REG(HcomAllReduce)

/**
* @brief Broadcasts the input tensor in root rank to all ranks.
* @par Inputs:
* x: A list of dynamic input tensor. Must be one of the following types:
int8, int16, int32, int64, uint64, float16, float32. It's a dynamic input.
* @par Attributes:
* @li root_rank: A required integer identifying the root rank in the op;
input of this rank will be broadcast to other ranks.
* @li group: A required string identifying the group name of ranks
participating in the op.
* @li fusion: An optional integer identifying whether the op needs fusion.
Defaults to 0 (no fusion).
* @li fusion_id: An optional integer identifying the fusion id used when
"fusion" is set. Defaults to -1.
* @par Outputs:
* y: A list of dynamic output tensor. Has the same type and length as "x".
* It's a dynamic output.
* @attention Constraints:
"group" is limited to 128 characters. Use "hccl_world_group"
as the name of a world group.
*/
REG_OP(HcomBroadcast)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.REQUIRED_ATTR(root_rank, Int)
.REQUIRED_ATTR(group, String)
.ATTR(fusion, Int, 0)
.ATTR(fusion_id, Int, -1)
.OP_END_FACTORY_REG(HcomBroadcast)

/**
* @brief Performs reduction from other ranks to the root rank.
* @par Inputs:
* x: A tensor. Must be one of the following types: int8, int16, int32, float16,
float32.
* @par Attributes:
* @li root_rank: A required integer identifying the root rank in the op;
the reduction result will be on this root rank.
* @li reduction: A required string identifying the reduction operation to
perform. The supported operations are: "sum", "max", "min", "prod".
* @li group: A required string identifying the group name of ranks
participating in the op.
* @li fusion: An optional integer identifying the fusion flag of the op.
0 (default): no fusion; 1: fusion; 2: fuse the ops by fusion id.
* @li fusion_id: An optional integer identifying the fusion id of the op,
defaults to -1.
* The HcomReduce ops with the same fusion id will be fused.
* @par Outputs:
* y: A Tensor. Has the same type as "x".
* @attention Constraints:
*"group" is limited to 128 characters. Use "hccl_world_group"
as the name of a world group.
*/
REG_OP(HcomReduce)
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
.REQUIRED_ATTR(root_rank, Int)
.REQUIRED_ATTR(reduction, String)
.REQUIRED_ATTR(group, String)
.ATTR(fusion, Int, 0)
.ATTR(fusion_id, Int, -1)
.OP_END_FACTORY_REG(HcomReduce)
/**
* @brief Performs reduction across all input tensors, scattering in equal
blocks among ranks, each rank getting a chunk of data based on its rank
index.
* @par Inputs:
* x: A tensor. Must be one of the following types: int8, int16, int32, float16,
float32.
* @par Attributes:
* @li reduction: A required string identifying the reduction operation to
perform. The supported operations are: "sum", "max", "min", "prod".
* @li group: A required string identifying the group name of ranks
participating in the op.
* @li rank_size: A required integer identifying the number of ranks
participating in the op.
* @par Outputs:
* y: A Tensor. Has the same type as "x".
* @attention Constraints:
"group" is limited to 128 characters. Use "hccl_world_group"
as the name of a world group.
*/
REG_OP(HcomReduceScatter)
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
.REQUIRED_ATTR(reduction, String)
.REQUIRED_ATTR(group, String)
.REQUIRED_ATTR(rank_size, Int)
.OP_END_FACTORY_REG(HcomReduceScatter)

/**
* @brief Sends the input tensor to destination rank.
* @par Inputs:
* x: A tensor. Must be one of the following types: int8, int16, int32, int64,
uint64, float16, float32.
* @par Attributes:
* @li sr_tag: A required integer identifying the send/recv message tag. The
message will be received by the HcomReceive op with the same "sr_tag".
* @li dest_rank: A required integer identifying the destination rank.
* @li group: A required string identifying the group name of ranks
participating in the op.
* @par Outputs:
* None.
* @attention Constraints:
@li "group" is limited to 128 characters. Use
"hccl_world_group" as the name of a world group.
* @li Operators HcomSend and HcomReceive have the same "sr_tag".
* @see HcomReceive
*/
REG_OP(HcomSend)
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.REQUIRED_ATTR(group, String)
.REQUIRED_ATTR(sr_tag, Int)
.REQUIRED_ATTR(dest_rank, Int)
.OP_END_FACTORY_REG(HcomSend)

/**
* @brief Receives the tensor from source rank.
* @par Inputs:
* None.
* @par Attributes:
* @li sr_tag: A required integer identifying the send/recv message tag. The
message will be sent by the HcomSend op with the same "sr_tag".
* @li src_rank: A required integer identifying the source rank.
* @li group: A required string identifying the group name of ranks
* participating in the op.
* @li shape: A required list of integers identifying the shape of the tensor
to be received.
* @li dtype: A required data type identifying the type of the tensor to be
received. The supported types are: int8, int16, int32, int64, uint64,
float16, float32.
* @par Outputs:
* y: A tensor with type identified in "dtype".
* @attention Constraints:
@li "group" is limited to 128 characters. Use
"hccl_world_group" as the name of a world group.
* @li Operators HcomSend and HcomReceive have the same "sr_tag".
* @li "shape" should be same as the input tensor of HcomSend.
* @li "dtype" should be same as the input tensor of HcomSend.
* @see HcomSend
*/
REG_OP(HcomReceive)
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
.REQUIRED_ATTR(group, String)
.REQUIRED_ATTR(sr_tag, Int)
.REQUIRED_ATTR(src_rank, Int)
.REQUIRED_ATTR(shape, ListInt)
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(HcomReceive)

/**
* @brief Performs Remote Read of input tensors
* @par Inputs:
* remote: A tensor of type int64 or uint64, describing the remote memory
address to read: u64 remoteId, u64 addrRemote, u64 length
* @par Attributes:
* dtype: A required data type. NOTE(review): presumably the element type of
"local" - confirm.
* @par Outputs:
* local: A Tensor of any type receiving the data read.
NOTE(review): the original description says its "value is
length / size_of(Type)", presumably meaning its element count - confirm.
*/
REG_OP(HcomRemoteRead)
.INPUT(remote, TensorType({DT_INT64, DT_UINT64}))
.OUTPUT(local, TensorType::ALL())
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(HcomRemoteRead)

/**
* @brief Performs Remote Ref Read of input tensors
* @par Inputs:
* @li remote: A tensor of type uint64, describing the remote memory address
to read: u64 remoteId, u64 addrRemote, u64 length
* @li cache_var: The local base address. A tensor of type uint64.
* @li local_offset: Skip step length. A tensor of type uint64.
* @par Attributes:
* dtype: A required data type. NOTE(review): its exact role is not visible
in this registration - confirm against the kernel.
* @par Outputs:
* cache_var: The local base address. A tensor of type uint64, registered under
the same name as the "cache_var" input.
*/
REG_OP(HcomRemoteRefRead)
.INPUT(remote, TensorType({DT_UINT64}))
.INPUT(cache_var, TensorType({DT_UINT64}))
.INPUT(local_offset, TensorType({DT_UINT64}))
.OUTPUT(cache_var, TensorType({DT_UINT64}))
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(HcomRemoteRefRead)

/**
* @brief Performs Remote Write of input tensors
* @par Inputs:
* @li remote: A tensor of type int64 or uint64, describing the remote memory
address to write: u64 remoteId, u64 addrRemote, u64 length
* @li local: A Tensor of any type holding the data to write.
NOTE(review): the original description says its "value is
length / size_of(Type)", presumably meaning its element count - confirm.
* @par Outputs:
* None.
*/
REG_OP(HcomRemoteWrite)
.INPUT(remote, TensorType({DT_INT64, DT_UINT64}))
.INPUT(local, TensorType::ALL())
.OP_END_FACTORY_REG(HcomRemoteWrite)

/**
* @brief Performs Remote Write of input tensors
* @par Inputs:
* @li remote: A tensor of type int64 or uint64, describing the remote memory
address to write: u64 remoteId, u64 addrRemote, u64 length
* @li local: A Tensor of any type holding the data to write.
NOTE(review): the original description says its "value is
length / size_of(Type)", presumably meaning its element count - confirm.
* @li local_offset: Optional. A tensor of type uint64.
NOTE(review): presumably the skip step length within "local", as described
for HcomRemoteRefRead - confirm.
* @par Outputs:
* None.
*/
REG_OP(HcomRemoteScatterWrite)
.INPUT(remote, TensorType({DT_INT64, DT_UINT64}))
.INPUT(local, TensorType::ALL())
.OPTIONAL_INPUT(local_offset, TensorType({DT_UINT64}))
.OP_END_FACTORY_REG(HcomRemoteScatterWrite)

} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_

+ 81
- 0
third_party/fwkacllib/inc/inc/ops/hvd_ops.h View File

@@ -0,0 +1,81 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file hvd_ops.h
* \brief Horovod collective communication library ops.
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_

#include "graph/operator_reg.h"

namespace ge {
/**
* @brief Outputs a tensor gathering all input tensors.
* @par Inputs:
* x: A tensor. Must be one of the following types: uint8, int8, uint16, int16, int32,
int64, float16, float32, bool.
* @par Attributes:
* @li rank_size: A required integer identifying the number of ranks
participating in the op.
* @par Outputs:
* y: A Tensor. Has the same type as "x".
*/
REG_OP(HorovodAllgather)
// GE does not support float64 currently, so DT_DOUBLE is not listed below
.INPUT(x, TensorType({DT_UINT8, DT_INT8, DT_UINT16, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL}))
.OUTPUT(y, TensorType({DT_UINT8, DT_INT8, DT_UINT16, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL}))
// required rank_size attr
.REQUIRED_ATTR(rank_size, Int)
.OP_END_FACTORY_REG(HorovodAllgather)

/**
* @brief Outputs a tensor containing the reduction across all input tensors
passed to op.
* @par Inputs:
* x: A tensor. Must be one of the following types: int32, int64, float16, float32
* @par Attributes:
* @li reduce_op: A required int identifying the reduction operation to
perform. The supported operations are: "sum", "max", "min", "prod".
NOTE(review): the attribute is registered as Int; the mapping from integer
values to these operations is not visible in this header - confirm.
* @par Outputs:
* y: A Tensor. Has the same type as "x".
*/
REG_OP(HorovodAllreduce)
.INPUT(x, TensorType({DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(reduce_op, Int)
.OP_END_FACTORY_REG(HorovodAllreduce)

/**
* @brief Broadcasts the input tensor in root rank to all ranks.
* @par Inputs:
* x: A tensor. Must be one of the following types: uint8, int8, uint16, int16,
int32, int64, float16, float32, bool.
NOTE(review): "x" and "y" are registered with INPUT/OUTPUT (not
DYNAMIC_INPUT/DYNAMIC_OUTPUT); earlier documentation described them as
dynamic tensor lists - confirm which is intended.
* @par Attributes:
* @li root_rank: A required integer identifying the root rank in the op
input of this rank will be broadcast to other ranks.
* @par Outputs:
* y: A Tensor. Has the same type as "x".
*/
REG_OP(HorovodBroadcast)
.INPUT(x, TensorType({DT_UINT8, DT_INT8, DT_UINT16, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL}))
.OUTPUT(y, TensorType({DT_UINT8, DT_INT8, DT_UINT16, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_BOOL}))
.REQUIRED_ATTR(root_rank, Int)
.OP_END_FACTORY_REG(HorovodBroadcast)

} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_

+ 1539
- 0
third_party/fwkacllib/inc/inc/ops/image_ops.h
File diff suppressed because it is too large
View File


+ 84
- 0
third_party/fwkacllib/inc/inc/ops/internal_ops.h View File

@@ -0,0 +1,84 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file internal_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {

/**
*@brief aicpu assist help op for auxiliary matrix generation. \n

*@par Inputs:
*x: A dynamic input; what it carries depends on attribute func_name. Must be
one of the following types: float32, float16, int8, int16, uint16, uint8,
int32, int64, uint32, uint64, bool, double. \n

*@par Attributes:
*@li func_name: A required string param, for example "topkv2". \n

*@par Outputs:
*y: A dynamic output; what it carries depends on attribute func_name. Accepts
the same types as x.
*/
REG_OP(AssistHelp)
.DYNAMIC_INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16,
DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE }))
.DYNAMIC_OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16,
DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
. REQUIRED_ATTR (func_name, String)
. OP_END_FACTORY_REG(AssistHelp)

/**
*@brief aicpu cache help for lhisi cache flush. \n

*@par Inputs:
*x: A Tensor of a basic type (TensorType::BasicType()). \n

*@par Outputs:
*x: A Tensor of a basic type (TensorType::BasicType()). Registered under the
same name as the input.

*NOTE(review): earlier documentation described the input/output as "dynamic
for attribute func_name", but this op registers no attribute and uses plain
INPUT/OUTPUT - confirm the intended contract.
*/
REG_OP(CacheUpdate)
.INPUT(x, TensorType::BasicType())
.OUTPUT(x, TensorType::BasicType())
.OP_END_FACTORY_REG(CacheUpdate)

/**
*@brief transfer data from L1 buffer to DDR or DDR to L1. \n

*@par Inputs:
*x: A Tensor of any type (TensorType::ALL()). \n

*@par Attributes:
*@li src_buf: A required string.
*@li dst_buf: A required string.
NOTE(review): presumably these name the source and destination buffers
(L1 / DDR, per the brief); the accepted values are not visible in this
header - confirm. \n

*@par Outputs:
*y: A Tensor of any type (TensorType::ALL()).

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(InternalDataMove)
.INPUT(x, TensorType::ALL())
.OUTPUT(y, TensorType::ALL())
.REQUIRED_ATTR(src_buf, String)
.REQUIRED_ATTR(dst_buf, String)
.OP_END_FACTORY_REG(InternalDataMove)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_

+ 443
- 0
third_party/fwkacllib/inc/inc/ops/linalg_ops.h View File

@@ -0,0 +1,443 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file linalg_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {

/**
*@brief Computes the reverse mode backpropagated gradient of the Cholesky
algorithm . \n

*@par Inputs:
*The input x has to be symmetric and positive definite. Inputs include:
*@li x:A Tensor. Must be one of the following types: double, float32. Output
of batch Cholesky algorithm x = cholesky(A). Shape is [..., M, M]. Algorithm
depends only on lower triangular part of the innermost matrices of this tensor.
*@li grad:A Tensor. Must have the same type as x. df/dx where f is some
scalar function. Shape is [..., M, M]. Algorithm depends only on lower
triangular part of the innermost matrices of this tensor . \n

*@par Outputs:
*y:A Tensor. Has the same type as x . \n

*@attention Constraints:
*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions
form square matrices.

*@par Third-party framework compatibility
*Compatible with tensorflow CholeskyGrad operator.
*/

REG_OP(CholeskyGrad)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(grad, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(CholeskyGrad)

/**
*@brief Computes the Cholesky decomposition of one or more square matrices . \n

*@par Inputs:
*The input x has to be symmetric and positive definite. Inputs include:
*x:A Tensor. Must be one of the following types: double, float32, float16,
complex64, complex128. Shape is [..., M, M] . \n

*@par Outputs:
*y:A Tensor. Has the same type as x . \n

*@attention Constraints:
*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions
form square matrices. \n

*@par Third-party framework compatibility
*Compatible with tensorflow Cholesky operator.
*/

REG_OP(Cholesky)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(Cholesky)

/**
*@brief Computes the sign and the log of the absolute value of the determinant
of one or more square matrices . \n

*@par Inputs:
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
form square matrices. Inputs include:
*x:A Tensor. Must be one of the following types: double, float32,
complex64, complex128. Shape is [..., M, M] . \n

*@par Outputs:
*Outputs are listed in registration order (sign first, then y):
*@li sign:A Tensor. Has the same type as x.
*@li y:A Tensor. Has the same type as x . \n

*@attention Constraints:
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
form square matrices. \n

*@par Third-party framework compatibility
*Compatible with tensorflow LogMatrixDeterminant operator.
*/

REG_OP(LogMatrixDeterminant)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(LogMatrixDeterminant)

/**
*@brief Computes the determinant of one or more square matrices . \n

*@par Inputs:
*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions
form square matrices. Inputs include:
*x:A Tensor. Must be one of the following types: double, float32, complex64,
complex128. Shape is [..., M, M] . \n

*@par Outputs:
*y:A Tensor. Has the same type as x . \n

*@attention Constraints:
*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions
form square matrices. \n

*@par Third-party framework compatibility
*Compatible with tensorflow MatrixDeterminant operator.
*/

REG_OP(MatrixDeterminant)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(MatrixDeterminant)

/**
*@brief Computes the inverse of one or more square invertible matrices or
their adjoints (conjugate transposes) . \n

*@par Inputs:
*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions
form square matrices. Inputs include:
*x:A Tensor of input. Must be one of the following types: float32, double,
float16, complex64, complex128. Shape is [..., M, M] . \n

*@par Attributes:
*adjoint:An optional bool. Defaults to False. Boolean indicating whether to
deal with matrix or its (block-wise) adjoint . \n

*@par Outputs:
*y:A Tensor. Has the same type as x . \n

*@attention Constraints:
*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions
form square matrices. \n

*@par Third-party framework compatibility
*Compatible with tensorflow MatrixInverse operator.
*/

REG_OP(MatrixInverse)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(adjoint, Bool, false)
.OP_END_FACTORY_REG(MatrixInverse)

/**
*@brief Solves systems of linear equations . \n

*@par Inputs:
*The input rhs must have the same type as matrix. Inputs include:
*@li matrix:A Tensor of input. Must be one of the following types: float32,
double, complex64, complex128. Shape is [..., M, M].
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n

*@par Attributes:
*adjoint:An optional bool. Defaults to False. Boolean indicating whether to
solve with matrix or its (block-wise) adjoint . \n

*@par Outputs:
*y:A Tensor. Has the same type as matrix . \n

*@attention Constraints:
*The input matrix is a tensor of shape [..., M, M] whose inner-most 2
dimensions form square matrices. \n

*@par Third-party framework compatibility
*Compatible with tensorflow MatrixSolve operator.
*/

REG_OP(MatrixSolve)
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(adjoint, Bool, false)
.OP_END_FACTORY_REG(MatrixSolve)

/**
*@brief Solves systems of linear equations in the least squares sense . \n

*@par Inputs:
*The input rhs must have the same type as matrix. Inputs include:
*@li matrix:A Tensor. Must be one of the following types: float32, double,
float16, complex64, complex128. Shape is [..., M, M].
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K].
*@li l2:0-D double Tensor. Ignored if fast=False . \n

*@par Attributes:
*fast:An optional bool. Defaults to True . \n

*@par Outputs:
*y:Tensor of shape [..., N, K] whose inner-most 2 dimensions form M-by-K
matrices that solve the equations matrix[..., :, :] * output[..., :, :] =
rhs[..., :, :] in the least squares sense. Registered types are float32 and
double only . \n

*@attention Constraints:
*The input matrix is a tensor of shape [..., M, M] whose inner-most 2
dimensions form square matrices. \n
*NOTE(review): the shapes above are inconsistent (matrix is described as
[..., M, M] while y uses N), and y accepts fewer types than matrix/rhs -
confirm against the operator implementation. \n

*@par Third-party framework compatibility
*Compatible with tensorflow MatrixSolveLs operator.
*/

REG_OP(MatrixSolveLs)
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(l2, TensorType({DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(fast, Bool, true)
.OP_END_FACTORY_REG(MatrixSolveLs)

/**
*@brief Solves systems of linear equations with upper or lower triangular
matrices by backsubstitution . \n

*@par Inputs:
*The input rhs must have the same type as matrix. Inputs include:
*@li matrix: A Tensor. Must be one of the following types: float32, double,
float16, complex64, complex128. Shape is [..., M, M].
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n

*@par Attributes:
*@li lower: An optional bool. Defaults to True. Boolean indicating whether
the innermost matrices in matrix are lower or upper triangular.
*@li adjoint: An optional bool. Defaults to False. Boolean indicating whether
to solve with matrix or its (block-wise) adjoint . \n

*@par Outputs:
*y:A Tensor. Has the same type as matrix . \n

*@attention Constraints:
*The input matrix is a tensor of shape [..., M, M] whose inner-most 2
dimensions form square matrices. \n

*@par Third-party framework compatibility
*Compatible with tensorflow MatrixTriangularSolve operator.
*/

REG_OP(MatrixTriangularSolve)
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(lower, Bool, true)
.ATTR(adjoint, Bool, false)
.OP_END_FACTORY_REG(MatrixTriangularSolve)

/**
*@brief Computes the QR decompositions of one or more matrices . \n

*@par Inputs:
*The input shape of x must be [..., M, N]. Inputs include:
*x:A Tensor whose shape is [..., M, N]. Must be one of the following types:
float16, float32, double, complex64, complex128. \n

*@par Attributes:
*full_matrices: An optional bool. Defaults to False. If true, compute
full-sized q and r. If false (the default), compute only the leading P
columns of q . \n

*@par Outputs:
*@li q: A Tensor. Has the same type as x.
*@li r: A Tensor. Has the same type as x . \n

*@attention Constraints:
*The input matrix x is a tensor of shape [..., M, N] whose inner-most 2
dimensions form matrices of size [M, N]. \n

*@par Third-party framework compatibility
*Compatible with tensorflow Qr operator.
*/

REG_OP(Qr)
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
DT_COMPLEX64, DT_COMPLEX128 }))
.OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
DT_COMPLEX64, DT_COMPLEX128 }))
.OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
DT_COMPLEX64, DT_COMPLEX128 }))
.ATTR(full_matrices, Bool, false)
.OP_END_FACTORY_REG(Qr)

/**
*@brief Computes the eigen decomposition of a batch of self-adjoint matrices . \n

*@par Inputs:
*The input shape of x must be [..., N, N]. Inputs include:
*x:Tensor of shape [..., N, N]. Must be one of the following types: double,
float32. Only the lower triangular part of each inner matrix is referenced . \n

*@par Attributes:
*compute_v:An optional bool. Defaults to True . \n

*@par Outputs:
*@li eigen_value:Eigenvalues. Shape is [..., N]. Sorted in non-decreasing order.
*@li eigen_vector:Shape is [..., N, N]. The columns of the inner most matrices
contain eigenvectors of the corresponding matrices in tensor x . \n

*@attention Constraints:
*The input x is a tensor of shape [..., N, N] whose inner-most 2 dimensions
form square matrices. \n

*@par Third-party framework compatibility
*Compatible with tensorflow SelfAdjointEig operator.
*/

REG_OP(SelfAdjointEig)
.INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT }))
.OUTPUT(eigen_value, TensorType({ DT_DOUBLE, DT_FLOAT }))
.OUTPUT(eigen_vector, TensorType({ DT_DOUBLE, DT_FLOAT }))
.ATTR(compute_v, Bool, true)
.OP_END_FACTORY_REG(SelfAdjointEig)

/**
*@brief Computes the singular value decompositions of one or more matrices . \n

*@par Inputs:
*The input shape of x must be [..., M, N]. Inputs include:
*x:Tensor of shape [..., M, N]. Must be one of the following types: double,
float32. Let P be the minimum of M and N . \n

*@par Attributes:
*@li compute_uv:An optional bool. Defaults to True. If True then left and
right singular vectors will be computed and returned in u and v,
respectively. Otherwise, only the singular values will be computed, which
can be significantly faster.
*@li full_matrices:An optional bool. Defaults to False. Selects the shapes of
u and v as described under Outputs . \n

*@par Outputs:
*@li sigma:Singular values. Shape is [..., P]. The values are sorted in
reverse order of magnitude, so s[..., 0] is the largest value, s[..., 1]
is the second largest, etc.
*@li u:Left singular vectors. If full_matrices is False (default) then shape
is [..., M, P]; if full_matrices is True then shape is [..., M, M]. Not
returned if compute_uv is False.
*@li v:Right singular vectors. If full_matrices is False (default) then shape
is [..., N, P]. If full_matrices is True then shape is [..., N, N]. Not
returned if compute_uv is False . \n

*@attention Constraints:
*The input x is a tensor of shape [..., M, N] whose inner-most 2 dimensions
form matrices of size [M, N]. \n

*@par Third-party framework compatibility
*Compatible with tensorflow Svd operator
*/

REG_OP(Svd)
.INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT }))
.OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT }))
.OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT }))
.OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT }))
.ATTR(compute_uv, Bool, true)
.ATTR(full_matrices, Bool, false)
.OP_END_FACTORY_REG(Svd)

/**
*@brief Computes the LU decomposition of one or more square matrices . \n

*@par Inputs:
*input: A tensor of shape `[..., M, M]` whose inner-most 2 dimensions form
matrices of size `[M, M]`. Must be one of the following types: float32,
double, complex64, complex128 . \n

*@par Attributes:
*output_idx_type: A required Type attribute.
NOTE(review): presumably selects the element type of output p (int32 or
int64, the types registered for p) - confirm. \n

*@par Outputs:
*@li lu: A tensor of shape `[..., M, M]` whose strictly lower triangular part
denotes the lower triangular factor `L` with unit diagonal, and whose upper
triangular part denotes the upper triangular factor `U`.
*@li p: Permutation of the rows encoded as a list of indices in `0..M-1`.
Shape is `[..., M]`. Type is int32 or int64 . \n

*@par Third-party framework compatibility
* Compatible with TensorFlow Lu operator.
*/

REG_OP(Lu)
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(p, TensorType({DT_INT32, DT_INT64}))
.REQUIRED_ATTR(output_idx_type, Type)
.OP_END_FACTORY_REG(Lu)

/**
*@brief Computes the matrix square root of one or more square matrices . \n

*@par Inputs:
*input: A Tensor. Must be one of the following types: float32, double,
complex64, complex128. Shape is `[..., M, M]` . \n

*@par Outputs:
*y: A Tensor with the same registered types as input. Shape is `[..., M, M]` . \n

*@par Third-party framework compatibility
* Compatible with TensorFlow MatrixSquareRoot operator.
*/

REG_OP(MatrixSquareRoot)
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(MatrixSquareRoot)

/**
*@brief Solves tridiagonal systems of equations . \n

*@par Inputs:
*@li diagonals: Tensor of shape `[..., 3, M]` whose innermost 2 dimensions
represent the tridiagonal matrices with three rows being the superdiagonal,
diagonals, and subdiagonals, in order. The last element of the superdiagonal
and the first element of the subdiagonal is ignored. Must be one of the
following types: float32, double.
*@li rhs: Tensor of shape `[..., M, K]`, representing K right-hand sides per each
left-hand side. Must be one of the following types: float32, double . \n

*@par Attributes:
*partial_pivoting: An optional bool. Defaults to True.
NOTE(review): presumably enables partial pivoting in the solver - confirm. \n

*@par Outputs:
*y: Tensor of shape `[..., M, K]` containing the solutions . \n

*@par Third-party framework compatibility
* Compatible with TensorFlow TridiagonalSolve operator.
*/

REG_OP(TridiagonalSolve)
.INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(partial_pivoting, Bool, true)
.OP_END_FACTORY_REG(TridiagonalSolve)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_

+ 230
- 0
third_party/fwkacllib/inc/inc/ops/list_ops.h View File

@@ -0,0 +1,230 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file list_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_

#include <algorithm>
#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {

/**
*@brief Creates and returns an empty tensor list. \n

*@par Inputs:
*@li element_shape: A shape compatible with that of elements in the list.
A Tensor of type int32 or int64.
*@li max_num_elements: The maximum number of elements. A Tensor of type int32. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. Defaults to DT_INT32. \n

*@par Outputs:
*@li handle: An empty tensor list. A Tensor of type variant . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow EmptyTensorList operator.
*/
REG_OP(EmptyTensorList)
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
.INPUT(max_num_elements, TensorType({DT_INT32}))
.OUTPUT(handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(EmptyTensorList)

/**
*@brief Returns a list which has the passed-in `Tensor` as last element
and the other elements of the given list in `input_handle`. \n

*@par Inputs:
*@li input_handle: The old list. A Tensor of type variant.
*@li tensor: The tensor to put on the list. Must be one of the following
types: float16, float32, double, int8, int16, int32, int64, uint8, uint16,
qint8, quint8, qint16, quint16, qint32, bool, resource, string, complex64,
complex128. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. Defaults to DT_INT32. \n

*@par Outputs:
*@li output_handle:A list with the elements of old list followed by tensor.
A Tensor of type variant. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListPushBack operator.
*/
REG_OP(TensorListPushBack)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListPushBack)

/**
*@brief Returns the last element of the input list as well as a
list with all but that element. \n

*@par Inputs:
*@li input_handle: The input list. A Tensor of type variant.
*@li element_shape: A shape compatible with that of elements in the list.
A Tensor of type int32. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. Defaults to DT_INT32. \n

*@par Outputs:
*@li output_handle:A list with all elements of the input list except the
last one. A Tensor of type variant.
*@li tensor:The withdrawn last element of the list. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListPopBack operator.
*/
REG_OP(TensorListPopBack)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(element_shape, TensorType({DT_INT32}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListPopBack)

/**
*@brief Returns the number of tensors in the input tensor list. \n

*@par Inputs:
*@li input_handle: The input list. A Tensor of type variant. \n

*@par Outputs:
*@li length:The number of tensors in the list. A Tensor of type int32. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListLength operator.
*/
REG_OP(TensorListLength)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.OUTPUT(length, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(TensorListLength)

/**
*@brief Returns the shape of elements in the input tensor list. \n

*@par Inputs:
*@li input_handle: The input list. A Tensor of type variant. \n

*@par Attributes:
*@li shape_type: The type of shape in the list. Defaults to DT_INT32. \n

*@par Outputs:
*@li element_shape:A shape compatible with that of elements in the list.
A Tensor of type int32 or int64. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListElementShape operator.
*/
REG_OP(TensorListElementShape)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.OUTPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
.ATTR(shape_type, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListElementShape)

/**
*@brief Creates a list of the given size with empty elements. \n

*@par Inputs:
*@li element_shape: A shape compatible with that of elements in the list.
A Tensor of type int32 or int64.
*@li num_elements: The number of elements to reserve. A Tensor of type int32. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. Defaults to DT_INT32.
*@li shape_type: The type of shape in the list. Defaults to DT_INT32. \n

*@par Outputs:
*@li handle: An output tensor list. A Tensor of type variant . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListReserve operator.
*/
REG_OP(TensorListReserve)
.INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
.INPUT(num_elements, TensorType({DT_INT32}))
.OUTPUT(handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.ATTR(shape_type, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListReserve)

/**
*@brief Gets the element at the index position of the input tensor list. \n

*@par Inputs:
*@li input_handle: The input list. A Tensor of type variant.
*@li index: A tensor of position. A Tensor of type int32.
*@li element_shape: A shape compatible with that of elements in the list.
A Tensor of type int32. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. Defaults to DT_INT32. \n

*@par Outputs:
*@li item: An output tensor value of index position. Must be one of the
following types: float16, float32, double, int8, int16, int32, int64, uint8,
uint16, qint8, quint8, qint16, quint16, qint32, bool, string, complex64,
complex128.
NOTE(review): unlike TensorListPushBack/PopBack/SetItem, the resource type
is not registered for this output - confirm whether that is intentional. \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListGetItem operator.
*/
REG_OP(TensorListGetItem)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(index, TensorType({DT_INT32}))
.INPUT(element_shape, TensorType({DT_INT32}))
.OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListGetItem)

/**
*@brief Sets the index-th position of the list to contain the given tensor. \n

*@par Inputs:
*@li input_handle: The input list. A Tensor of type variant.
*@li index: The position in the list to which the tensor will be assigned.
A Tensor of type int32.
*@li item: The element to be assigned to that position. Must be one of the
following types: float16, float32, double, int8, int16, int32, int64, uint8,
uint16, qint8, quint8, qint16, quint16, qint32, bool, resource, string,
complex64, complex128. \n

*@par Attributes:
*@li element_dtype: The type of elements in the list. Defaults to DT_INT32. \n

*@par Outputs:
*@li output_handle: An output tensor list. A Tensor of type variant . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow TensorListSetItem operator.
*/
REG_OP(TensorListSetItem)
.INPUT(input_handle, TensorType({DT_VARIANT}))
.INPUT(index, TensorType({DT_INT32}))
.INPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(output_handle, TensorType({DT_VARIANT}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListSetItem)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_

+ 116
- 0
third_party/fwkacllib/inc/inc/ops/logging_ops.h View File

@@ -0,0 +1,116 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file logging_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_

#include "graph/operator.h"
#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Provides the time since epoch in seconds . \n

*@par Inputs:
*None. This op registers no inputs . \n

*@par Outputs:
*y: A Tensor of type float64 (DT_DOUBLE). The timestamp as a double for
seconds since the Unix epoch . \n

*@attention Constraints:
*The timestamp is computed when the op is executed, not when it is added to
the graph . \n

*@par Third-party framework compatibility
*Compatible with tensorflow Timestamp operator . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(Timestamp)
.OUTPUT(y, TensorType({DT_DOUBLE}))
.OP_END_FACTORY_REG(Timestamp)

/**
*@brief Asserts that the given condition is true . \n

*@par Inputs:
*If input_condition evaluates to false, print the list of tensors in data.
*Inputs include:
*@li input_condition: The condition to evaluate. A Tensor of type bool.
*@li input_data: The tensors to print out when condition is false.
It's a dynamic input. Must be one of the following types: float32, float16,
int8, int16, uint16, uint8, int32, int64, uint32, uint64, bool, double,
string. \n

*@par Attributes:
*summarize: An optional int. Defaults to 3. Print this many entries of each
tensor . \n

*@par Outputs:
*None. This op registers no outputs . \n

*@par Third-party framework compatibility
*Compatible with tensorflow Assert operator . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(Assert)
.INPUT(input_condition, TensorType{DT_BOOL})
.DYNAMIC_INPUT(input_data, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8,
DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32,
DT_UINT64, DT_BOOL, DT_DOUBLE, DT_STRING}))
.ATTR(summarize, Int, 3)
.OP_END_FACTORY_REG(Assert)

/**
*@brief Prints a tensor . \n

*@par Inputs:
*x: The tensor to print, it is a dynamic_input. Must be one of the following
types: float32, float16, int8, int16, uint16, uint8, int32, int64, uint32,
uint64, double, string . \n

*@par Outputs:
*None. This op registers no outputs . \n

*@par Third-party framework compatibility
*Compatible with aicpu Print operator . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(Print)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32,
DT_INT64, DT_UINT32, DT_UINT64, DT_DOUBLE, DT_STRING}))
.OP_END_FACTORY_REG(Print)

/**
*@brief Prints a string scalar . \n

*@par Inputs:
*The dtype of input x must be string. Inputs include:
*x: The string scalar to print . \n

*@par Attributes:
*output_stream: An optional string specifying the output stream or logging
level to print to. Defaults to "stderr" . \n

*@par Outputs:
*None. This op registers no outputs . \n

*@par Third-party framework compatibility
*Compatible with tensorflow PrintV2 operator . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(PrintV2)
.INPUT(x, TensorType({DT_STRING}))
.ATTR(output_stream, String, "stderr")
.OP_END_FACTORY_REG(PrintV2)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_

+ 308
- 0
third_party/fwkacllib/inc/inc/ops/lookup_ops.h View File

@@ -0,0 +1,308 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file lookup_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Replaces the contents of the table with the specified keys and values . \n

*@par Inputs:
*The dtype of input handle must be resource. Inputs include:
*@li handle: A Tensor of type resource. Handle to the table.
*@li keys: A Tensor. Any shape. Keys to import. Must be one of the following
types: string, int32, int64.
*@li values: A Tensor. Values to associate with keys. Must be one of the
following types: bool, double, float32, int32, int64, string . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow LookupTableImport operator.
*/

REG_OP(LookupTableImport)
.INPUT(handle, TensorType({DT_RESOURCE}))
.INPUT(keys, TensorType({DT_STRING, DT_INT32, DT_INT64}))
.INPUT(values, TensorType({DT_BOOL, DT_DOUBLE, \
DT_FLOAT, DT_INT32, DT_INT64, DT_STRING}))
.OP_END_FACTORY_REG(LookupTableImport)

/**
*@brief Updates the table to associate keys with values . \n

*@par Inputs:
*The dtype of input handle must be resource. Inputs include:
*@li handle: A Tensor of type resource. Handle to the table.
*@li keys: A Tensor. Any shape. Keys to insert. Must be one of the following
types: string, int32, int64.
*@li values: A Tensor. Values to associate with keys. Must be one of the
following types: bool, double, float32, int32, int64, string . \n

*@attention Constraints:
*@li The tensor keys must be of the same type as the keys of the table.
*@li The tensor values must be of the type of the table values . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow LookupTableInsert operator.
*/

REG_OP(LookupTableInsert)
.INPUT(handle, TensorType({DT_RESOURCE}))
.INPUT(keys, TensorType({DT_STRING, DT_INT32, DT_INT64}))
.INPUT(values, TensorType({DT_BOOL, DT_DOUBLE, DT_FLOAT, \
DT_INT32, DT_INT64, DT_STRING}))
.OP_END_FACTORY_REG(LookupTableInsert)

/**
*@brief Outputs all keys and values in the table . \n

*@par Inputs:
*handle: A Tensor of type resource. Handle to the table . \n

*@par Attributes:
*@li Tkeys: A required DType. Type of the exported keys.
*@li Tvalues: A required DType. Type of the exported values . \n

*@par Outputs:
*@li keys: A Tensor of type Tkeys. Must be one of the following types:
int32, int64, string.
*@li values: A Tensor of type Tvalues. Must be one of the following types:
bool, double, float32, int32, int64, string . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow LookupTableExport operator.
*/

REG_OP(LookupTableExport)
.INPUT(handle, TensorType({DT_RESOURCE}))
.OUTPUT(keys, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.OUTPUT(values, TensorType({DT_BOOL, DT_DOUBLE, DT_FLOAT, \
DT_INT32, DT_INT64, DT_STRING}))
.REQUIRED_ATTR(Tkeys, Type)
.REQUIRED_ATTR(Tvalues, Type)
.OP_END_FACTORY_REG(LookupTableExport)

/**
*@brief Computes the number of elements in the given table . \n

*@par Inputs:
*handle: A Tensor of type resource. Handle to the table . \n

*@par Outputs:
*size: A Tensor of type int64. The number of elements in the table . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow LookupTableSize operator.
*/

REG_OP(LookupTableSize)
.INPUT(handle, TensorType({DT_RESOURCE}))
.OUTPUT(size, TensorType({DT_INT64}))
.OP_END_FACTORY_REG(LookupTableSize)

/**
*@brief Looks up keys in a table, outputs the corresponding values . \n

*@par Inputs:
*The dtype of input handle must be resource. Inputs include:
*@li handle: A Tensor of type resource. Handle to the table.
*@li keys: A Tensor. Any shape. Keys to look up. Must be one of the following
types: int32, int64, string.
*@li default_value: A Tensor. Must be one of the following types: double,
float32, int32, int64, string, bool . \n

*@par Attributes:
*Tout: A required DType. Specified type of output values . \n

*@par Outputs:
*values: A Tensor. Has the same type as default_value . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow LookupTableFind operator.
*/

REG_OP(LookupTableFind)
.INPUT(handle, TensorType({DT_RESOURCE}))
.INPUT(keys, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.INPUT(default_value, TensorType({DT_DOUBLE, DT_FLOAT, \
DT_INT32, DT_INT64, DT_STRING, DT_BOOL}))
.OUTPUT(values, TensorType({DT_DOUBLE, DT_FLOAT, DT_INT32, \
DT_INT64, DT_STRING, DT_BOOL}))
.REQUIRED_ATTR(Tout, Type)
.OP_END_FACTORY_REG(LookupTableFind)

/**
*@brief Creates a non-initialized hash table . \n

*@par Attributes:
*@li container: An optional string. Defaults to "". If non-empty, this table
is placed in the given container. Otherwise, a default container is used.
*@li shared_name: An optional string. Defaults to "". If non-empty, this
table is shared under the given name across multiple sessions.
*@li use_node_name_sharing: An optional bool. Defaults to false. If true and
shared_name is empty, the table is shared using the node name.
*@li key_dtype: A required DType. Type of the table keys.
*@li value_dtype: A required DType. Type of the table values . \n

*@par Outputs:
*handle: A Tensor of type resource. Handle to the table . \n

*@attention Constraints:
*The implementation for HashTable on Ascend runs on AI CPU and has poor
performance . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow HashTable operator.
*/

REG_OP(HashTable)
.OUTPUT(handle, TensorType({DT_RESOURCE}))
.ATTR(container, String, "")
.ATTR(shared_name, String, "")
.ATTR(use_node_name_sharing, Bool, false)
.REQUIRED_ATTR(key_dtype, Type)
.REQUIRED_ATTR(value_dtype, Type)
.OP_END_FACTORY_REG(HashTable)

/**
*@brief Table initializer that takes two tensors for keys and values
respectively . \n

*@par Inputs:
*The dtype of input handle must be resource. Inputs include:
*@li handle: A Tensor of type resource. Handle to a table which will be
initialized.
*@li keys: A Tensor. Keys of type Tkey. Must be one of the following types:
int32, int64, string.
*@li values: A Tensor. Values of type Tval. Must be one of the following
types: int32, int64, float32, double, bool, string . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow InitializeTable operator.
*/

REG_OP(InitializeTable)
.INPUT(handle, TensorType({DT_RESOURCE}))
.INPUT(keys, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.INPUT(values, TensorType({DT_INT32, DT_INT64, DT_FLOAT, \
DT_DOUBLE, DT_BOOL, DT_STRING}))
.OP_END_FACTORY_REG(InitializeTable)

/**
*@brief Creates an empty hash table that uses tensors as the backing store . \n

*@par Inputs:
*The input deleted_key must have the same type as empty_key. Inputs include:
*@li empty_key: A Tensor. The key used to represent empty key buckets
internally. Must not be used in insert or lookup operations. Must be one of
the following types: int32, int64, string.
*@li deleted_key: A Tensor. Must have the same type as empty_key . \n

*@par Attributes:
*@li container: An optional string. Defaults to "". If non-empty, this table
is placed in the given container. Otherwise, a default container is used.
*@li shared_name: An optional string. Defaults to "". If non-empty, this
table is shared under the given name across multiple sessions.
*@li use_node_name_sharing: An optional bool. Defaults to false. If true and
shared_name is empty, the table is shared using the node name.
*@li value_dtype: A required DType. Type of the table values.
*@li value_shape: An optional TensorShape or list of ints. Defaults to [].
The shape of each value.
*@li initial_num_buckets: An optional int. Defaults to 131072. The initial
number of hash table buckets. Must be a power of 2.
*@li max_load_factor: An optional float. Defaults to 0.8. The maximum ratio
between number of entries and number of buckets before growing the table.
Must be between 0 and 1 . \n

*@par Outputs:
*handle: A Tensor of type resource. Handle to the table . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow MutableDenseHashTable operator.
*/

REG_OP(MutableDenseHashTable)
.INPUT(empty_key, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.INPUT(deleted_key, TensorType({DT_INT32, DT_INT64, DT_STRING}))
.OUTPUT(handle, TensorType({DT_RESOURCE}))
.ATTR(container, String, "")
.ATTR(shared_name, String, "")
.ATTR(use_node_name_sharing, Bool, false)
.REQUIRED_ATTR(value_dtype, Type)
.ATTR(value_shape, ListInt, {})
.ATTR(initial_num_buckets, Int, 131072)
.ATTR(max_load_factor, Float, 0.8)
.OP_END_FACTORY_REG(MutableDenseHashTable)

/**
*@brief Creates an empty hash table . \n

*@par Attributes:
*@li container: An optional string. Defaults to "". If non-empty, this table
is placed in the given container. Otherwise, a default container is used.
*@li shared_name: An optional string. Defaults to "". If non-empty, this
table is shared under the given name across multiple sessions.
*@li use_node_name_sharing: An optional bool. Defaults to false. If true and
shared_name is empty, the table is shared using the node name.
*@li key_dtype: A required DType. Type of the table keys.
*@li value_dtype: A required DType. Type of the table values.
*@li value_shape: An optional TensorShape or list of ints. Defaults to [] . \n

*@par Outputs:
*handle: A Tensor of type resource. Handle to the table . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow MutableHashTableOfTensors operator.
*/

REG_OP(MutableHashTableOfTensors)
.OUTPUT(handle, TensorType({DT_RESOURCE}))
.ATTR(container, String, "")
.ATTR(shared_name, String, "")
.ATTR(use_node_name_sharing, Bool, false)
.REQUIRED_ATTR(key_dtype, Type)
.REQUIRED_ATTR(value_dtype, Type)
.ATTR(value_shape, ListInt, {})
.OP_END_FACTORY_REG(MutableHashTableOfTensors)

/**
*@brief Creates an empty hash table . \n

*@par Attributes:
*@li container: An optional string. Defaults to "". If non-empty, this table
is placed in the given container. Otherwise, a default container is used.
*@li shared_name: An optional string. Defaults to "". If non-empty, this
table is shared under the given name across multiple sessions.
*@li use_node_name_sharing: An optional bool. Defaults to false. If true and
shared_name is empty, the table is shared using the node name.
*@li key_dtype: A required DType. Type of the table keys.
*@li value_dtype: A required DType. Type of the table values . \n

*@par Outputs:
*handle: A Tensor of type resource. Handle to the table . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow MutableHashTable operator.
*/

REG_OP(MutableHashTable)
.OUTPUT(handle, TensorType({DT_RESOURCE}))
.ATTR(container, String, "")
.ATTR(shared_name, String, "")
.ATTR(use_node_name_sharing, Bool, false)
.REQUIRED_ATTR(key_dtype, Type)
.REQUIRED_ATTR(value_dtype, Type)
.OP_END_FACTORY_REG(MutableHashTable)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_

+ 957
- 0
third_party/fwkacllib/inc/inc/ops/math_ops.h View File

@@ -0,0 +1,957 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file math_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {

/**
*@brief Computes the output as (shift + scale * x) ^ power . \n

*@par Inputs:
* x: A Tensor of type float16 or float32 . \n

*@par Attributes:
*@li power: An optional float32. Defaults to 1.0.
*@li scale: An optional float32. Defaults to 1.0.
*@li shift: An optional float32. Defaults to 0.0 . \n

*@par Outputs:
* y: A Tensor. Has the same type and shape as "x" . \n

*@par Third-party framework compatibility
* Compatible with the Caffe operator Power.
*/

REG_OP(Power)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(power, Float, 1.0)
.ATTR(scale, Float, 1.0)
.ATTR(shift, Float, 0.0)
.OP_END_FACTORY_REG(Power)

/**
*@brief Compute the lower regularized incomplete Gamma function P(a, x) . \n

*@par Inputs:
*The input a and x must have the same type. Inputs include:
*@li a: A Tensor. Must be one of the following types: float32, double.
*@li x: A Tensor. Must have the same type as a . \n

*@par Outputs:
*z: A Tensor. Has the same type as a . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow Igamma operator.
*/

REG_OP(Igamma)
.INPUT(a, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(z, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(Igamma)

/**
*@brief Compute the upper regularized incomplete Gamma function Q(a, x) . \n

*@par Inputs:
*The input a and x must have the same type. Inputs include:
*@li a: A Tensor. Must be one of the following types: float32, double.
*@li x: A Tensor. Must have the same type as a . \n

*@par Outputs:
*z: A Tensor. Has the same type as a . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow Igammac operator.
*/

REG_OP(Igammac)
.INPUT(a, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(z, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(Igammac)

/**
*@brief Compare values of x to threshold and pack resulting bits into
a uint8 . \n

*@par Inputs:
*Inputs include:
*@li x: Values to compare against threshold and bitpack. Must be one of the
following types: float32, float16, double, int8, int16, int32, int64, bool.
*@li threshold: Threshold to compare against. Must be one of the following
types: float32, float16, double, int8, int16, int32, int64, bool . \n

*@par Outputs:
*y: The bitpacked comparisons. A Tensor of type uint8 . \n

*@attention Constraints:
*Currently, the innermost dimension of the tensor must be divisible by 8. \n

*@par Third-party framework compatibility
*Compatible with tensorflow CompareAndBitpack operator
*/

REG_OP(CompareAndBitpack)
.INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, \
DT_INT16, DT_INT32, DT_INT64, DT_BOOL }))
.INPUT(threshold, TensorType({ DT_FLOAT, DT_FLOAT16, DT_DOUBLE, \
DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL }))
.OUTPUT(y, TensorType(DT_UINT8))
.OP_END_FACTORY_REG(CompareAndBitpack)

/**
*@brief Counts the number of occurrences of each value in an integer array.
Outputs a vector with length size and the same dtype as weights. If weights
are empty, then index i stores the number of times the value i is counted in
array. If weights are non-empty, then index i stores the sum of the value in
weights at each index where the corresponding value in array is i . \n

*@par Inputs:
*The input size must be a non-negative int32 scalar Tensor. Inputs include:
*@li array: An int32 Tensor.
*@li size: A non-negative int32 scalar Tensor.
*@li weights: An int32, int64, float32, or double Tensor with the same
shape as array, or a length-0 Tensor, in which case it acts as all weights
equal to 1 . \n

*@par Outputs:
*bins: 1D Tensor with length equal to size. The counts or summed weights for
each value in the range [0, size). Must be one of the following types:
float32, int32, int64, double . \n

*@par Third-party framework compatibility
*Compatible with tensorflow Bincount operator
*/

REG_OP(Bincount)
.INPUT(array, TensorType(DT_INT32))
.INPUT(size, TensorType(DT_INT32))
.INPUT(weights, TensorType({ DT_FLOAT, DT_INT32, DT_INT64, DT_DOUBLE }))
.OUTPUT(bins, TensorType({ DT_FLOAT, DT_INT32, DT_INT64, DT_DOUBLE }))
.OP_END_FACTORY_REG(Bincount)

/**
*@brief Compute the regularized incomplete beta integral . \n

*@par Inputs:
*The inputs b and x must have the same type as a. Inputs include:
*@li a: A Tensor. Must be one of the following types: float32, double.
*@li b: A Tensor. Must have the same type as a.
*@li x: A Tensor. Must have the same type as a . \n

*@par Outputs:
*z: A Tensor. Has the same type as a . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow Betainc operator.
*/

REG_OP(Betainc)
.INPUT(a, TensorType({DT_DOUBLE, DT_FLOAT}))
.INPUT(b, TensorType({DT_DOUBLE, DT_FLOAT}))
.INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT}))
.OUTPUT(z, TensorType({DT_DOUBLE, DT_FLOAT}))
.OP_END_FACTORY_REG(Betainc)

/**
*@brief Compute the Hurwitz zeta function . \n

*@par Inputs:
*The input q must be the same type as x. Inputs include:
*@li x: A Tensor. Must be one of the following types: float32, double.
*@li q: A Tensor. Must have the same type as x . \n

*@par Outputs:
*z: A Tensor. Has the same type as x . \n

*@attention Constraints:
*The implementation for Zeta on Ascend runs on AI CPU and has poor
performance . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow Zeta operator.
*/

REG_OP(Zeta)
.INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT}))
.INPUT(q, TensorType({DT_DOUBLE, DT_FLOAT}))
.OUTPUT(z, TensorType({DT_DOUBLE, DT_FLOAT}))
.OP_END_FACTORY_REG(Zeta)

/**
*@brief Bucketize 'x' based on 'boundaries'. For example, if the inputs
are boundaries = [0, 10, 100] and x = [[-5, 10000] [150, 10] [5, 100]], then
the output will be y = [[0, 3] [3, 2] [1, 3]] . \n

*@par Inputs:
*x: A Tensor of any shape. Must be one of the following types: int32, int64,
double, float32 . \n

*@par Attributes:
*boundaries: A required list of floats. A sorted list giving the boundaries
of the buckets . \n

*@par Outputs:
*y: A Tensor of type int32. Same shape as 'x', with each value of x replaced
by its bucket index . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow Bucketize operator.
*/

REG_OP(Bucketize)
.INPUT(x, TensorType({DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_INT32}))
.REQUIRED_ATTR(boundaries, ListFloat)
.OP_END_FACTORY_REG(Bucketize)

/**
*@brief Returns a new tensor with the truncated integer values of the elements of input. \n

*@par Inputs:
*One input, including:
* input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n

*@par Outputs:
*output_y: A tensor with the same type and shape as input_x. \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator Trunc. \n
*/
REG_OP(Trunc)
.INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8}))
.OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8}))
.OP_END_FACTORY_REG(Trunc)
/**
*@brief Computes the sum along sparse segments of a tensor . \n

*@par Inputs:
*The input indices and segment_ids must have same rank. Inputs include:
*@li x: A Tensor. Must be one of the following types: int8, uint8, int16,
uint16, int32, int64, double, float32, float16.
*@li indices: A Tensor of type int32. A 1-D tensor. Has same rank as
segment_ids.
*@li segment_ids: A Tensor of type int32. A 1-D tensor. Values should be
sorted and can be repeated . \n

*@par Outputs:
*y: A Tensor. Has the same type as x . \n

*@par Third-party framework compatibility
*Compatible with tensorflow SparseSegmentSum operator
*/

REG_OP(SparseSegmentSum)
.INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16,
DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(segment_ids, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16,
DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(SparseSegmentSum)

/**
*@brief Computes the mean along sparse segments of a tensor . \n

*@par Inputs:
*The input indices and segment_ids must have same rank. Inputs include:
*@li x: A Tensor. Must be one of the following types: float32, double.
*@li indices: A Tensor of type int32. A 1-D tensor. Has same rank as
segment_ids.
*@li segment_ids: A Tensor of type int32. A 1-D tensor. Values should be
sorted and can be repeated . \n

*@par Outputs:
*y: A Tensor. Has the same type as x . \n

*@par Third-party framework compatibility
*Compatible with tensorflow SparseSegmentMean operator
*/

REG_OP(SparseSegmentMean)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(segment_ids, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(SparseSegmentMean)

/**
*@brief Computes gradients for SparseSegmentMean . \n

*@par Inputs:
*The input x must be of type float32 or double. Inputs include:
*@li x: A Tensor. Must be one of the following types: float32, double.
The gradient propagated to the SparseSegmentMean op.
*@li indices: A Tensor of type int32. The indices passed to the
corresponding SparseSegmentMean op.
*@li segment_ids: A Tensor of type int32. The segment_ids passed to the
corresponding SparseSegmentMean op.
*@li output_dim0: A Tensor of type int32. Dimension 0 of "x" passed to
SparseSegmentMean op . \n

*@par Outputs:
*y: A Tensor. Has the same type as x . \n

*@par Third-party framework compatibility
*Compatible with tensorflow SparseSegmentMeanGrad operator
*/

REG_OP(SparseSegmentMeanGrad)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(indices, TensorType({DT_INT32}))
.INPUT(segment_ids, TensorType({DT_INT32}))
.INPUT(output_dim0, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(SparseSegmentMeanGrad)

/**
*@brief Computes the gradient of igamma(a, x) wrt a . \n

*@par Inputs:
*The input a and x must have the same type. Inputs include:
*@li a: A Tensor. Must be one of the following types: float32, double.
*@li x: A Tensor. Must have the same type as a . \n

*@par Outputs:
*z: A Tensor. Has the same type as a . \n

*@par Third-party framework compatibility
*Compatible with tensorflow IgammaGradA operator
*/

REG_OP(IgammaGradA)
.INPUT(a, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(z, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(IgammaGradA)

/**
*@brief Initialize data process channel . \n

*@par Attributes:
*channel_name: An optional string. Defaults to "" . \n

*@par Third-party framework compatibility
*Compatible with tensorflow InitData operator
*/

REG_OP(InitData)
.ATTR(channel_name, String, "")
.OP_END_FACTORY_REG(InitData)

/**
*@brief Get the next batch of data in data processing . \n

*@par Attributes:
*@li output_types: An optional list of ints. Defaults to []. A nested
structure of DType objects corresponding to each component of an element of
this dataset.
*@li output_shapes: An optional list of int lists. Defaults to []. A nested
structure of TensorShape objects corresponding to each component of an
element of this dataset.
*@li output_num: An optional int. Defaults to 1. Presumably the number of
outputs "y" (to be confirmed against the implementation).
*@li channel_name: An optional string. Defaults to "" . \n

*@par Outputs:
*y: A nested structure of Tensor objects. It is a dynamic output. Must be one
of the following types: int8, uint8, int16, uint16, int32, int64, uint32,
uint64, float16, float32, double, bool . \n

*@par Third-party framework compatibility
*Compatible with tensorflow GetNext operator
*/

REG_OP(GetNext)
.DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64,
DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL}))
.ATTR(output_types, ListInt, {})
.ATTR(output_shapes, ListListInt, {})
.ATTR(output_num, Int, 1)
.ATTR(channel_name, String, "")
.OP_END_FACTORY_REG(GetNext)

/**
*@brief Get dynamic dims after GetNext. \n

*@par Inputs:
*input: A nested structure of Tensor objects, from GetNext's output. It is a
dynamic input. Must be one of the following types: int32, int64. \n

*@par Attributes:
*@li shape_info: A required list of ints. GE shape_info for each input,
-1 means unknown dim.
*@li N: A required int. Number of inputs. \n

*@par Outputs:
*dims: GE unknown dims, a vector of type int32 or int64. \n
*/

REG_OP(GetDynamicDims)
.DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(dims, TensorType({DT_INT32, DT_INT64}))
.REQUIRED_ATTR(shape_info, ListInt)
.REQUIRED_ATTR(N, Int)
.OP_END_FACTORY_REG(GetDynamicDims)

/**
*@brief End of sequence . \n

*@par Inputs:
*x: A Tensor of type uint8 . \n

*@par Outputs:
*y: A Tensor of type uint8. Has the same type as "x" . \n
*/

REG_OP(EndOfSequence)
.INPUT(x, TensorType({DT_UINT8}))
.OUTPUT(y, TensorType({DT_UINT8}))
.OP_END_FACTORY_REG(EndOfSequence)

/**
*@brief Computes the Gauss error function of "x" element-wise . \n

*@par Inputs:
*x: A Tensor of type float16, float32 or double. The format can be
* [NCHW,NC1HWC0,NHWC,ND] . \n

*@par Outputs:
*y: A Tensor. Has the same type and format as "x" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Erf.
*/
REG_OP(Erf)
.INPUT(x, TensorType::FloatingDataType())
.OUTPUT(y, TensorType::FloatingDataType())
.OP_END_FACTORY_REG(Erf)

/**
*@brief Computes the Gauss complementary error function of "x" element-wise . \n

*@par Inputs:
*x: A Tensor of type float16, float32 or double . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Erfc.
*/
REG_OP(Erfc)
.INPUT(x, TensorType::FloatingDataType())
.OUTPUT(y, TensorType::FloatingDataType())
.OP_END_FACTORY_REG(Erfc)

/**
*@brief This operation returns a rank 1 histogram counting the number of entries in "x"
* that fell into every bin. The bins are equal width and determined by the inputs
* "range" and "nbins" . \n

*@par Inputs:
*Three inputs, including:
*@li x: A Tensor of type float32, float16, int32, int64.
*@li range: A Tensor of type float32, float16, int32, int64.
*@li nbins: A Tensor of type int32 . \n

*@par Attributes:
* dtype: An optional string. Defaults to "int32" . \n

*@par Outputs:
*y: A Tensor of type int32 . \n

*@par Third-party framework compatibility
* Compatible with TensorFlow operator HistogramFixedWidth.
*/
REG_OP(HistogramFixedWidth)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.INPUT(nbins, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_INT32}))
.ATTR(dtype, String, "int32")
.OP_END_FACTORY_REG(HistogramFixedWidth)

/**
*@brief This operation returns a rank 1 histogram counting the number of entries in "x"
* that fell into every bin. The bins are equal width and determined by the input
* "range" and the attribute "nbins" . \n

*@par Inputs:
*Two inputs, including:
*@li x: A Tensor of type float32, float16, int32, int64.
*@li range: A Tensor of type float32, float16, int32, int64 . \n

*@par Attributes:
*@li dtype: An optional string. Defaults to "int32".
*@li nbins: A required int . \n

*@par Outputs:
*y: A Tensor of type int32 . \n

*@par Third-party framework compatibility
* Compatible with TensorFlow operator HistogramFixedWidth.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use HistogramFixedWidth instead.
*/
REG_OP(HistogramFixedWidthD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_INT32}))
.REQUIRED_ATTR(nbins, Int)
.ATTR(dtype, String, "int32")
.OP_END_FACTORY_REG(HistogramFixedWidthD)

/**
*@brief Returns the next representable value of x1 in the direction of x2, element-wise . \n

*@par Inputs:
*The input x1 and x2 must have the same type. Inputs include:
*@li x1: A Tensor. Must be one of the following types: float32, double.
*@li x2: A Tensor. Must have the same type as x1 . \n

*@par Outputs:
*output: A Tensor. Has the same type as x1 . \n

*@par Third-party framework compatibility
*Compatible with tensorflow NextAfter operator
*/
REG_OP(NextAfter)
.INPUT(x1, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(NextAfter)

/**
*@brief Compute element-wise finiteness, return a boolean tensor . \n

*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, double . \n

*@par Outputs:
*y: A Tensor of type bool. Has the same shape as x . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow IsFinite operator.
*/
REG_OP(IsFinite)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(IsFinite)

/**
*@brief Compute element-wise infiniteness, return a boolean tensor . \n

*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, double . \n

*@par Outputs:
*y: A Tensor of type bool. Has the same shape as x . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow IsInf operator.
*/
REG_OP(IsInf)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(IsInf)

/**
*@brief Computes the complex absolute value of a tensor . \n

*@par Inputs:
*x: A Tensor. Must be one of the following types: complex64, complex128 . \n

*@par Attributes:
*Tout: An optional DType. Defaults to float32. Presumably selects the type of
output "y" (to be confirmed against the implementation) . \n

*@par Outputs:
*y: A tensor of type float32 or double that is the absolute value of each element in "x" . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow ComplexAbs operator.
*/
REG_OP(ComplexAbs)
.INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(Tout, Type, DT_FLOAT)
.OP_END_FACTORY_REG(ComplexAbs)

/**
*@brief Returns which elements of x are NaN . \n

*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, double . \n

*@par Outputs:
*y: A Tensor of type bool. Has the same shape as x . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow IsNan operator.
*/
REG_OP(IsNan)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(IsNan)

/**
*@brief Returns the real part of a complex number . \n

*@par Inputs:
*input: A Tensor. Must be one of the following types: complex64, complex128 . \n

*@par Attributes:
*Tout: An optional DType. Defaults to float32. Presumably selects the type of
"output" (to be confirmed against the implementation) . \n

*@par Outputs:
*output: A Tensor of type float32 or double. Has the same shape as input . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow Real operator.
*/
REG_OP(Real)
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(Tout, Type, DT_FLOAT)
.OP_END_FACTORY_REG(Real)

/**
*@brief Returns the complex conjugate of a complex number . \n

*@par Inputs:
*input: A Tensor. Must be one of the following types: complex64, complex128 . \n

*@par Outputs:
*output: A Tensor of type complex64 or complex128. Has the same shape as
input . \n

*@par Third-party framework compatibility.
*Compatible with tensorflow Conj operator.
*/
REG_OP(Conj)
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(output, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(Conj)

/**
*@brief The negative log likelihood loss . \n

*@par Inputs:
*The input x and weight must have the same type. Inputs include:
*@li x: A Tensor dtype of float32.
*@li target: A Tensor dtype of int32.
*@li weight: A Tensor dtype of float32 . \n

*@par Attributes:
*@li reduction: An optional string. Defaults to "mean".
*@li ignore_index: An optional int. Defaults to -100. Presumably the target
value that is ignored, as in pytorch NLLLoss (to be confirmed against the
implementation) . \n

*@par Outputs:
*@li y: A Tensor dtype of float32.
*@li total_weight: A Tensor dtype of float32 . \n

*@par Third-party framework compatibility
*Compatible with pytorch NLLLoss operator
*/
REG_OP(NLLLoss)
.INPUT(x, TensorType({DT_FLOAT}))
.INPUT(target, TensorType({DT_INT32}))
.INPUT(weight, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT}))
.OUTPUT(total_weight, TensorType({DT_FLOAT}))
.ATTR(reduction, String, "mean")
.ATTR(ignore_index, Int, -100)
.OP_END_FACTORY_REG(NLLLoss)

/**
*@brief The negative log likelihood loss grad . \n

*@par Inputs:
*@li x: A Tensor dtype of float32.
*@li y_grad: A Tensor dtype of float32.
*@li target: A Tensor dtype of int32.
*@li weight: A Tensor dtype of float32.
*@li total_weight: A Tensor dtype of float32 . \n

*@par Attributes:
*@li reduction: An optional string. Defaults to "mean".
*@li ignore_index: An optional int. Defaults to -100. Presumably the target
value that is ignored, as in pytorch NLLLoss (to be confirmed against the
implementation) . \n

*@par Outputs:
*x_grad: A Tensor. Must be the following type: float32 . \n

*@par Third-party framework compatibility
*Compatible with pytorch NLLLossGrad operator
*/
REG_OP(NLLLossGrad)
.INPUT(x, TensorType({DT_FLOAT}))
.INPUT(y_grad, TensorType({DT_FLOAT}))
.INPUT(target, TensorType({DT_INT32}))
.INPUT(weight, TensorType({DT_FLOAT}))
.INPUT(total_weight, TensorType({DT_FLOAT}))
.OUTPUT(x_grad, TensorType({DT_FLOAT}))
.ATTR(reduction, String, "mean")
.ATTR(ignore_index, Int, -100)
.OP_END_FACTORY_REG(NLLLossGrad)

/**
*@brief The ifmr . \n

*@par Inputs:
*@li data: A Tensor of feature map. Must be one of the following types:
float16, float32.
*@li data_min: A Tensor of min value of feature map. Must be one of the
following types: float16, float32.
*@li data_max: A Tensor of max value of feature map. Must be one of the
following types: float16, float32.
*@li cumsum: A Tensor of cumsum bin of data. Must be of type int32 . \n

*@par Attributes:
*@li min_percentile: A required float. Min init percentile.
*@li max_percentile: A required float. Max init percentile.
*@li search_range: A required list of floats. Search range.
*@li search_step: A required float. Step size of searching.
*@li with_offset: A required bool. Whether using offset . \n

*@par Outputs:
*@li scale: A Tensor of type float32. Optimal scale.
*@li offset: A Tensor of type float32. Optimal offset . \n

*@par Third-party framework compatibility
*Compatible with mindspore
*/

REG_OP(IFMR)
.INPUT(data, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(data_min, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(data_max, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(cumsum, TensorType({DT_INT32}))
.OUTPUT(scale, TensorType({DT_FLOAT}))
.OUTPUT(offset, TensorType({DT_FLOAT}))
.REQUIRED_ATTR(min_percentile, Float)
.REQUIRED_ATTR(max_percentile, Float)
.REQUIRED_ATTR(search_range, ListFloat)
.REQUIRED_ATTR(search_step, Float)
.REQUIRED_ATTR(with_offset, Bool)
.OP_END_FACTORY_REG(IFMR)

/**
*@brief Weights adaptive range quantization. \n

*@par Inputs:
*@li w: A Tensor of weights. Must be one of the following types: float16,
float32.
*@li w_min: A Tensor of weights reduce_min. Must be one of the following
types: float16, float32.
*@li w_max: A Tensor of weights reduce_max. Must be one of the following
types: float16, float32. \n

*@par Attributes:
*@li num_bits: An optional int. Defaults to 8. The number of bits used for
quantization.
*@li offset_flag: An optional bool. Defaults to false. Whether using offset. \n

*@par Outputs:
*y: Fake quantized weights. Must be one of the following types: float16,
float32. \n

*@par Third-party framework compatibility
*Compatible with mindspore

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(WtsARQ)
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(w_min, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(w_max, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(num_bits, Int, 8)
.ATTR(offset_flag, Bool, false)
.OP_END_FACTORY_REG(WtsARQ)

/**
*@brief The acts_ulq: fake-quantizes a feature map using clamp bounds . \n

*@par Inputs:
*@li x: A Tensor of feature map. Must be one of the following types: float16, float32.
*@li clamp_min: A Tensor of the min clamp value of the feature map. Must be one of the following types: float16, float32.
*@li clamp_max: A Tensor of the max clamp value of the feature map. Must be one of the following types: float16, float32 . \n

*@par Attributes:
*@li fixed_min: An optional bool, whether to fix min to zero. Defaults to "false".
*@li num_bits: An optional int, the number of quant bits. Defaults to "8" . \n

*@par Outputs:
*@li y: A Tensor of the output fake quant feature map. Must be one of the following types: float16, float32.
*@li clamp_min_mask: A Tensor of type bool, where x > clamp_min.
*@li clamp_max_mask: A Tensor of type bool, where x < clamp_max.
*@li x_clamped_loss: A Tensor of the clamp loss. Must be one of the following types: float16, float32 . \n

*@par Third-party framework compatibility
*Compatible with mindspore

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(ActsULQ)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(clamp_min, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(clamp_max, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(clamp_min_mask, TensorType({DT_BOOL}))
.OUTPUT(clamp_max_mask, TensorType({DT_BOOL}))
.OUTPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(fixed_min, Bool, false)
.ATTR(num_bits, Int, 8)
.OP_END_FACTORY_REG(ActsULQ)

/**
*@brief The acts_ulq_input_grad . \n

*@par Inputs:
*@li y_grad: A Tensor of gradient. Must be one of the following types: float16, float32.
*@li clamp_min_mask: A Tensor of type bool, a mask indicating whether an additional one is needed.
*@li clamp_max_mask: A Tensor of type bool, a mask indicating whether an additional one is needed . \n

*@par Outputs:
*x_grad: A Tensor holding the gradient of the input. Must be one of the following types: float16, float32 . \n

*@par Third-party framework compatibility
*Compatible with mindspore

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(ActsULQInputGrad)
.INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(clamp_min_mask, TensorType({DT_BOOL}))
.INPUT(clamp_max_mask, TensorType({DT_BOOL}))
.OUTPUT(x_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(ActsULQInputGrad)

/**
*@brief The act_ulq_clamp_max_grad: computes the gradient of clamp max . \n

*@par Inputs:
*@li y_grad: A Tensor of gradient. Must be one of the following types: float16, float32.
*@li clamp_max_mask: A Tensor of type bool, a mask indicating whether an additional one is needed.
*@li x_clamped_loss: A Tensor of gradient. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*clamp_max_grad: A Tensor holding the gradient of clamp max. Must be one of the following types: float16, float32 . \n

*@par Third-party framework compatibility
*Compatible with mindspore

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(ActULQClampMaxGrad)
.INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(clamp_max_mask, TensorType({DT_BOOL}))
.INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(clamp_max_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(ActULQClampMaxGrad)

/**
*@brief The act_ulq_clamp_min_grad: computes the gradient of clamp min . \n

*@par Inputs:
*@li y_grad: A Tensor of gradient. Must be one of the following types: float16, float32.
*@li clamp_min_mask: A Tensor of type bool, a mask indicating whether an additional one is needed.
*@li x_clamped_loss: A Tensor of gradient. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*clamp_min_grad: A Tensor holding the gradient of clamp min. Must be one of the following types: float16, float32 . \n

*@par Third-party framework compatibility
*Compatible with mindspore

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/

REG_OP(ActULQClampMinGrad)
.INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(clamp_min_mask, TensorType({DT_BOOL}))
.INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(ActULQClampMinGrad)

/**
* @brief Computes Lp norm . \n

* @par Inputs:
* x: An ND tensor of type float16, float32 . \n

* @par Attributes:
* @li p: An optional int. Defaults to "2".
* NOTE(review): earlier documentation also listed "inf" and "-inf" as valid values, but the
* attribute is registered as Int - confirm how infinite norms are expressed.
* @li axes: An optional list of ints. Defaults to "{}", which means all axes will be computed.
* @li keepdim: An optional bool. Defaults to "false".
* @li epsilon: An optional float. Defaults to "1e-12" . \n

* @par Outputs:
* y: An ND tensor of type float16, float32. The shape of y depends
* on axes and keepdim . \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator LpNorm.
*/
REG_OP(LpNorm)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(p, Int, 2)
.ATTR(axes, ListInt, {})
.ATTR(keepdim, Bool, false)
.ATTR(epsilon, Float, 1e-12)
.OP_END_FACTORY_REG(LpNorm)

/**
* @brief Builds a complex tensor from a real part and an imaginary part . \n

* @par Inputs:
* @li real: An ND tensor of type float32 or double.
* @li imag: An ND tensor of type float32 or double . \n

* @par Attributes:
* Tout: An optional Type attribute. Defaults to DT_COMPLEX64.
* Presumably selects the output data type - confirm against the operator implementation . \n

* @par Outputs:
* out: An ND tensor of type complex64 or complex128 . \n
*/
REG_OP(Complex)
.INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(Tout, Type, DT_COMPLEX64)
.OP_END_FACTORY_REG(Complex)

/**
* @brief Derives a real-valued tensor from a complex tensor
* (presumably its imaginary part, going by the operator name - confirm) . \n

* @par Inputs:
* input: An ND tensor of type complex64 or complex128 . \n

* @par Attributes:
* Tout: An optional Type attribute. Defaults to DT_FLOAT.
* Presumably selects the output data type - confirm against the operator implementation . \n

* @par Outputs:
* output: An ND tensor of type float32 or double . \n
*/
REG_OP(Imag)
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(Tout, Type, DT_FLOAT)
.OP_END_FACTORY_REG(Imag)

/**
* @brief Derives a real-valued tensor from a complex tensor
* (presumably its argument/phase angle, going by the operator name - confirm) . \n

* @par Inputs:
* input: An ND tensor of type complex64 or complex128 . \n

* @par Attributes:
* Tout: An optional Type attribute. Defaults to DT_FLOAT.
* Presumably selects the output data type - confirm against the operator implementation . \n

* @par Outputs:
* output: An ND tensor of type float32 or double . \n
*/
REG_OP(Angle)
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(Tout, Type, DT_FLOAT)
.OP_END_FACTORY_REG(Angle)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_

+ 1048
- 0
third_party/fwkacllib/inc/inc/ops/matrix_calculation_ops.h
File diff suppressed because it is too large
View File


+ 485
- 0
third_party/fwkacllib/inc/inc/ops/nn_batch_norm_ops.h View File

@@ -0,0 +1,485 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file nn_batch_norm_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_

#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Normalizes elements of a specific dimension of eigenvalues (L2) . \n

*@par Inputs:
*One input:
*x: A multi-dimensional Tensor of type float16 or float32, specifying the eigenvalue . \n

*@par Attributes:
*@li axis: An optional attribute of type list of ints, specifying the axis for normalization. Defaults to "{}".
*@li eps: An optional attribute of type float, specifying the lower limit of normalization. Defaults to "1e-4" . \n

*@par Outputs:
*y: A multi-dimensional Tensor of type float16 or float32, specifying the eigenvalue for normalization . \n

*@par Third-party framework compatibility
* Compatible with the L2 scenario of PyTorch operator Normalize.
*/
REG_OP(L2Normalize)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(axis, ListInt, {})
.ATTR(eps, Float, 1e-4)
.OP_END_FACTORY_REG(L2Normalize)

/**
*@brief Performs the backpropagation of L2Normalize for training scenarios . \n

*@par Inputs:
* Three inputs, including:
*@li x: A multi-dimensional Tensor of type float16 or float32, specifying
* the eigenvalue of forward inputs.
*@li y: A multi-dimensional Tensor of type float16 or float32, specifying
* the normalization result of the forward output.
*@li dy: A multi-dimensional Tensor of type float16 or float32, specifying
* the reverse input gradient . \n

*@par Attributes:
*@li dim: An optional attribute of type list of ints, specifying the axis to be
* normalized. Defaults to "{}". Note that the forward operator L2Normalize names
* the corresponding attribute "axis".
*@li eps: An optional attribute of type float, specifying the lower limit of
* normalization. Defaults to "1e-4" . \n

*@par Outputs:
*dx: Reverse gradient of eigenvalue "x". A Tensor of type float16 or float32.
* Has the same dimensions as "x" . \n

*@par Third-party framework compatibility
* Compatible with the L2 scenario of PyTorch operator NormalizeGrad.
*/
REG_OP(L2NormalizeGrad)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(dx, TensorType({DT_FLOAT, DT_FLOAT16}))
.ATTR(dim, ListInt, {})
.ATTR(eps, Float, 0.0001)
.OP_END_FACTORY_REG(L2NormalizeGrad)

/**
*@brief Performs batch normalization . \n

*@par Inputs:
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
* if input "x" is with format NC1HWC0. Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
* if input "x" is with format NC1HWC0. Specifies the offset.
*@li mean: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
* if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
* operation is used for training.
*@li variance: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be
* 5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None"
* if the operation is used for training . \n

*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n

*@par Outputs:
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
* if input "x" is with format NC1HWC0. Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
* Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
* Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
* Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n

*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
* then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n

*@par Third-party framework compatibility
*@li Compatible with the TensorFlow operator fused_batch_norm.
*@li Compatible with the TensorFlow operator fused_batch_norm_v2.
*/
REG_OP(BatchNorm)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(offset, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NHWC")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(BatchNorm)

/**
*@brief Performs batch normalization on input with a depth dimension (NDHWC, NCDHW, or NDC1HWC0) . \n

*@par Inputs:
* Five inputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported)
* NOTE(review): the previous text gave inconsistent ranks ("3D or 6D", "for 4D", and a mix of 1D/3D
* for the per-channel tensors) and mixed 2D format names (NHWC/NCHW/NC1HWC0) into this 3D operator.
* The ranks below assume 5D for NDHWC/NCDHW, 6D for NDC1HWC0, and 1D per-channel tensors by analogy
* with BatchNorm - confirm against the operator implementation.
*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D
* if input "x" is with format NDC1HWC0. Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D
* if input "x" is with format NDC1HWC0. Specifies the offset.
*@li mean: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D
* if input "x" is with format NDC1HWC0. Specifies the mean used for inference. Must be "None" if the
* operation is used for training.
*@li variance: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be
* 6D if input "x" is with format NDC1HWC0. Specifies the variance used for inference. Must be "None"
* if the operation is used for training . \n

*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
*@li data_format: An optional string, specifying the format of "x". Defaults to "NCDHW".
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n

*@par Outputs:
* Five outputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported)
*@li y: A 5D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D
* if input "x" is with format NDC1HWC0. Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
* Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
* Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
* Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n

*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
* then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n

*@par Third-party framework compatibility
*@li Compatible with the TensorFlow operator fused_batch_norm.
*@li Compatible with the TensorFlow operator fused_batch_norm_v2.
*/
REG_OP(BatchNorm3D)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(offset, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NCDHW")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(BatchNorm3D)
/**
*@brief Performs batch normalization . \n

*@par Inputs:
* Five inputs, including: (NHWC or NCHW supported)
*@li input_x: A 4D Tensor of type float16 or float32.
*@li input_scale: A 1D Tensor of type float32, for the scaling factor.
*@li input_offset: A 1D Tensor of type float32, for the scaling offset.
*@li input_mean: An optional 1D Tensor of type float32, for the mean used for inference.
* Must be "None" if the operation is used for training.
*@li input_variance: An optional 1D Tensor of type float32, for the variance used for inference.
* Must be "None" if the operation is used for training . \n

*@par Attributes:
*@li epsilon: An optional float32, specifying the small value
* added to variance to avoid dividing by zero. Defaults to "0.0001".
*@li data_format: An optional string, specifying the format of "input_x". Defaults to "NHWC".
*@li is_training: An optional bool, specifying if the operation
* is used for training or inference. Defaults to "True" . \n

*@par Outputs:
* Five outputs, including: (NHWC or NCHW supported)
*@li output_y: A 4D Tensor of type float16 or float32, for the normalized "input_x".
*@li output_mean: A 1D Tensor of type float32, for the mean of "input_x".
*@li output_variance: A 1D Tensor of type float32, for the variance of "input_x".
*@li output_reserve_space_1: A 1D Tensor of type float32, for the mean of "input_x" for gradient computation.
*@li output_reserve_space_2: A 1D Tensor of type float32, for the variance of "input_x" for gradient computation . \n

*@attention Constraints:
*@li If the operation is used for inference, then output "output_reserve_space_1"
* has the same value as "input_mean" and output "output_reserve_space_2" has the same value as "input_variance".
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator fused_batch_norm_v2.
*/
REG_OP(BatchNormExt2)
.INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input_scale, TensorType({DT_FLOAT}))
.INPUT(input_offset, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(input_mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(input_variance, TensorType({DT_FLOAT}))
.OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(output_mean, TensorType({DT_FLOAT}))
.OUTPUT(output_variance, TensorType({DT_FLOAT}))
.OUTPUT(output_reserve_space_1, TensorType({DT_FLOAT}))
.OUTPUT(output_reserve_space_2, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NHWC")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(BatchNormExt2)

/**
*@brief Performs the backpropagation of BatchNorm . \n

*@par Inputs:
* Five inputs, including:
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient.
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0.
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0.
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm.
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n

*@par Attributes:
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
*@li data_format: An optional string. Defaults to "NHWC".
*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n

*@par Outputs:
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the backpropagated gradient of "x".
*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the backpropagated gradient of "scale".
*@li offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the backpropagated gradient of "offset".
*@li reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output.
*@li reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output . \n

*@attention Constraints:
* The preceding layer of this operator must be operator BatchNorm . \n

*@see BatchNorm
*@par Third-party framework compatibility
* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNormGrad.
*/
REG_OP(BatchNormGrad)
.INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(reserve_space_1, TensorType({DT_FLOAT}))
.INPUT(reserve_space_2, TensorType({DT_FLOAT}))
.OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
.OUTPUT(offset_backprop, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_4, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_5, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NHWC")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(BatchNormGrad)

/**
*@brief Performs the backpropagation of batch normalization on input with a depth dimension . \n

*@par Inputs:
* Five inputs, including:
* NOTE(review): the previous text said "3D or 6D" and mixed in the 2D format names NHWC/NCHW/NC1HWC0.
* The ranks below assume 5D for NDHWC/NCDHW and 6D for NDC1HWC0 - confirm against the operator implementation.
*@li y_backprop: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient.
*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0.
*@li scale: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0.
*@li reserve_space_1: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0.
* It is an output of the forward batch normalization operator (presumably BatchNorm3D - confirm).
*@li reserve_space_2: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0.
* It is an output of the forward batch normalization operator (presumably BatchNorm3D - confirm) . \n

*@par Attributes:
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
*@li data_format: An optional string. Defaults to "NCDHW".
*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n

*@par Outputs:
*@li x_backprop: A Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the backpropagated gradient of "x".
*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the backpropagated gradient of "scale".
*@li offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the backpropagated gradient of "offset".
*@li reserve_space_4: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output.
*@li reserve_space_5: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n

*@attention Constraints:
* The preceding layer of this operator must be operator BatchNorm
* (presumably the 3D variant, BatchNorm3D - confirm) . \n

*@see BatchNorm
*@par Third-party framework compatibility
* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad.
*/
REG_OP(BatchNorm3DGrad)
.INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(reserve_space_1, TensorType({DT_FLOAT}))
.INPUT(reserve_space_2, TensorType({DT_FLOAT}))
.OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
.OUTPUT(offset_backprop, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_4, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_5, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NCDHW")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(BatchNorm3DGrad)

/**
*@brief Performs the backpropagation of BatchNorm . \n

*@par Inputs:
* Five inputs, including:
*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient.
*@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW.
*@li scale: A 4D Tensor of type float32, with format NHWC or NCHW.
*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2.
*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2 . \n

*@par Attributes:
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
*@li data_format: An optional string for the format. Defaults to "NHWC".
*@li is_training: An optional bool for specifying the operation is for training (true) or inference (false).
* Defaults to "true" . \n

*@par Outputs:
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, for the backpropagated gradient of "x".
*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for the backpropagated gradient of "scale".
*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for the backpropagated gradient of "offset".
*@li reserve_space_3: A Tensor of type float32, with format NHWC or NCHW.
*@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW . \n

*@attention Constraints:
* The preceding layer of this operator must be BatchNormExt2 . \n

*@see BatchNormExt2
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator FusedBatchNormGradV2.
*/
REG_OP(BatchNormGradExt2)
.INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(reserve_space_1, TensorType({DT_FLOAT}))
.INPUT(reserve_space_2, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NHWC")
.ATTR(is_training, Bool, true)
.OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
.OUTPUT(offset_backprop, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_3, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_4, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(BatchNormGradExt2)


/**
*@brief Performs batch normalization for inference . \n

*@par Inputs:
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li mean: A Tensor of type float32 or float16. Specifies the mean used for inference.
* NOTE(review): the previous text read 'Must be 1D if input "x"' with the condition cut off - confirm the rank rule.
*@li variance: A Tensor of type float32 or float16. Specifies the variance used for inference.
* NOTE(review): same truncated rank rule as "mean" - confirm.
*@li momentum: A Tensor of type float16 or float32, representing the scale factor of the mean and the variance.
*@li scale: An optional tensor of type float16 or float32, not used.
*@li offset: An optional tensor of type float16 or float32, not used . \n

*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: An optional bool for the mean inference mode. Can only be "True", which is the default.
*@li mode: An optional int, not used. Defaults to "1" . \n

*@par Outputs:
*y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x"
*/
REG_OP(BNInference)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mean, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(variance, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.ATTR(epsilon, Float,1e-5f)
.ATTR(use_global_stats, Bool,true)
.ATTR(mode, Int,1)
.OP_END_FACTORY_REG(BNInference)
/**
*@brief aicpu batch normalization host . \n

*@par Inputs:
*@li mean: A Tensor of type float32 or float16. Specifies the mean used for inference.
* NOTE(review): the previous text read 'Must be 1D if input "x"' with the condition cut off - confirm the rank rule.
*@li variance: A Tensor of type float32 or float16. Specifies the variance used for inference.
* NOTE(review): same truncated rank rule as "mean" - confirm.
*@li momentum: A Tensor of type float16 or float32, the scale factor of the mean and the variance.
*@li scale: An optional Tensor of type float16 or float32.
*@li offset: An optional Tensor of type float16 or float32 . \n

*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
*@li mode: An optional int, not used. Defaults to "1".
*@li use_global_stats: An optional bool for the mean inference mode. Can only be "True", which is the default . \n

*@par Outputs:
*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean
*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance
*@li mu: A Tensor of type float16 or float32.
* NOTE(review): the meaning of this output is not documented - confirm against the operator implementation.
*/
REG_OP(BnHost)
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT}))
.ATTR(epsilon, Float, 0.00001)
.ATTR(mode, Int, 1)
.ATTR(use_global_stats, Bool, true)
.OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(BnHost)
/**
*@brief Performs batch normalization for inference . \n

*@par Inputs:
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li mean: A Tensor of type float32 or float16. Specifies the mean used for inference.
* NOTE(review): the previous text read 'Must be 1D if input "x"' with the condition cut off - confirm the rank rule.
*@li variance: A Tensor of type float32 or float16. Specifies the variance used for inference.
* NOTE(review): same truncated rank rule as "mean" - confirm.
*@li scale: An optional tensor of type float16 or float32, not used.
*@li b: An optional tensor of type float16 or float32, not used
* (previously documented under the name "offset") . \n

*@par Attributes:
*@li momentum: An optional float32 num, represents the mean and the variance's scale factor. Defaults to "0.9".
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: An optional bool for the mean inference mode. Can only be "True", which is the default.
*@li mode: An optional int, not used. Defaults to "1" . \n

*@par Outputs:
*y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x"

*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use BNInference instead.
*/
REG_OP(BNInferenceD)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mean, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(variance, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.ATTR(momentum, Float,0.9)
.ATTR(epsilon, Float,1e-5f)
.ATTR(use_global_stats, Bool,true)
.ATTR(mode, Int,1)
.OP_END_FACTORY_REG(BNInferenceD)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_

+ 1711
- 0
third_party/fwkacllib/inc/inc/ops/nn_calculation_ops.h
File diff suppressed because it is too large
View File


+ 1654
- 0
third_party/fwkacllib/inc/inc/ops/nn_detect_ops.h
File diff suppressed because it is too large
View File


+ 1279
- 0
third_party/fwkacllib/inc/inc/ops/nn_norm_ops.h
File diff suppressed because it is too large
View File


+ 53
- 0
third_party/fwkacllib/inc/inc/ops/nn_ops.h View File

@@ -0,0 +1,53 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file nn_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
#include "graph/operator_reg.h"
#include "nn_pooling_ops.h"

namespace ge {
/**
* @brief Says whether the targets are in the top "k" predictions . \n

* @par Inputs:
* Three inputs, including:
* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor.
* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids.
* @li k: A 1D Tensor of the same type as "targets" (IndexNumberType).
* Specifies the number of top elements to look at for computing precision . \n

* @par Outputs:
* precision: A Tensor of type bool . \n

* @attention Constraints:
* "targets" must be a non-negative tensor . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator InTopKV2.
*/
REG_OP(InTopKV2)
.INPUT(predictions, TensorType({DT_FLOAT}))
.INPUT(targets, TensorType(IndexNumberType))
.INPUT(k, TensorType({IndexNumberType}))
.OUTPUT(precision, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(InTopKV2)
}// namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_

+ 1608
- 0
third_party/fwkacllib/inc/inc/ops/nn_pooling_ops.h
File diff suppressed because it is too large
View File


+ 2598
- 0
third_party/fwkacllib/inc/inc/ops/nn_training_ops.h
File diff suppressed because it is too large
View File


+ 41
- 0
third_party/fwkacllib/inc/inc/ops/no_op.h View File

@@ -0,0 +1,41 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file no_op.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {

/**
*@brief Does nothing. Only useful as a placeholder for control edges . \n

*@par Inputs:
*None. The registration declares no inputs, outputs or attributes . \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator NoOp.
*/

REG_OP(NoOp)
.OP_END_FACTORY_REG(NoOp)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_

+ 889
- 0
third_party/fwkacllib/inc/inc/ops/nonlinear_fuc_ops.h View File

@@ -0,0 +1,889 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file nonlinear_fuc_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_

#include "graph/operator_reg.h"

namespace ge {
/**
*@brief Computes the gelu of "x" . \n

*@par Inputs:
*One input, including:
* @li x: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Gelu
*/
REG_OP(Gelu)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(Gelu)

/**
*@brief Computes the gradient for the gelu of "x" . \n

*@par Inputs:
*Three inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32
* @li x: A Tensor of the same type as "dy".
* @li y: A Tensor of the same type as "dy" . \n

*@par Outputs:
*z: A Tensor. Has the same type as "dy" . \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator GeluGrad
*/
REG_OP(GeluGrad)
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(GeluGrad)

/**
*@brief Computes the fast_gelu of "x" . \n

*@par Inputs:
*One input, including:
* @li x: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator FastGelu
*/
REG_OP(FastGelu)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(FastGelu)

/**
*@brief Computes the gradient for the fast_gelu of "x" . \n

*@par Inputs:
*Two inputs, including:
* @li dy: A Tensor. Must be one of the following types: float16, float32
* @li x: A Tensor of the same type as "dy" . \n

*@par Outputs:
*z: A Tensor. Has the same type as "dy" . \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator FastGeluGrad
*/
REG_OP(FastGeluGrad)
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(z, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(FastGeluGrad)


/**
*@brief Computes the gradient for the tanh of "x" . \n

*@par Inputs:
*Two inputs, including:
* @li y: A Tensor. Must be one of the following types: float16, float32,
* double, complex64, complex128.
* @li dy: A Tensor of the same type as "y" . \n

*@par Outputs:
*z: A Tensor. Has the same type as "y" . \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator TanhGrad.
*/
REG_OP(TanhGrad)
.INPUT(y, TensorType::UnaryDataType())
.INPUT(dy, TensorType::UnaryDataType())
.OUTPUT(z, TensorType::UnaryDataType())
.OP_END_FACTORY_REG(TanhGrad)

/**
*@brief Computes hyperbolic tangent of "x" element-wise . \n

*@par Inputs:
*One input:
*x: A Tensor. Must be one of the following types: float16, float32, complex64, complex128, double . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
* Compatible with TensorFlow operator Tanh.
*/
REG_OP(Tanh)
.INPUT(x, TensorType::UnaryDataType())
.OUTPUT(y, TensorType::UnaryDataType())
.OP_END_FACTORY_REG(Tanh)

/**
* @brief Computes rectified linear: "max(x, 0)".
*
* @par Inputs:
* One input:
* x: A tensor. Must be one of the following types: float32, float64, int32, uint8,
* int16, int8, int64, uint16, float16, qint8.
*
* @par Outputs:
* y: A tensor. Has the same type as "x". The registered output type list is
* identical to the input type list.
*
* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator Relu.
* @li Compatible with the Caffe operator ReLULayer.
*
*/
REG_OP(Relu)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE,
DT_INT8, DT_INT32, DT_INT16, DT_INT64,
DT_UINT8, DT_UINT16, DT_QINT8}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE,
DT_INT8, DT_INT32, DT_INT16, DT_INT64,
DT_UINT8, DT_UINT16, DT_QINT8}))
.OP_END_FACTORY_REG(Relu)

/**
* @brief Computes rectified linear 6.
* activations = min(max(x, 0), 6) . \n

* @par Inputs:
* One input:
* x: A Tensor of type RealNumberType . \n

* @par Outputs:
* y: A Tensor of type RealNumberType . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Relu6.
*/
REG_OP(Relu6)
.INPUT(x, TensorType::RealNumberType())
.OUTPUT(y, TensorType::RealNumberType())
.OP_END_FACTORY_REG(Relu6)

/**
* @brief Computes rectified linear 6*scale.
* activations = min(max(x, 0), 6*scale) . \n

* @par Inputs:
* x: A Tensor of type RealNumberType . \n

* @par Attributes:
* scale: An optional scalar. The data type is float32. Defaults to "1.0".
* This is the "scale" factor used in the formula above . \n

* @par Outputs:
* y: A Tensor of type RealNumberType . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Relu6.
*
*@par Restrictions:
*Warning: THIS FUNCTION IS DEPRECATED. Please use Relu6 instead.
*/
REG_OP(Relu6D)
.INPUT(x, TensorType::RealNumberType())
.OUTPUT(y, TensorType::RealNumberType())
.ATTR(scale, Float, 1.0)
.OP_END_FACTORY_REG(Relu6D)

/**
* @brief Computes rectified linear 6 gradients for a Relu6 operation.
* backprops = gradients * (features > 0) * (features < 6) . \n

* @par Inputs:
* Two inputs, in registration order:
* @li gradients: A Tensor of type RealNumberType.
* @li features: A Tensor of type RealNumberType . \n

* @par Outputs:
* backprops: A Tensor of type RealNumberType . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Relu6Grad.
*/
REG_OP(Relu6Grad)
.INPUT(gradients, TensorType::RealNumberType())
.INPUT(features, TensorType::RealNumberType())
.OUTPUT(backprops, TensorType::RealNumberType())
.OP_END_FACTORY_REG(Relu6Grad)
/**
*@brief Calculate the elu_grad_v2 function.
*Applies the element-wise function:
* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha .
*@par Inputs:
*Two inputs, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32.
* @li activations: A tensor. Must be one of the following types:
* float16, float32.
*
*@par Outputs:
*y: A Tensor with the same type and shape as "grads".
*
*@par Attributes:
*@li alpha: An optional float scalar parameter. Defaults to "1.0".
*/
REG_OP(EluGradV2)
.INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.ATTR(alpha, Float, 1.0)
.OP_END_FACTORY_REG(EluGradV2)
/**
* @brief Compute sigmoid of "x" element-wise . \n

* @par Inputs:
* x: A Tensor of type complex64, complex128, float16, float32 or double . \n

* @par Outputs:
* y: A Tensor. Has the same type as "x" . \n

* @see Relu()

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Sigmoid.
*/
REG_OP(Sigmoid)
.INPUT(x, TensorType::UnaryDataType())
.OUTPUT(y, TensorType::UnaryDataType())
.OP_END_FACTORY_REG(Sigmoid)

/**
* @brief Computes z = (y - y*y)*dy . \n

* @par Inputs:
* Two inputs, including:
* @li y: The input is Tensor, dtype is UnaryDataType.
* @li dy: The input is Tensor, dtype is UnaryDataType . \n

* @par Outputs:
* z: The output Tensor, dtype is UnaryDataType . \n

* @note All three tensors are declared through the
* TensorType::UnaryDataType() factory rather than a bare "UnaryDataType"
* identifier, so the type argument is a valid expression.
*/
REG_OP(SigmoidGrad)
.INPUT(y, TensorType::UnaryDataType())
.INPUT(dy, TensorType::UnaryDataType())
.OUTPUT(z, TensorType::UnaryDataType())
.OP_END_FACTORY_REG(SigmoidGrad)

/**
*@brief Computes the binomial normal log likelihood (BNLL) output:
*if x>0, x+log(1+exp(-x)); otherwise log(1+exp(x)) . \n

*@par Inputs:
*One input:
*x: A Tensor of type double, float16 or float32 . \n

*@par Outputs:
*y: A tensor. Has the same type and format as input "x" . \n

*@par Third-party framework compatibility
* Compatible with the Caffe operator BNLL.
*/
REG_OP(BNLL)
.INPUT(x, TensorType::FloatingDataType())
.OUTPUT(y, TensorType::FloatingDataType())
.OP_END_FACTORY_REG(BNLL)

/**
*@brief Computes softplus: log(exp(x) + 1) . \n

*@par Inputs:
* One input:
*x: A Tensor of type float16 or float32. Up to 8D .
* NOTE(review): the input is registered as FloatingDataType; confirm whether
* double is also accepted . \n

*@par Outputs:
*y: The activations tensor. Has the same type and format as input "x" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Softplus.
*/
REG_OP(Softplus)
.INPUT(x, TensorType::FloatingDataType())
.OUTPUT(y, TensorType::FloatingDataType())
.OP_END_FACTORY_REG(Softplus)

/**
*@brief Computes softplus gradients for a softplus operation . \n

*@par Inputs:
*Two inputs:
* @li gradients: An NC1HWC0 or ND Tensor of type float16 or float32.
* @li features: An NC1HWC0 or ND Tensor of type float16 or float32 . \n

*@par Outputs:
*backprops: A Tensor. Has the same type and format as input "gradients" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator SoftplusGrad.
*/
REG_OP(SoftplusGrad)
.INPUT(gradients, TensorType::FloatingDataType())
.INPUT(features, TensorType::FloatingDataType())
.OUTPUT(backprops, TensorType::FloatingDataType())
.OP_END_FACTORY_REG(SoftplusGrad)

/**
*@brief Computes softsign: x/(abs(x) + 1) . \n

*@par Inputs:
* One input:
*x: A Tensor of type float16 or float32. Up to 8D . \n

*@par Outputs:
*y: The activations tensor. Has the same type and format as "x" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Softsign.
*/
REG_OP(Softsign)
.INPUT(x, TensorType::FloatingDataType())
.OUTPUT(y, TensorType::FloatingDataType())
.OP_END_FACTORY_REG(Softsign)

/**
*@brief Computes scaled exponential linear: scale * alpha * (exp(x) - 1) . \n

*@par Inputs:
* One input:
*x: A Tensor. Must be one of the following types: float16, float, double,
* int32, int8. format:ND, NC1HWC0 . \n

*@par Outputs:
*y: A Tensor. Has the same type and format as input "x". format:ND, NC1HWC0 . \n

*@see Region()
* NOTE(review): no "Region" operator is visible in this header; confirm that
* this cross-reference is intended.

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Selu.
*/
REG_OP(Selu)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,
DT_INT8,DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,
DT_INT8,DT_INT32}))
.OP_END_FACTORY_REG(Selu)

/**
*@brief Computes rectified linear gradients for a ReLU operation . \n

*@par Inputs:
* Two inputs, including:
*@li gradients: A Tensor. Must be one of the following types: float32, double,
* int32, int8, int16, int64, uint16, float16, uint32, uint64
*@li features: A Tensor. Must be one of the following types: float32, double,
* int32, int8, int16, int64, uint16, float16, uint32, uint64 . \n

*@par Outputs:
*backprops: A Tensor. Must have the same type as "gradients" . \n

*@attention Constraints:
* The corresponding Relu operator needs to be called before using this operator on the network . \n

*@see Relu

*@par Third-party framework compatibility
* Compatible with TensorFlow operator ReluGrad.
*/
REG_OP(ReluGrad)
.INPUT(gradients, TensorType::RealNumberType())
.INPUT(features, TensorType::RealNumberType())
.OUTPUT(backprops, TensorType::RealNumberType())
.OP_END_FACTORY_REG(ReluGrad)

/**
*@brief Computes rectified linear gradients for a ReLU operation, using a
* uint8 "mask" input in place of the forward features . \n

*@par Inputs:
* Two inputs, including:
*@li gradients: A Tensor. Must be one of the following types: float32, double,
* int32, int8, int16, int64, uint16, float16, uint32, uint64
*@li mask: A Tensor. Must be the following types: uint8 . \n

*@par Outputs:
*backprops: A Tensor. Must have the same type as "gradients" . \n

*@attention Constraints:
* The corresponding Relu operator needs to be called before using this operator on the network . \n

*@see Relu

*@par Third-party framework compatibility
* Compatible with TensorFlow operator ReluGradV2.
*/
REG_OP(ReluGradV2)
.INPUT(gradients, TensorType::RealNumberType())
.INPUT(mask, TensorType({DT_UINT8}))
.OUTPUT(backprops, TensorType::RealNumberType())
.OP_END_FACTORY_REG(ReluGradV2)

/**
*@brief Computes rectified linear: "max(x, 0)", and additionally outputs a
* uint8 "mask" tensor.
*
*@attention Constraints:
* The last dimension must be divisible by 8.
* The second output "mask" is "1" (for y >= 0) or "0" ( for y < 0).
*
*@par Inputs:
* One input:
* x: A tensor. Must be one of the following types: float32, float64, int32, uint8,
* int16, int8, int64, uint16, float16, qint8.
*
*@par Outputs:
* Two outputs, including:
*@li y: A tensor. Has the same type as "x".
*@li mask: A tensor of type uint8.
*
*@par Third-party framework compatibility
* Incompatible with TensorFlow or Caffe.
*
*/
REG_OP(ReluV2)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, DT_INT32, DT_INT16, DT_INT64, DT_UINT8, DT_UINT16, DT_QINT8}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, DT_INT32, DT_INT16, DT_INT64, DT_UINT8, DT_UINT16, DT_QINT8}))
.OUTPUT(mask, TensorType({DT_UINT8}))
.OP_END_FACTORY_REG(ReluV2)

/**
*@brief Performs parametric ReLU . \n

*@par Inputs:
* Two inputs, including:
*@li x: A multi-dimensional Tensor of type float16 or float32.
*@li weight: A Scalar or 1D Tensor of type float16 or float32, specifying the weight, the initial value of "a". The number of dimensions must be the same as the number of channels . \n

*@par Outputs:
*y: An activated Tensor of type float16 or float32. Has the same dimensions with "x" . \n

*@par Third-party framework compatibility
* Compatible with PyTorch and Caffe operator PReLU.
*/
REG_OP(PRelu)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(weight, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(PRelu)

/**
*@brief Performs the backpropagation of PRelu for training scenarios . \n

*@par Inputs:
* Three inputs, including:
*@li grads: Input gradient. Multi-dimensional Tensors are supported. The data type can be float16 or float32.
*@li features: A multi-dimensional Tensor of type float16 or float32.
*@li weights: A Scalar or 1D Tensor of type float16 or float32, specifying the weight. The number of dimensions must be the same as the number of channels . \n

*@par Outputs:
* Two outputs, including:
*@li dx: Reverse gradient of "features". Has the same dimensions and type as "features".
*@li da: Reverse gradient of "weights". Has the same dimensions and type as "features" . \n

*@par Third-party framework compatibility
* Compatible with PyTorch operator PReluGrad.
*/
REG_OP(PReluGrad)
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(weights, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(da, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(PReluGrad)

/**
*@brief Activation function fused from sigmoid and ReLU, with soft saturation
* on the left and no saturation on the right . \n

*@par Inputs:
*x: A float16, float32 or double, for the input data type . \n

*@par Attributes:
*alpha: An optional float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n

*@par Outputs:
*y: A float16, float32 or double, for the normalized result . \n

*@attention Constraints:
*@li The input is of type float16 or float32 .
* NOTE(review): this constraint is narrower than the input type list above
* (which also names double); confirm which one is authoritative . \n

*@par Multiple batches supported or not
*Supported
*@par Third-party framework compatibility
*@li Compatible with Tensorflow's Elu operator
*@li Compatible with Caffe's ELULayer operator
*
*@since V100R001C33
*/
REG_OP(Elu)
.INPUT(x, TensorType::FloatingDataType())
.OUTPUT(y, TensorType::FloatingDataType())
.ATTR(alpha, Float, 1.0)
.OP_END_FACTORY_REG(Elu)

/**
*@brief Computes gradients for the exponential linear (Elu) operation.
*
*@par Inputs:
* Two inputs, including:
*@li grads: A tensor. Must be one of the following types: float16, float32, float64.
* The backpropagated gradients to the corresponding Elu operation.
*@li activations: A tensor. Has the same type as "grads".
* The outputs of the corresponding Elu operation.
*
*@par Outputs:
* y: A tensor. Has the same type as "grads".
*
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator EluGrad.
*
*/
REG_OP(EluGrad)
.INPUT(grads, TensorType::FloatingDataType())
.INPUT(activations, TensorType::FloatingDataType())
.OUTPUT(y, TensorType::FloatingDataType())
.OP_END_FACTORY_REG(EluGrad)

/**
*@brief Computes the output as x if x > 0 and negative_slope * x if x <= 0 . \n

*@par Inputs:
* One input:
* x: A Tensor. Must be one of the following types: float32, float16, double.
*
*@par Attributes:
*negative_slope: An optional float32. Defaults to "0.0".
*
*@par Outputs:
*y: A Tensor. Has the same type as "x".
*
*@par Third-party framework compatibility
* Compatible with the Caffe operator ReLU.
*/
REG_OP(LeakyRelu)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE}))
.ATTR(negative_slope, Float, 0.0)
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE}))
.OP_END_FACTORY_REG(LeakyRelu)

/**
*@brief Computes the output as gradients if features > 0 and negative_slope * gradients if features <= 0 . \n

*@par Inputs:
* Two inputs, including:
* @li gradients: A Tensor. Must be one of the following types: float16, float32, double.
* @li features: A Tensor. Has the same type as "gradients" . \n

*@par Attributes:
*negative_slope: An optional float32. Defaults to "0.0" . \n

*@par Outputs:
*backprops: A Tensor. Has the same type as "gradients" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator LeakyReluGrad.
*/
REG_OP(LeakyReluGrad)
.INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.ATTR(negative_slope, Float, 0.0)
.OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(LeakyReluGrad)

/**
*@brief Thresholds grad each element of the input Tensor . \n

*@par Inputs:
* Two inputs, including:
* @li gradients: A Tensor shape and dtype of input gradients. Support float16, int32.
* @li features: A Tensor shape and dtype of input features. Support float16, int32 . \n

*@par Attributes:
*threshold: A required float32 scale value to threshold at . \n

*@par Outputs:
*backprops: A Tensor of shape and dtype of output backprops, should be same shape and type as inputs . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ThresholdGradV2D)
.INPUT(gradients, TensorType({DT_INT32, DT_FLOAT16}))
.INPUT(features, TensorType({DT_INT32, DT_FLOAT16}))
.OUTPUT(backprops, TensorType({DT_INT32, DT_FLOAT16}))
.REQUIRED_ATTR(threshold, Float)
.OP_END_FACTORY_REG(ThresholdGradV2D)

/**
*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value . \n

*@par Inputs:
*x: A Tensor dtype of real number . \n

*@par Attributes:
* Two required attributes, including:
*@li threshold: A float32 scale value to threshold at.
*@li value: A float32 scale value to replace with . \n

*@par Outputs:
*y: A Tensor of shape and dtype of output, should be same shape and type as input . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(ThresholdV2D)
.INPUT(x, TensorType::RealNumberType())
.OUTPUT(y, TensorType::RealNumberType())
.REQUIRED_ATTR(threshold, Float)
.REQUIRED_ATTR(value, Float)
.OP_END_FACTORY_REG(ThresholdV2D)

/**
*@brief Computes the Mish activation of "x" element-wise .
* NOTE(review): the formula is not visible in this header; Mish is
* conventionally defined as x * tanh(softplus(x)) - confirm against the
* kernel implementation . \n

*@par Inputs:
*One input:
*x: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
* Compatible with TensorFlow operator Mish.
*/

REG_OP(Mish)
.INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 }))
.OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 }))
.OP_END_FACTORY_REG(Mish)

/**
* @brief pytorch hardtanh_backward operator.
*
* @par Inputs:
* 2 inputs, including:
* @li result, the first operand tensor,
* datatype: float16/float32, format:ND/5HD.
* @li grad, the second operand tensor,
* datatype:float16/float32, format:ND/5HD.
* NOTE(review): these were previously described as the "minimum" and
* "maximum tensor of the linear region range", which appears to be copied
* from the min_val/max_val attributes. Presumably "result" is the forward
* hardtanh tensor and "grad" the incoming gradient - confirm. \n

* @par Attributes:
* 2 optional attributes, including:
* @li min_val, minimum value of the linear region range, datatype:float.
* Defaults to "-1.0".
* @li max_val, maximum value of the linear region range, datatype:float.
* Defaults to "1.0". \n

* @par Outputs:
* 1 output, including:
* @li y, hardtanh_backward output tensor, datatype and format is same as
* input result. \n

* @attention Constraints:
* This operator only supports dataType: float16/float32, format: ND/5HD. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator HardtanhGrad.
*/
REG_OP(HardtanhGrad)
.INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */
.INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Second operand." */
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Result, has same element type as two inputs" */
.ATTR(min_val, Float, -1.0)
.ATTR(max_val, Float, 1.0)
.OP_END_FACTORY_REG(HardtanhGrad)

/**
* @brief Calculates the softplus loss function with attributes of beta and threshold. \n

* @par Inputs:
* One input, including:
* @li x: A mutable Tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
* Two optional attributes, including:
* @li beta: An optional float. Defaults to "1.0" \n

* @li threshold: An optional float. Defaults to "20.0" \n

* @par Outputs:
* @li y: A mutable Tensor. Has the same type as "x" \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Softplus.
*/
REG_OP(SoftplusV2)
.INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.ATTR(beta, Float, 1.0)
.ATTR(threshold, Float, 20.0)
.OP_END_FACTORY_REG(SoftplusV2)

/**
* @brief Calculates the reversed outputs of the function "softplus_v2". \n

* @par Inputs:
* Two inputs, including:
* @li input_gradients: A mutable Tensor. Must be one of the following types:
* float16, float32.
* @li input_features: A mutable Tensor of the same type as "input_gradients" \n

* @par Attributes:
* Two optional attributes, including:
* @li beta: An optional float. Defaults to "1.0" \n

* @li threshold: An optional float. Defaults to "20.0" \n

* @par Outputs:
* @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SoftplusGrad.
*/
REG_OP(SoftplusV2Grad)
.INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 }))
.ATTR(beta, Float, 1.0)
.ATTR(threshold, Float, 20.0)
.OP_END_FACTORY_REG(SoftplusV2Grad)

/**
* @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor)
* where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise.
*
* @par Inputs:
* one input including:
* @li x: input A Tensor. Must be one of the following types: float32, float16
*
* @par Attributes:
* @li alpha: An optional float, the threshold used in the formula above.
* Defaults to "1.0".
*
* @par Outputs:
* one output including:
* @li y:A Tensor of the same type as x
*
*/
REG_OP(ThresholdedRelu)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(alpha, Float, 1.0)
.OP_END_FACTORY_REG(ThresholdedRelu)

/**
* @brief Calculate the hard shrinkage function. \n

* @par Inputs:
* One input, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n

* @par Outputs:
* output_y: A Tensor with the same dtype and shape as "input_x". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Hardshrink. \n
*/
REG_OP(HardShrink)
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(lambd, Float, 0.5)
.OP_END_FACTORY_REG(HardShrink)

/**
* @brief Calculate the hard sigmoid function. \n

* @par Inputs:
* One input, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32, int32. \n

* @par Attributes:
* @li alpha: An optional float. Defaults to 0.16666666. \n
* @li beta: An optional float. Defaults to 0.5. \n

* @par Outputs:
* output_y: A Tensor of type float16 or float32, with the same shape as
* "input_x".
* NOTE(review): int32 is accepted for "input_x" but is not in the registered
* output type list, so the output dtype cannot match an int32 input -
* confirm the resulting dtype in that case. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Hardsigmoid. \n
*/
REG_OP(HardSigmoid)
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))
.ATTR(alpha, Float, 0.16666666)
.ATTR(beta, Float, 0.5)
.OP_END_FACTORY_REG(HardSigmoid)

/**
* @brief Calculate the soft shrinkage function. \n

* @par Inputs:
* One input, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n

* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n

* @par Outputs:
* output_y: A Tensor with the same dtype and shape as "input_x". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Softshrink. \n
*/
REG_OP(SoftShrink)
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(lambd, Float, 0.5)
.OP_END_FACTORY_REG(SoftShrink)

/**
* @brief Calculate the reversed outputs of the function "soft_shrink". \n

* @par Inputs:
* Two inputs, including:
* @li input_grad: A tensor. Must be one of the following types:
* float16, float32. \n
* @li input_x: A tensor of the same dtype as "input_grad". \n

* @par Attributes:
* @li lambd: An optional float. Defaults to 0.5. \n

* @par Outputs:
* output_y: A Tensor of the same dtype and shape as "input_grad". \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator SoftShrinkGrad. \n
*/
REG_OP(SoftShrinkGrad)
.INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(lambd, Float, 0.5)
.OP_END_FACTORY_REG(SoftShrinkGrad)
/**
*@brief Calculate -ln(1+e^(-x)). \n

*@par Inputs:
*One input, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Outputs:
*One output, including:
* @li y: A tensor with the same type and shape as "x". \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator LogSigmoid. \n
*/
REG_OP(LogSigmoid)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */
.OP_END_FACTORY_REG(LogSigmoid)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_

+ 122
- 0
third_party/fwkacllib/inc/inc/ops/npu_loss_scale_ops.h View File

@@ -0,0 +1,122 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file npu_loss_scale_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_
#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Computes NPU alloc float status operator function . \n

*@par Inputs:
*None. Only an output is registered . \n

*@par Outputs:
*data: A Tensor of data value. Must be float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(NPUAllocFloatStatusOperator)
.OUTPUT(data, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(NPUAllocFloatStatusOperator)

/**
*@brief Computes NPU clear float status operator function . \n

*@par Inputs:
*One input:
*addr: A Tensor of data memory address. Must be float32 . \n

*@par Outputs:
*data: A Tensor of data value. Must be float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(NPUClearFloatStatusOperator)
.INPUT(addr, TensorType{DT_FLOAT})
.OUTPUT(data, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(NPUClearFloatStatusOperator)

/**
*@brief Computes NPU get float status operator function . \n

*@par Inputs:
*One input:
*addr: A Tensor of data memory address. Must be float32 . \n

*@par Outputs:
*data: A Tensor of data value. Must be float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(NPUGetFloatStatusOperator)
.INPUT(addr, TensorType{DT_FLOAT})
.OUTPUT(data, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(NPUGetFloatStatusOperator)

/**
*@brief Produces a variable with 0 in memory . \n

*@par Outputs:
*data: A Tensor of type float32, output eight numbers with a value of zero.
* NOTE(review): this output was previously documented as "y" of type int32,
* but the registration declares "data" with DT_FLOAT - confirm the intended
* dtype.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(NPUAllocFloatStatus)
.OUTPUT(data, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(NPUAllocFloatStatus)

/**
*@brief Set the value of address 0x40000 to 0 in each core . \n

*@par Inputs:
*One input:
*addr: A tensor of type float32 . \n

*@par Outputs:
*data: A Tensor of type float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(NPUClearFloatStatus)
.INPUT(addr, TensorType{DT_FLOAT})
.OUTPUT(data, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(NPUClearFloatStatus)

/**
*@brief Get the value of address 0x40000 . \n

*@par Inputs:
*One input:
*addr: A tensor of type float32 . \n

*@par Outputs:
*data: A Tensor of type float32.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(NPUGetFloatStatus)
.INPUT(addr, TensorType{DT_FLOAT})
.OUTPUT(data, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(NPUGetFloatStatus)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_

+ 27
- 0
third_party/fwkacllib/inc/inc/ops/outfeed_ops.h View File

@@ -0,0 +1,27 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file outfeed_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_

#include "data_flow_ops.h"

#endif // OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_


+ 445
- 0
third_party/fwkacllib/inc/inc/ops/pad_ops.h View File

@@ -0,0 +1,445 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file pad_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_

#include "graph/operator_reg.h"
namespace ge {

/**
*@brief Creates a tensor filled with a scalar value.
* This operation creates a tensor of shape "dims" and fills it with "value".
*
*@par Inputs:
*Two inputs, including:
*@li dims: A 1D tensor of types int32 or int64. Represents the shape of the output tensor.
*@li value: A 0D scalar. Specifies the value to fill the returned tensor.
* Must be one of the following types:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
*
*@par Outputs:
* y: A tensor. Has the same type as "value".
*
*@par Third-party framework compatibility
*@li Compatible with the TensorFlow operator Fill.
*@li Compatible with the Caffe operator Filler.
*
*/
REG_OP(Fill)
.INPUT(dims, TensorType::IndexNumberType())
.INPUT(value, TensorType::BasicType())
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(Fill)

/**
*@brief Creates a tensor filled with a scalar value.
* This operation creates a tensor of shape "dims" and fills it with "value".
*
*@par Inputs:
* value: A 0D scalar for the value to fill the returned tensor. Must be one of
* the following types:
* float32, float16, int8, int16, uint16, uint8, int32, int64, uint32, uint64,
* bool, double.
*
*@par Attributes:
* dims: A required list of ints. Represents the shape of the output tensor.
*
*@par Outputs:
* y: A tensor. Has the same type as "value".
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Fill instead.
*/
REG_OP(FillD)
.INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16,
DT_UINT16, DT_UINT8, DT_INT32, DT_INT64,
DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16,
DT_UINT8, DT_INT32, DT_INT64, DT_UINT32,
DT_UINT64, DT_BOOL, DT_DOUBLE}))
.REQUIRED_ATTR(dims, ListInt)
.OP_END_FACTORY_REG(FillD)

/**
*@brief Broadcasts an array for a compatible shape.
* Broadcasting is the process of making arrays to have compatible shapes
* for arithmetic operations. Two shapes are compatible if for each
* dimension pair they are either equal or one of them is one. When trying
* to broadcast a Tensor to a shape, it starts with the trailing dimensions,
* and works its way forward.
*
*@par Inputs:
*Two inputs, including:
*@li x: A tensor. The tensor to broadcast.
*@li shape: A 1D tensor of type int32, for the shape of the desired output.
*
*@par Outputs:
* y: A tensor. Has the same type as "x".
*
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator BroadcastTo.
*
*/
REG_OP(BroadcastTo)
.INPUT(x, TensorType::BasicType())
.INPUT(shape, TensorType({DT_INT32}))
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(BroadcastTo)

/**
*@brief Broadcasts an array for a compatible shape.
* Broadcasting is the process of making arrays to have compatible shapes
* for arithmetic operations. Two shapes are compatible if for each
* dimension pair they are either equal or one of them is one. When trying
* to broadcast a Tensor to a shape, it starts with the trailing dimensions,
* and works its way forward.
*
*@par Inputs:
* x: A tensor. The tensor to broadcast.
*
*@par Attributes:
* shape: A required list of ints, for the shape of the desired output.
*
*@par Outputs:
* y: A tensor. Has the same type as "x".
*
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator BroadcastTo.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use BroadcastTo instead.
*/
REG_OP(BroadcastToD)
.INPUT(x, TensorType::BasicType())
.OUTPUT(y, TensorType::BasicType())
.REQUIRED_ATTR(shape, ListInt)
.OP_END_FACTORY_REG(BroadcastToD)

/**
*@brief Pads a tensor . \n

*@par Inputs:
*Two inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32,
* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
* complex128, uint32, uint64.
* @li paddings: A Tensor of type int32 or int64. Specifies the padding to apply . \n

*@par Outputs:
*y: A Tensor of the same type as "x" . \n

*@par Third-party framework compatibility:
* Compatible with TensorFlow operator Pad.
*/
REG_OP(Pad)
.INPUT(x, TensorType::BasicType())
.INPUT(paddings, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(Pad)

/**
*@brief Pads a tensor . \n

*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n

*@par Attributes:
*paddings: A required "vector<vector<int>>".
* For each dimension D of input, paddings[D, 0] indicates how many
* values to add before the contents of tensor in that dimension,
* and paddings[D, 1] indicates how many values to add after the
* contents of tensor in that dimension . \n

*@par Outputs:
*y: A Tensor of the same type as "x" . \n

*@par Third-party framework compatibility:
* Compatible with TensorFlow operator Pad.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
*/
REG_OP(PadD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(paddings, ListListInt)
.OP_END_FACTORY_REG(PadD)

/**
*@brief Pads a tensor with a caller-supplied constant value . \n

*@par Inputs:
*Three inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32,
* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
* complex128, uint32, uint64.
* @li paddings: A Tensor of type int32 or int64.
* @li constant_values: A Tensor. Must have the same type as input "x" . \n

*@par Outputs:
*y: A Tensor of the same type as "x" . \n

*@par Third-party framework compatibility:
* Compatible with TensorFlow operator PadV2.
*/
REG_OP(PadV2)
.INPUT(x, TensorType::BasicType())
.INPUT(paddings, TensorType::IndexNumberType())
.INPUT(constant_values, TensorType::BasicType())
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(PadV2)

/**
*@brief Pads a tensor with a caller-supplied constant value . \n

*@par Inputs:
*Two inputs, including:
*@li x: A Tensor. Must be one of the following types: float16, float32, int32.
*@li constant_values: A Tensor. Must have the same type as input "x" . \n

*@par Attributes:
*paddings: A required "vector<vector<int>>".
* For each dimension D of input, paddings[D, 0] indicates how many
* values to add before the contents of tensor in that dimension,
* and paddings[D, 1] indicates how many values to add after the
* contents of tensor in that dimension . \n

*@par Outputs:
*y: A Tensor of the same type as "x" . \n

*@par Third-party framework compatibility:
* Compatible with TensorFlow operator PadV2.
*/
REG_OP(PadV2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.INPUT(constant_values, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(paddings, ListListInt)
.OP_END_FACTORY_REG(PadV2D)

/**
*@brief Pads a tensor.

*@par Inputs:
*Three inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32,
* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
* complex128, uint32, uint64.
* @li paddings: A Tensor of type int32 or int64.
* @li constant_values: An optional Tensor holding the pad value.
* NOTE(review): the earlier text said "int32 or int64", but the input is registered
* with TensorType::BasicType(); confirm the intended type constraint.

*@par Attributes:
* @li mode: An optional string, Defaults to "constant", indicates paddings mode,
* support "constant", "reflect", "edge"
* @li paddings_contiguous: An optional bool value, Defaults to true.
* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...]
* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...]

*@par Outputs:
*y: A Tensor of the same type as "x".

*@par Third-party framework compatibility:
* Compatible with ONNX operator Pad.
*/
REG_OP(PadV3)
.INPUT(x, TensorType::BasicType())
.INPUT(paddings, TensorType::IndexNumberType())
.OPTIONAL_INPUT(constant_values, TensorType::BasicType())
.OUTPUT(y, TensorType::BasicType())
.ATTR(mode, String, "constant")
.ATTR(paddings_contiguous, Bool, true)
.OP_END_FACTORY_REG(PadV3)

/**
*@brief Pads a tensor.

*@par Inputs:
*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8.
* NOTE(review): the earlier text also listed int32, but DT_INT32 is not in the
* registered type list below; confirm which one is intended.

*@par Attributes:
* @li paddings: A required "vector<vector<int>>".
* For each dimension D of input, paddings[D, 0] indicates how many
* values to add before the contents of tensor in that dimension,
* and paddings[D, 1] indicates how many values to add after the
* contents of tensor in that dimension.
* @li constant_values: An optional int value for pad. Defaults to 0.
* @li mode: An optional string, Defaults to "constant", indicates paddings mode,
* support "constant", "reflect", "edge"
* @li paddings_contiguous: An optional bool value, Defaults to true.
* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...]
* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...]

*@par Outputs:
*y: A Tensor of the same type as "x".

*@par Third-party framework compatibility:
* Compatible with ONNX operator Pad.

* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead.
*/
REG_OP(PadV3D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
.REQUIRED_ATTR(paddings, ListListInt)
.ATTR(constant_values, Int, 0)
.ATTR(mode, String, "constant")
.ATTR(paddings_contiguous, Bool, true)
.OP_END_FACTORY_REG(PadV3D)

/**
*@brief Creates a diagonal tensor.

*@par Inputs:
*Two inputs, including:
* @li x: A mutable Tensor. Must be one of the following types:
* float16, float32, int32.
* @li assist: A mutable Tensor with rank k, where k is at most 1.
* Has the same type as "x" . \n

*@par Outputs:
*y: A mutable Tensor. Has the same type as "x" . \n

*@see Diag()
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Diag.
*
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use Diag instead.
*/
REG_OP(DiagD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.INPUT(assist, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OP_END_FACTORY_REG(DiagD)

/**
*@brief Creates a diagonal tensor.

*@par Inputs:
*One input, including:
* x: A mutable Tensor with rank k, where k is at most 1. Must be one of the
* following types:
* float16, float32, double, int32, int64, complex64, complex128 . \n

*@par Outputs:
*y: A mutable Tensor. Has the same type as "x" . \n

*@see DiagD()
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Diag.
*/
REG_OP(Diag)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32,
DT_INT64, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32,
DT_INT64, DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(Diag)

/**
*@brief Ascend Padding, pads the last dimension of the input.

*@par Inputs:
*One input, including:
*x: A Tensor whose last dimension must be 1. For example: [624000, 1] . \n

*@par Attributes:
*pad_dim_size: An optional int. Defaults to 8.
* Presumably the size ("padDimSize") that the last dimension is padded to - confirm . \n

*@par Outputs:
*y: A Tensor with the last dimension of "x" padded to padDimSize,
* i.e. [624000, padDimSize] . \n

*@par Third-party framework compatibility
* NOTE(review): the earlier text claimed compatibility with the TensorFlow operator
* Diag, which looks copied from the Diag operator; confirm the intended statement.
*/
REG_OP(AscendPadding)
.INPUT(x, TensorType::BasicType())
.OUTPUT(y, TensorType::BasicType())
.ATTR(pad_dim_size, Int, 8)
.OP_END_FACTORY_REG(AscendPadding)


/**
*@brief EmbeddingRankId, traverse the index calculation server and its position in the server . \n

*@par Restrictions:
*Warning:THIS FUNCTION IS DEPRECATED. Please do not use. \n

*@par Inputs:
*Two inputs, including:
*@li addr_table: A Tensor of type uint64 whose last dimension must be 3. For example: [8, 3].
*@li index: A Tensor of type int64, int32 or uint64. For example: [640000] . \n

*@par Attributes:
*@li row_memory: An optional int. Defaults to 320.
*@li mode: An optional string. Defaults to "mod" . \n

*@par Outputs:
*rank_id: A Tensor of type uint64 with shape [size, 3], where "size" is the first
* dimension of "index". Its last dimension must be 3. For example: [640000, 3] . \n

*@par Third-party framework compatibility
* NOTE(review): the earlier text claimed compatibility with the TensorFlow operator
* Diag, which looks copied from the Diag operator; confirm the intended statement.
*/
REG_OP(EmbeddingRankId)
.INPUT(addr_table, TensorType({DT_UINT64}))
.INPUT(index, TensorType({DT_INT64,DT_INT32,DT_UINT64}))
.OUTPUT(rank_id, TensorType({DT_UINT64}))
.ATTR(row_memory, Int, 320)
.ATTR(mode, String, "mod")
.OP_END_FACTORY_REG(EmbeddingRankId)

/**
* @brief Fills a tensor of the specified shape with a value.

* @par Inputs:
* One input, including:
* dims: A Tensor of type int16, int32 or int64. Specifies the shape of the tensor to fill.

* @par Attributes:
* value: An optional float value. Defaults to 0.0.

* @par Outputs:
* y: A Tensor. Has the shape specified by input "dims" and is filled with attr "value".
* Must be one of the following types: float32, double, int8, int16, int32, int64.
* NOTE(review): each type is listed exactly once; if float16 is meant to be
* supported, DT_FLOAT16 has to be added explicitly.

* @par Third-party framework compatibility
* Compatible with the ONNX operator ConstantOfShape.
*/
REG_OP(FillV2)
.INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
.ATTR(value, Float, 0)
.OP_END_FACTORY_REG(FillV2)

/**
* @brief Fills a tensor of the specified shape with a value.

* @par Attributes:
* @li value: An optional float value. Defaults to 0.0.
* @li dims: A required list of ints. Specifies the shape of the tensor to fill.

* @par Outputs:
* y: A Tensor. Has the shape specified by attr "dims" and is filled with attr "value".
* Must be one of the following types: float32, double, int8, uint8, int16, int32, int64.
* NOTE(review): each type is listed exactly once; if float16 is meant to be
* supported, DT_FLOAT16 has to be added explicitly.

* @par Third-party framework compatibility
* Compatible with the ONNX operator ConstantOfShape.
*/
REG_OP(FillV2D)
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64}))
.ATTR(value, Float, 0)
.REQUIRED_ATTR(dims, ListInt)
.OP_END_FACTORY_REG(FillV2D)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_

+ 56
- 0
third_party/fwkacllib/inc/inc/ops/parsing_ops.h View File

@@ -0,0 +1,56 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file parsing_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {

/**
*@brief Converts each string in the input Tensor to the specified numeric type . \n

*@par Inputs:
*Inputs include:
*x: A Tensor. Must be one of the following types: string . \n

*@par Attributes:
*out_type: An optional type. The numeric type to interpret each string in "x" as.
* Defaults to DT_FLOAT . \n

*@par Outputs:
*y: A Tensor. Must be one of the following types: float32, double, int32, int64.
* Presumably its type is the one given by "out_type" - confirm . \n

*@attention Constraints:
*The implementation for StringToNumber on Ascend uses AICPU, with bad performance. \n

*@par Third-party framework compatibility
*@li compatible with tensorflow StringToNumber operator.
*/
REG_OP(StringToNumber)
.INPUT(x, TensorType({DT_STRING}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64}))
.ATTR(out_type, Type, DT_FLOAT)
.OP_END_FACTORY_REG(StringToNumber)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_

+ 224
- 0
third_party/fwkacllib/inc/inc/ops/quantize_ops.h View File

@@ -0,0 +1,224 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file quantize_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_
#include "graph/operator_reg.h"

namespace ge {

/**
* @brief Dequantizes the input tensor into a float tensor.
* [min_range, max_range] are float32 tensors that specify the range
* for "y".
* The "mode" attribute controls exactly which calculations are used to convert
* the float values to their quantized equivalents.
* @par Inputs:
* @li x: A Tensor. Must be one of the following types: qint8, quint8,
* qint32, qint16, quint16.
* @li min_range: A Tensor of type float32.
* Specifies the minimum scalar value possibly produced for the input.
* @li max_range: A Tensor of type float32.
* Specifies the maximum scalar value possibly produced for the input . \n

* @par Attributes:
* mode: An optional string from: "MIN_COMBINED", "MIN_FIRST", and "SCALED".
* Defaults to "MIN_COMBINED" . \n

* @par Outputs:
* y: A Tensor of type float32 . \n

* @attention Constraints:
* @li "min_range" and "max_range" have the same shapes.
* @li "x" and "y" have the same shapes . \n

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator Dequantize.
*/
REG_OP(Dequantize)
.INPUT(x, TensorType({DT_QINT8, DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16}))
.INPUT(min_range, TensorType({DT_FLOAT}))
.INPUT(max_range, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT}))
.ATTR(mode, String, "MIN_COMBINED")
.OP_END_FACTORY_REG(Dequantize)

/**
*@brief Quantizes the input . \n

*@par Inputs:
*x: An NC1HWC0 tensor of type float16 or float32, specifying the input . \n

*@par Attributes:
*@li scale: A required float, specifying the scaling ratio.
*@li offset: A required float, specifying the offset.
* NOTE(review): the earlier text called this a float16, but the attribute is
* registered with type Float like "scale"; confirm the intended precision.
*@li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
*@li round_mode: An optional string, specifying the float16 to int8 cast type.
* The value range is [Round, Floor, Ceiling, Truncate]. Defaults to "Round" . \n

*@par Outputs:
*y: The quantized output tensor of type int8 and with format NC1HWC0 . \n

*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(AscendQuant)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_INT8}))
.REQUIRED_ATTR(scale, Float)
.REQUIRED_ATTR(offset, Float)
.ATTR(sqrt_mode, Bool, false)
.ATTR(round_mode, String, "Round")
.OP_END_FACTORY_REG(AscendQuant)

/**
*@brief Dequantizes the input . \n

*@par Inputs:
*Two inputs, including:
*@li x: An NC1HWC0 tensor of type int32, specifying the input.
*@li deq_scale: An NC1HWC0 tensor of type float16 or uint64, specifying the scaling ratio . \n

*@par Attributes:
*@li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False".
*@li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False".
*@li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT" . \n

*@par Outputs:
*y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0 . \n

*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(AscendDequant)
.INPUT(x, TensorType({DT_INT32}))
.INPUT(deq_scale, TensorType({DT_FLOAT16, DT_UINT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(sqrt_mode, Bool, false)
.ATTR(relu_flag, Bool, false)
.ATTR(dtype, Int, DT_FLOAT)
.OP_END_FACTORY_REG(AscendDequant)

/**
*@brief Anti quantizes the input . \n

*@par Inputs:
*x: An NC1HWC0 tensor of type int8, specifying the input . \n

*@par Attributes:
*@li scale: A required float32 scale.
*@li offset: A required float32 offset.
*@li dtype: An optional int32, specifying the output data type. Defaults to "DT_FLOAT".
*@li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False" . \n

*@par Outputs:
*y: The anti-quantized output tensor of type float16 or float32 and with format NC1HWC0 . \n

*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(AscendAntiQuant)
.INPUT(x, TensorType({DT_INT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(scale, Float)
.REQUIRED_ATTR(offset, Float)
.ATTR(dtype, Int, DT_FLOAT)
.ATTR(sqrt_mode, Bool, false)
.OP_END_FACTORY_REG(AscendAntiQuant)

/**
*@brief Dequantizes the input of int16 . \n

*@par Inputs:
*Three inputs, including:
*@li x0: An NC1HWC0 tensor of type int32, specifying the input.
*@li deq_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio.
*@li x1: An optional NC1HWC0 tensor of type int16, specifying the input . \n

*@par Attributes:
*relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n

*@par Outputs:
*y: The dequantized output tensor of type int16 and with format NC1HWC0 . \n

*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(AscendDequantS16)
.INPUT(x0, TensorType({DT_INT32}))
.INPUT(deq_scale, TensorType({DT_UINT64}))
.OPTIONAL_INPUT(x1, TensorType({DT_INT16}))
.OUTPUT(y, TensorType({DT_INT16}))
.ATTR(relu_flag, Bool, false)
.OP_END_FACTORY_REG(AscendDequantS16)

/**
*@brief Requantizes the input . \n

*@par Inputs:
*Two inputs, including:
*@li x: An NC1HWC0 tensor of type int32, specifying the input.
*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio . \n

*@par Attributes:
*relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n

*@par Outputs:
*y: The requantized output tensor of type int8 and with format NC1HWC0 . \n

*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(AscendRequant)
.INPUT(x, TensorType({DT_INT32}))
.INPUT(req_scale, TensorType({DT_UINT64}))
.OUTPUT(y, TensorType({DT_INT8}))
.ATTR(relu_flag, Bool, false)
.OP_END_FACTORY_REG(AscendRequant)

/**
*@brief Requantizes the input of int16 . \n

*@par Inputs:
*Three inputs, including:
*@li x: An NC1HWC0 tensor of type int16, specifying the input.
*@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio.
*@li x1: An optional NC1HWC0 tensor of type int16 . \n

*@par Attributes:
*@li dual_output: An optional bool, specifying whether to perform dual output, either "True" or "False". Defaults to "False".
*@li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n

*@par Outputs:
*@li y: The requantized output tensor of type int8 and with format NC1HWC0.
*@li y1: The requantized output tensor of type int16 and with format NC1HWC0 . \n

*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(AscendRequantS16)
.INPUT(x, TensorType({DT_INT16}))
.INPUT(req_scale, TensorType({DT_UINT64}))
.OPTIONAL_INPUT(x1, TensorType({DT_INT16}))
.OUTPUT(y, TensorType({DT_INT8}))
.OUTPUT(y1, TensorType({DT_INT16}))
.ATTR(dual_output, Bool, false)
.ATTR(relu_flag, Bool, false)
.OP_END_FACTORY_REG(AscendRequantS16)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_

+ 65
- 0
third_party/fwkacllib/inc/inc/ops/ragged_array_ops.h View File

@@ -0,0 +1,65 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file ragged_array_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_

#include "graph/operator.h"
#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Gather ragged slices from `params` axis `0` according to `indices` . \n

*@par Inputs:
*Three inputs, including:
*@li params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the
*`params` RaggedTensor input. It's a dynamic input. Must be of type int32 or int64.
*@li params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change
*at the python level from dense_values to flat_values, so dense_values is the
*deprecated name. Must be of type int32 or int64.
*@li indices: Indices in the outermost dimension of `params` of the values that should be
*gathered. Must be of type int32 or int64 . \n

*@par Attributes:
*@li Tsplits: A required type attribute.
*@li PARAMS_RAGGED_RANK: An optional int. Defaults to 1.
* Presumably the ragged rank of the `params` RaggedTensor - confirm.
*@li OUTPUT_RAGGED_RANK: An optional int. Defaults to 0.
*The ragged rank of the output RaggedTensor. `output_nested_splits` will contain
*this number of `row_splits` tensors. This value should equal
*`indices.shape.ndims + params.ragged_rank - 1` . \n

*@par Outputs:
*@li output_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the
*returned RaggedTensor. It's a dynamic output.
*@li output_dense_values: The `flat_values` for the returned RaggedTensor . \n

*@par Third-party framework compatibility
* Compatible with tensorflow RaggedGather operator.
*/

REG_OP(RaggedGather)
.DYNAMIC_INPUT(params_nested_splits, TensorType({DT_INT32, DT_INT64}))
.INPUT(params_dense_values, TensorType({DT_INT32, DT_INT64}))
.INPUT(indices, TensorType({DT_INT32, DT_INT64}))
.DYNAMIC_OUTPUT(output_nested_splits, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(output_dense_values, TensorType({DT_INT32, DT_INT64}))
.REQUIRED_ATTR(Tsplits, Type)
.ATTR(PARAMS_RAGGED_RANK, Int, 1)
.ATTR(OUTPUT_RAGGED_RANK, Int, 0)
.OP_END_FACTORY_REG(RaggedGather)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_

+ 98
- 0
third_party/fwkacllib/inc/inc/ops/ragged_conversion_ops.h View File

@@ -0,0 +1,98 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file ragged_conversion_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_
#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Converts a RaggedTensor into a SparseTensor with the same values . \n

*@par Inputs:
*Two inputs, including:
*@li rt_nested_splits: A list of at least 1 Tensor objects with the same type
* in: int32, int64. The row_splits for the RaggedTensor. It's a dynamic input.
*@li rt_dense_values: A Tensor. The flat_values for the RaggedTensor.
* Must be one of the following types: bool, int8, uint8, int16, uint16, int32,
* int64, double, float, float16 . \n

*@par Attributes:
*@li RAGGED_RANK: An optional int, the dynamic number of input rt_nested_splits. Defaults to 1.
*@li Tsplits: An optional type attribute. Defaults to DT_INT64 . \n

*@par Outputs:
*@li sparse_indices: A Tensor of type int64.
*@li sparse_values: A Tensor. Has the same type as rt_dense_values.
*@li sparse_dense_shape: A Tensor of type int64 . \n

*@par Third-party framework compatibility
* Compatible with TensorFlow operator RaggedTensorToSparse.
*/
REG_OP(RaggedTensorToSparse)
.DYNAMIC_INPUT(rt_nested_splits, TensorType({DT_INT32, DT_INT64}))
.INPUT(rt_dense_values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
.OUTPUT(sparse_indices, TensorType({DT_INT64}))
.OUTPUT(sparse_values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
.OUTPUT(sparse_dense_shape, TensorType({DT_INT64}))
.ATTR(RAGGED_RANK, Int, 1)
.ATTR(Tsplits, Type, DT_INT64)
.OP_END_FACTORY_REG(RaggedTensorToSparse)

/**
*@brief Create a dense tensor from a ragged tensor, possibly altering its shape . \n

*@par Inputs:
*Four inputs, including:
*@li shape: A `Tensor`. Must be one of the following types: `int64`, `int32`.
*@li values: A 1D tensor representing the values of the ragged tensor.
* Must be one of the following types: bool, int8, uint8, int16, uint16, int32,
* int64, double, float, float16.
*@li default_value: A `Tensor`. Must have the same type as `values`.
*@li row_partition_tensors: A list of at least 1 `Tensor` objects with the same
* type in: `int64`, `int32`. It's a dynamic input . \n

*@par Attributes:
*@li num_row_partition_tensors: A required int. Number of row partition tensors.
*@li row_partition_types: A required list of `strings`.
* The types of the row partition tensors. At present, these can be:
* "ROW_SPLITS": the row_splits tensor from the ragged tensor.
* "VALUE_ROWIDS": the value_rowids tensor from the ragged tensor.
* "FIRST_DIM_SIZE": if value_rowids is used for the first dimension, then it
* is preceded by "FIRST_DIM_SIZE" . \n

*@par Outputs:
*result: A `Tensor`. Has the same type as `values`.
*/
REG_OP(RaggedTensorToTensor)
.INPUT(shape, TensorType({DT_INT32, DT_INT64}))
.INPUT(values, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16,
DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
.INPUT(default_value, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16,
DT_UINT16, DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
.DYNAMIC_INPUT(row_partition_tensors, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(result, TensorType({DT_BOOL, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16,
DT_INT32, DT_INT64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16}))
.REQUIRED_ATTR(num_row_partition_tensors, Int)
.REQUIRED_ATTR(row_partition_types, ListString)
.OP_END_FACTORY_REG(RaggedTensorToTensor)


} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_

+ 60
- 0
third_party/fwkacllib/inc/inc/ops/ragged_math_ops.h View File

@@ -0,0 +1,60 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file ragged_math_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_

#include "graph/operator.h"
#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Returns a `RaggedTensor` containing the specified sequences of numbers . \n

*@par Inputs:
*Three inputs, including:
*@li starts: The starts of each range. Must be one of the following types:
* float, double, int32, int64.
*@li limits: The limits of each range. Must be one of the following types:
* float, double, int32, int64.
*@li deltas: The deltas of each range. Must be one of the following types:
* float, double, int32, int64 . \n

*@par Attributes:
*Tsplits: A required type attribute.
* Presumably selects the type (int32 or int64) of "rt_nested_splits" - confirm . \n

*@par Outputs:
*@li rt_nested_splits: The `row_splits` for the returned `RaggedTensor`.
* A Tensor of type int32 or int64.
*@li rt_dense_values: The `flat_values` for the returned `RaggedTensor`.
* Must be one of the following types: float, double, int32, int64 . \n

*@attention Constraints:
*The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors.
*The vector inputs must all have the same size. Scalar inputs are broadcast
*to match the size of the vector inputs . \n

*@par Third-party framework compatibility
* Compatible with tensorflow RaggedRange operator.
*/

REG_OP(RaggedRange)
.INPUT(starts, TensorType({DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64}))
.INPUT(limits, TensorType({DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64}))
.INPUT(deltas, TensorType({DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64}))
.OUTPUT(rt_nested_splits, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(rt_dense_values, TensorType({DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64}))
.REQUIRED_ATTR(Tsplits, Type)
.OP_END_FACTORY_REG(RaggedRange)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save