
Pre-merge pull request !1082 from shenwei41/sss

Branch: pull/1082/MERGE
Author: shenwei41 (Gitee), 5 years ago
Commit: 8e903d9619
42 changed files with 2197 additions and 897 deletions
  1. inc/external/acl/acl.h (+4, -4)
  2. inc/external/acl/acl_base.h (+46, -39)
  3. inc/external/acl/acl_mdl.h (+190, -166)
  4. inc/external/acl/acl_op.h (+82, -37)
  5. inc/external/acl/acl_op_compiler.h (+28, -19)
  6. inc/external/acl/acl_prof.h (+21, -21)
  7. inc/external/acl/acl_rt.h (+79, -61)
  8. inc/external/acl/acl_tdt.h (+19, -12)
  9. inc/external/acl/error_codes/ge_error_codes.h (+14, -0)
  10. inc/external/acl/error_codes/rt_error_codes.h (+68, -67)
  11. inc/external/acl/ops/acl_cblas.h (+138, -41)
  12. inc/external/acl/ops/acl_dvpp.h (+222, -147)
  13. inc/external/acl/ops/acl_fv.h (+6, -8)
  14. inc/external/hccl/hccl.h (+16, -17)
  15. inc/external/hccl/hccl_types.h (+42, -42)
  16. inc/external/runtime/rt_error_codes.h (+68, -67)
  17. third_party/fwkacllib/inc/ops/batch_ops.h (+10, -5)
  18. third_party/fwkacllib/inc/ops/data_flow_ops.h (+44, -3)
  19. third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+80, -0)
  20. third_party/fwkacllib/inc/ops/hcom_ops.h (+16, -0)
  21. third_party/fwkacllib/inc/ops/image_ops.h (+58, -2)
  22. third_party/fwkacllib/inc/ops/linalg_ops.h (+46, -38)
  23. third_party/fwkacllib/inc/ops/list_ops.h (+1, -1)
  24. third_party/fwkacllib/inc/ops/math_ops.h (+68, -0)
  25. third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+21, -0)
  26. third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h (+104, -0)
  27. third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+8, -8)
  28. third_party/fwkacllib/inc/ops/nn_detect_ops.h (+46, -1)
  29. third_party/fwkacllib/inc/ops/nn_norm_ops.h (+41, -0)
  30. third_party/fwkacllib/inc/ops/nn_ops.h (+28, -1)
  31. third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h (+43, -1)
  32. third_party/fwkacllib/inc/ops/reduce_ops.h (+153, -2)
  33. third_party/fwkacllib/inc/ops/selection_ops.h (+68, -2)
  34. third_party/fwkacllib/inc/ops/sparse_ops.h (+3, -3)
  35. third_party/fwkacllib/inc/ops/spectral_ops.h (+96, -0)
  36. third_party/fwkacllib/inc/ops/split_combination_ops.h (+9, -9)
  37. third_party/fwkacllib/inc/ops/string_ops.h (+1, -1)
  38. third_party/fwkacllib/inc/runtime/config.h (+19, -0)
  39. third_party/fwkacllib/inc/runtime/dev.h (+5, -0)
  40. third_party/fwkacllib/inc/runtime/kernel.h (+35, -0)
  41. third_party/fwkacllib/inc/runtime/rt_model.h (+14, -0)
  42. third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h (+137, -72)

inc/external/acl/acl.h (+4, -4)

@@ -26,9 +26,9 @@ extern "C" {
#endif

// Current version is 1.0.0
#define ACL_MAJOR_VERSION 1
#define ACL_MINOR_VERSION 0
#define ACL_PATCH_VERSION 0

/**
* @ingroup AscendCL
@@ -70,4 +70,4 @@ ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *min
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_H_
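Worth noting for reviewers: the three macros give the compile-time header version, while aclrtGetVersion (visible in the hunk context above) reports the runtime library's version. A minimal sketch of comparing the two; check_acl_version is a hypothetical helper, not part of this PR:

#include <stdio.h>
#include "acl/acl.h"

/* Hypothetical helper: compare the headers' compile-time version macros
 * with the runtime version reported by aclrtGetVersion. */
static int check_acl_version(void) {
    int32_t major = 0, minor = 0, patch = 0;
    if (aclrtGetVersion(&major, &minor, &patch) != ACL_SUCCESS) {
        return -1; /* any value other than ACL_SUCCESS means failure */
    }
    printf("compiled %d.%d.%d, runtime %d.%d.%d\n",
           ACL_MAJOR_VERSION, ACL_MINOR_VERSION, ACL_PATCH_VERSION,
           major, minor, patch);
    return major == ACL_MAJOR_VERSION ? 0 : 1;
}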

inc/external/acl/acl_base.h (+46, -39)

@@ -111,6 +111,7 @@ static const int ACL_ERROR_DUMP_NOT_RUN = 100045;
static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046;
static const int ACL_ERROR_PROF_API_CONFLICT = 148047;
static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048;
static const int ACL_ERROR_INVALID_OPP_PATH = 148049;

static const int ACL_ERROR_BAD_ALLOC = 200000;
static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
@@ -134,42 +135,42 @@ static const int ACL_ERROR_PROFILING_FAILURE = 500005;
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE

typedef enum {
ACL_DT_UNDEFINED = -1,
ACL_FLOAT = 0,
ACL_FLOAT16 = 1,
ACL_INT8 = 2,
ACL_INT32 = 3,
ACL_UINT8 = 4,
ACL_INT16 = 6,
ACL_UINT16 = 7,
ACL_UINT32 = 8,
ACL_INT64 = 9,
ACL_UINT64 = 10,
ACL_DOUBLE = 11,
ACL_BOOL = 12,
ACL_STRING = 13,
} aclDataType;

typedef enum {
ACL_FORMAT_UNDEFINED = -1,
ACL_FORMAT_NCHW = 0,
ACL_FORMAT_NHWC = 1,
ACL_FORMAT_ND = 2,
ACL_FORMAT_NC1HWC0 = 3,
ACL_FORMAT_FRACTAL_Z = 4,
ACL_FORMAT_NC1HWC0_C04 = 12,
ACL_FORMAT_NDHWC = 27,
ACL_FORMAT_FRACTAL_NZ = 29,
ACL_FORMAT_NCDHW = 30,
ACL_FORMAT_NDC1HWC0 = 32,
ACL_FRACTAL_Z_3D = 33
} aclFormat;

typedef enum {
ACL_DEBUG = 0,
ACL_INFO = 1,
ACL_WARNING = 2,
ACL_ERROR = 3,
} aclLogLevel;

/**
@@ -304,7 +305,9 @@ ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);
* @retval aclTensorDesc pointer.
* @retval nullptr if param is invalid or run out of memory
*/
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims,
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType,
int numDims,
const int64_t *dims,
aclFormat format);

/**
@@ -326,7 +329,8 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc,
size_t dimsCount,
int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);

/**
@@ -423,7 +427,9 @@ ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, si
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum,
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc,
size_t index,
size_t dimRangeNum,
int64_t *dimRange);

/**
@@ -460,7 +466,7 @@ ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
aclTensorDesc **dstDesc);

/**
* @ingroup AscendCL
@@ -548,7 +554,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int nu
*
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);

/**
@@ -559,7 +565,7 @@ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc,
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);

/**
@@ -599,12 +605,13 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBu
* @param ... [IN] the value of current log
*/
ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
const char *fmt, ...);

#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
#define ACL_APP_LOG(level, fmt, ...) \
aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_BASE_H_
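A usage sketch for the re-wrapped aclCreateTensorDesc above; everything referenced comes from this header, and describe_input is a hypothetical helper:

#include "acl/acl_base.h"

/* Describe a 1x3x224x224 float tensor in NCHW layout, then release the
 * descriptor. aclCreateTensorDesc returns nullptr on invalid params or OOM. */
static int describe_input(void) {
    int64_t dims[4] = {1, 3, 224, 224};
    aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT, 4, dims, ACL_FORMAT_NCHW);
    if (desc == NULL) {
        return -1;
    }
    /* ... hand desc to op or model APIs ... */
    aclDestroyTensorDesc(desc);
    return 0;
}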

inc/external/acl/acl_mdl.h (+190, -166)

@@ -27,19 +27,19 @@
extern "C" {
#endif

#define ACL_MAX_DIM_CNT 128
#define ACL_MAX_TENSOR_NAME_LEN 128
#define ACL_MAX_BATCH_NUM 128
#define ACL_MAX_HW_NUM 128
#define ACL_MAX_SHAPE_COUNT 128
#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF
#define ACL_MDL_LOAD_FROM_FILE 1
#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2
#define ACL_MDL_LOAD_FROM_MEM 3
#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4
#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5
#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6

#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"
@@ -51,123 +51,123 @@ typedef struct aclAippExtendInfo aclAippExtendInfo;
typedef struct aclmdlConfigHandle aclmdlConfigHandle;

typedef enum {
ACL_YUV420SP_U8 = 1,
ACL_XRGB8888_U8,
ACL_RGB888_U8,
ACL_YUV400_U8,
ACL_NC1HWC0DI_FP16,
ACL_NC1HWC0DI_S8,
ACL_ARGB8888_U8,
ACL_YUYV_U8,
ACL_YUV422SP_U8,
ACL_AYUV444_U8,
ACL_RAW10,
ACL_RAW12,
ACL_RAW16,
ACL_RAW24,
ACL_AIPP_RESERVED = 0xffff,
} aclAippInputFormat;

typedef enum {
ACL_MDL_PRIORITY_INT32 = 0,
ACL_MDL_LOAD_TYPE_SIZET,
ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */
ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */
ACL_MDL_MEM_SIZET,
ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */
ACL_MDL_WEIGHT_SIZET,
ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to workspace memory of model with shallow copy */
ACL_MDL_WORKSPACE_SIZET,
ACL_MDL_INPUTQ_NUM_SIZET,
ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */
ACL_MDL_OUTPUTQ_NUM_SIZET,
ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */
} aclmdlConfigAttr;

typedef enum {
ACL_DATA_WITHOUT_AIPP = 0,
ACL_DATA_WITH_STATIC_AIPP,
ACL_DATA_WITH_DYNAMIC_AIPP,
ACL_DYNAMIC_AIPP_NODE
} aclmdlInputAippType;

typedef struct aclmdlIODims {
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */
size_t dimCount; /**< dim array count */
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */
} aclmdlIODims;

typedef struct aclAippDims {
aclmdlIODims srcDims; /**< input dims before model transform */
size_t srcSize; /**< input size before model transform */
aclmdlIODims aippOutdims; /**< aipp output dims */
size_t aippOutSize; /**< aipp output size */
} aclAippDims;

typedef struct aclmdlBatch {
size_t batchCount; /**< batch array count */
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */
} aclmdlBatch;

typedef struct aclmdlHW {
size_t hwCount; /**< height&width array count */
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */
} aclmdlHW;

typedef struct aclAippInfo {
aclAippInputFormat inputFormat;
int32_t srcImageSizeW;
int32_t srcImageSizeH;
int8_t cropSwitch;
int32_t loadStartPosW;
int32_t loadStartPosH;
int32_t cropSizeW;
int32_t cropSizeH;
int8_t resizeSwitch;
int32_t resizeOutputW;
int32_t resizeOutputH;
int8_t paddingSwitch;
int32_t leftPaddingSize;
int32_t rightPaddingSize;
int32_t topPaddingSize;
int32_t bottomPaddingSize;
int8_t cscSwitch;
int8_t rbuvSwapSwitch;
int8_t axSwapSwitch;
int8_t singleLineMode;
int32_t matrixR0C0;
int32_t matrixR0C1;
int32_t matrixR0C2;
int32_t matrixR1C0;
int32_t matrixR1C1;
int32_t matrixR1C2;
int32_t matrixR2C0;
int32_t matrixR2C1;
int32_t matrixR2C2;
int32_t outputBias0;
int32_t outputBias1;
int32_t outputBias2;
int32_t inputBias0;
int32_t inputBias1;
int32_t inputBias2;
int32_t meanChn0;
int32_t meanChn1;
int32_t meanChn2;
int32_t meanChn3;
float minChn0;
float minChn1;
float minChn2;
float minChn3;
float varReciChn0;
float varReciChn1;
float varReciChn2;
float varReciChn3;
aclFormat srcFormat;
aclDataType srcDatatype;
size_t srcDimNum;
size_t shapeCount;
aclAippDims outDims[ACL_MAX_SHAPE_COUNT];
aclAippExtendInfo *aippExtend; /**< reserved parameter; must be null in the current version */
} aclAippInfo;

/**
@@ -339,7 +339,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId);
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize,
uint32_t *modelId);

/**
* @ingroup AscendCL
@@ -361,8 +362,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSi
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr,
size_t workSize, void *weightPtr, size_t weightSize);
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath,
uint32_t *modelId, void *workPtr, size_t workSize,
void *weightPtr, size_t weightSize);

/**
* @ingroup AscendCL
@@ -385,9 +387,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, ui
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId,
void *workPtr, size_t workSize, void *weightPtr,
size_t weightSize);
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize,
uint32_t *modelId, void *workPtr, size_t workSize,
void *weightPtr, size_t weightSize);

/**
* @ingroup AscendCL
@@ -422,8 +424,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId,
const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ,
size_t outputQNum);
const uint32_t *inputQ, size_t inputQNum,
const uint32_t *outputQ, size_t outputQNum);

/**
* @ingroup AscendCL
@@ -453,8 +455,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem
*/
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output,
aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input,
aclmdlDataset *output, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -829,11 +831,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0,
int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0,
int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0,
int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch,
int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2,
int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2,
int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1,
uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0,
uint8_t cscInputBiasR1, uint8_t cscInputBiasR2);
@@ -849,7 +851,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, in
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch);

/**
@@ -863,7 +865,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSe
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch);

/**
@@ -878,7 +880,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW,
int32_t srcImageSizeH);

@@ -898,10 +900,14 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW,
int32_t scfInputSizeH, int32_t scfOutputSizeW,
int32_t scfOutputSizeH, uint64_t batchIndex);
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet,
int8_t scfSwitch,
int32_t scfInputSizeW,
int32_t scfInputSizeH,
int32_t scfOutputSizeW,
int32_t scfOutputSizeH,
uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -919,9 +925,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, in
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW,
int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH,
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet,
int8_t cropSwitch,
int32_t cropStartPosW,
int32_t cropStartPosH,
int32_t cropSizeW,
int32_t cropSizeH,
uint64_t batchIndex);

/**
@@ -940,7 +950,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, i
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch,
int32_t paddingSizeTop, int32_t paddingSizeBottom,
int32_t paddingSizeLeft, int32_t paddingSizeRight,
@@ -961,10 +971,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0,
int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2,
int16_t dtcPixelMeanChn3, uint64_t batchIndex);
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
int16_t dtcPixelMeanChn0,
int16_t dtcPixelMeanChn1,
int16_t dtcPixelMeanChn2,
int16_t dtcPixelMeanChn3,
uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -981,10 +994,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0,
float dtcPixelMinChn1, float dtcPixelMinChn2,
float dtcPixelMinChn3, uint64_t batchIndex);
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
float dtcPixelMinChn0,
float dtcPixelMinChn1,
float dtcPixelMinChn2,
float dtcPixelMinChn3,
uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -1001,10 +1017,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0,
float dtcPixelVarReciChn1, float dtcPixelVarReciChn2,
float dtcPixelVarReciChn3, uint64_t batchIndex);
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
float dtcPixelVarReciChn0,
float dtcPixelVarReciChn1,
float dtcPixelVarReciChn2,
float dtcPixelVarReciChn3,
uint64_t batchIndex);

/**
* @ingroup AscendCL
@@ -1020,8 +1039,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index,
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId,
aclmdlDataset *dataset,
size_t index,
const aclmdlAIPP *aippParmsSet);

/**
@@ -1038,8 +1059,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index,
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId,
aclmdlDataset *dataset,
size_t index,
const aclmdlAIPP *aippParmsSet);

/**
@@ -1057,8 +1080,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlD
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type,
ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId,
size_t index,
aclmdlInputAippType *type,
size_t *dynamicAttachedDataIndex);

/**
@@ -1075,7 +1100,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo);

/**
@@ -1094,11 +1119,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t ind
*
* @retval ACL_SUCCESS The function is successfully executed
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId,
char *opName, size_t opNameLen, aclTensorDesc **inputDesc,
size_t *numInputs, aclTensorDesc **outputDesc,
size_t *numOutputs);
ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId,
uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs,
aclTensorDesc **outputDesc, size_t *numOutputs);

/**
* @ingroup AscendCL
@@ -1106,7 +1130,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlInitDump();

/**
@@ -1117,7 +1141,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlInitDump();
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);

/**
@@ -1126,7 +1150,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();

/**
@@ -1138,7 +1162,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId);

/**
@@ -1148,7 +1172,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *hand
* @retval the aclmdlConfigHandle pointer
*
* @see aclmdlDestroyConfigHandle
*/
ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle();

/**
@@ -1177,10 +1201,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr,
const void *attrValue, size_t valueSize);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_
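A sketch of the "WithMem" loading path whose signatures are re-wrapped above, with the caller owning the work and weight memory. aclmdlQuerySize is assumed to be the sizing API from this header (it is not shown in the hunk), and the path argument is a placeholder:

#include "acl/acl_mdl.h"
#include "acl/acl_rt.h"

/* Query the memory the model needs, allocate it, then load. */
static int load_self_managed(const char *path, uint32_t *modelId) {
    size_t workSize = 0, weightSize = 0;
    if (aclmdlQuerySize(path, &workSize, &weightSize) != ACL_SUCCESS) {
        return -1;
    }
    void *workPtr = NULL, *weightPtr = NULL;
    if (aclrtMalloc(&workPtr, workSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) {
        return -1;
    }
    if (aclrtMalloc(&weightPtr, weightSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) {
        aclrtFree(workPtr);
        return -1;
    }
    if (aclmdlLoadFromFileWithMem(path, modelId, workPtr, workSize,
                                  weightPtr, weightSize) != ACL_SUCCESS) {
        aclrtFree(workPtr);
        aclrtFree(weightPtr);
        return -1;
    }
    return 0; /* both buffers must outlive the loaded model */
}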

inc/external/acl/acl_op.h (+82, -37)

@@ -33,9 +33,9 @@ typedef void (*aclDataDeallocator)(void *data, size_t length);
static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;

typedef enum aclEngineType {
ACL_ENGINE_SYS,
ACL_ENGINE_AICORE,
ACL_ENGINE_VECTOR,
} aclopEngineType;

/**
@@ -148,7 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
const uint8_t *values);

/**
* @ingroup AscendCL
@@ -163,7 +163,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *a
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
const int64_t *values);

/**
* @ingroup AscendCL
@@ -178,7 +178,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *at
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
const float *values);

/**
* @ingroup AscendCL
@@ -193,7 +193,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
const char **values);

/**
* @ingroup AscendCL
@@ -208,8 +208,11 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists,
const int *numValues, const int64_t *const values[]);
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr,
const char *attrName,
int numLists,
const int *numValues,
const int64_t *const values[]);

/**
* @ingroup AscendCL
@@ -239,10 +242,15 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char
* @retval OtherValues Failure
*/
ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead")
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
const aclDataBuffer *const inputs[], int numOutputs,
const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
const aclopAttr *attr, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
const aclDataBuffer *const inputs[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
aclDataBuffer *const outputs[],
const aclopAttr *attr,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -272,9 +280,15 @@ ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, con
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType,
int numInputs,
aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[],
int numOutputs,
aclTensorDesc *outputDesc[],
aclDataBuffer *outputs[],
aclopAttr *attr,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -292,9 +306,12 @@ ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, a
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs,
const aclTensorDesc *const inputDesc[], int numOutputs,
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *opAttr,
aclopHandle **handle);

/**
@@ -326,9 +343,12 @@ ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);
*
* @see aclopCreateHandle | aclCreateDataBuffer
*/
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs,
const aclDataBuffer *const inputs[], int numOutputs,
aclDataBuffer *const outputs[], aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle,
int numInputs,
const aclDataBuffer *const inputs[],
int numOutputs,
aclDataBuffer *const outputs[],
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -344,8 +364,11 @@ ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInp
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer,
const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate,
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc,
const aclDataBuffer *srcBuffer,
const aclTensorDesc *dstDesc,
aclDataBuffer *dstBuffer,
uint8_t truncate,
aclrtStream stream);

/**
@@ -360,9 +383,12 @@ ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDa
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate,
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc,
aclTensorDesc *dstDesc,
uint8_t truncate,
aclopHandle **handle);


/**
* @ingroup AscendCL
* @brief create kernel
@@ -381,10 +407,15 @@ ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, ac
*
* @see aclopCompile
*/
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName,
void *binData, int binSize, aclopEngineType enginetype,
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType,
const char *kernelId,
const char *kernelName,
void *binData,
int binSize,
aclopEngineType enginetype,
aclDataDeallocator deallocator);


/**
* @ingroup AscendCL
* @brief create kernel
@@ -399,8 +430,11 @@ ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *k
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs,
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
typedef aclError (*aclopCompileFunc)(int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *opAttr,
aclopKernelDesc *aclopKernelDesc);

/**
@@ -441,8 +475,11 @@ ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim,
const void *args, uint32_t argSize);
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc,
const char *kernelId,
uint32_t blockDim,
const void *args,
uint32_t argSize);

/**
* @ingroup AscendCL
@@ -473,9 +510,12 @@ ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kerne
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs,
const aclTensorDesc *const inputDesc[], int numOutputs,
const aclTensorDesc *const outputDesc[], const aclopAttr *attr);
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *attr);

/**
* @ingroup AscendCL
@@ -493,12 +533,17 @@ ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType,
int numInputs,
aclTensorDesc *inputDesc[],
aclDataBuffer *inputs[],
int numOutputs,
aclTensorDesc *outputDesc[],
aclopAttr *attr);


#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_H_
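As a usage sketch for the re-wrapped aclopCast above: cast n floats to float16 on a stream. The device buffers and their sizes are the caller's (assumed) responsibility; aclCreateDataBuffer/aclDestroyDataBuffer come from acl_base.h per the @see notes, and null checks are elided for brevity:

#include "acl/acl_op.h"

/* Cast n floats (devSrc) to n float16 values (devDst) on the given stream. */
static aclError cast_to_fp16(void *devSrc, void *devDst, int64_t n, aclrtStream stream) {
    aclTensorDesc *srcDesc = aclCreateTensorDesc(ACL_FLOAT, 1, &n, ACL_FORMAT_ND);
    aclTensorDesc *dstDesc = aclCreateTensorDesc(ACL_FLOAT16, 1, &n, ACL_FORMAT_ND);
    aclDataBuffer *src = aclCreateDataBuffer(devSrc, (size_t)n * sizeof(float));
    aclDataBuffer *dst = aclCreateDataBuffer(devDst, (size_t)n * 2); /* fp16 is 2 bytes */

    /* The truncate flag is forwarded as declared above. */
    aclError ret = aclopCast(srcDesc, src, dstDesc, dst, 1, stream);

    aclDestroyDataBuffer(src);
    aclDestroyDataBuffer(dst);
    aclDestroyTensorDesc(srcDesc);
    aclDestroyTensorDesc(dstDesc);
    return ret;
}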

inc/external/acl/acl_op_compiler.h (+28, -19)

@@ -24,18 +24,21 @@
extern "C" {
#endif

typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType;
typedef enum aclCompileType {
ACL_COMPILE_SYS,
ACL_COMPILE_UNREGISTERED
} aclopCompileType;

typedef enum {
ACL_PRECISION_MODE,
ACL_AICORE_NUM,
ACL_AUTO_TUNE_MODE,
ACL_OP_SELECT_IMPL_MODE,
ACL_OPTYPELIST_FOR_IMPLMODE,
ACL_OP_DEBUG_LEVEL,
ACL_DEBUG_DIR,
ACL_OP_COMPILER_CACHE_MODE,
ACL_OP_COMPILER_CACHE_DIR
} aclCompileOpt;

/**
@@ -56,10 +59,15 @@ typedef enum {
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
int numOutputs, const aclTensorDesc *const outputDesc[],
const aclopAttr *attr, aclopEngineType engineType,
aclopCompileType compileFlag, const char *opPath);
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *attr,
aclopEngineType engineType,
aclopCompileType compileFlag,
const char *opPath);

/**
* @ingroup AscendCL
@@ -82,10 +90,11 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, con
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(
const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType,
int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag,
const char *opPath, aclrtStream stream);

/**
* @ingroup AscendCL
@@ -103,4 +112,4 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
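A sketch of driving the aclCompileOpt keys above through aclSetCompileopt (whose declaration closes this header). The option values are illustrative strings, not documented defaults:

#include "acl/acl_op_compiler.h"

/* Set a few compile options before compiling single ops. */
static aclError configure_op_compiler(void) {
    aclError ret = aclSetCompileopt(ACL_PRECISION_MODE, "allow_fp32_to_fp16");
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    ret = aclSetCompileopt(ACL_OP_COMPILER_CACHE_MODE, "enable");
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    return aclSetCompileopt(ACL_OP_COMPILER_CACHE_DIR, "/tmp/op_cache");
}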

inc/external/acl/acl_prof.h (+21, -21)

@@ -23,21 +23,21 @@
extern "C" {
#endif

#define ACL_PROF_ACL_API 0x0001
#define ACL_PROF_TASK_TIME 0x0002
#define ACL_PROF_AICORE_METRICS 0x0004
#define ACL_PROF_AICPU 0x0008

#define ACL_PROF_MAX_OP_NAME_LEN 257
#define ACL_PROF_MAX_OP_TYPE_LEN 65

typedef enum {
ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
ACL_AICORE_PIPE_UTILIZATION = 1,
ACL_AICORE_MEMORY_BANDWIDTH = 2,
ACL_AICORE_L0B_AND_WIDTH = 3,
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
ACL_AICORE_NONE = 0xFF
} aclprofAicoreMetrics;

typedef struct aclprofConfig aclprofConfig;
@@ -98,8 +98,7 @@ ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig);
* @see aclprofDestroyConfig
*/
ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
aclprofAicoreMetrics aicoreMetrics,
aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);
aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);

/**
* @ingroup AscendCL
@@ -139,7 +138,8 @@ ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig);
*
* @see aclprofModelUnSubscribe
*/
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig);
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId,
const aclprofSubscribeConfig *profSubscribeConfig);

/**
* @ingroup AscendCL
@@ -167,7 +167,7 @@ ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId);
* @see aclprofDestroySubscribeConfig
*/
ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch,
aclprofAicoreMetrics aicoreMetrics, void *fd);

/**
* @ingroup AscendCL
@@ -219,8 +219,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType,
size_t opTypeLen);
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index,
char *opType, size_t opTypeLen);

/**
* @ingroup AscendCL
@@ -235,8 +235,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoL
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName,
size_t opNameLen);
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index,
char *opName, size_t opNameLen);

/**
* @ingroup AscendCL
@@ -293,4 +293,4 @@ ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLe
}
#endif

#endif // INC_EXTERNAL_ACL_PROF_H_
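A sketch combining the bit-flag defines and the re-wrapped aclprofCreateConfig above: profile task time plus one AI Core metric on device 0. It assumes profiling was initialized elsewhere (aclprofInit is not part of this hunk), and profile_device0 is a hypothetical helper:

#include <stddef.h>
#include "acl/acl_prof.h"

/* Create a config, start profiling, run the workload, stop, clean up. */
static aclError profile_device0(void) {
    uint32_t deviceId = 0;
    aclprofConfig *cfg = aclprofCreateConfig(&deviceId, 1,
                                             ACL_AICORE_PIPE_UTILIZATION,
                                             NULL, /* aicoreEvents */
                                             ACL_PROF_TASK_TIME | ACL_PROF_AICORE_METRICS);
    if (cfg == NULL) {
        return 1; /* illustrative failure value */
    }
    aclError ret = aclprofStart(cfg);
    /* ... run the workload to be profiled ... */
    if (ret == ACL_SUCCESS) {
        ret = aclprofStop(cfg);
    }
    (void)aclprofDestroyConfig(cfg); /* per the @see note above */
    return ret;
}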

inc/external/acl/acl_rt.h (+79, -61)

@@ -26,62 +26,62 @@ extern "C" {
#endif

typedef enum aclrtRunMode {
ACL_DEVICE,
ACL_HOST,
} aclrtRunMode;

typedef enum aclrtTsId {
ACL_TS_ID_AICORE = 0,
ACL_TS_ID_AIVECTOR = 1,
ACL_TS_ID_RESERVED = 2,
} aclrtTsId;

typedef enum aclrtEventStatus {
ACL_EVENT_STATUS_COMPLETE = 0,
ACL_EVENT_STATUS_NOT_READY = 1,
ACL_EVENT_STATUS_RESERVED = 2,
} aclrtEventStatus;

typedef enum aclrtCallbackBlockType {
ACL_CALLBACK_NO_BLOCK,
ACL_CALLBACK_BLOCK,
} aclrtCallbackBlockType;

typedef enum aclrtMemcpyKind {
ACL_MEMCPY_HOST_TO_HOST,
ACL_MEMCPY_HOST_TO_DEVICE,
ACL_MEMCPY_DEVICE_TO_HOST,
ACL_MEMCPY_DEVICE_TO_DEVICE,
} aclrtMemcpyKind;

typedef enum aclrtMemMallocPolicy {
ACL_MEM_MALLOC_HUGE_FIRST,
ACL_MEM_MALLOC_HUGE_ONLY,
ACL_MEM_MALLOC_NORMAL_ONLY,
ACL_MEM_MALLOC_HUGE_FIRST_P2P,
ACL_MEM_MALLOC_HUGE_ONLY_P2P,
ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
} aclrtMemMallocPolicy;

typedef enum aclrtMemAttr {
ACL_DDR_MEM,
ACL_HBM_MEM,
ACL_DDR_MEM_HUGE,
ACL_DDR_MEM_NORMAL,
ACL_HBM_MEM_HUGE,
ACL_HBM_MEM_NORMAL,
ACL_DDR_MEM_P2P_HUGE,
ACL_DDR_MEM_P2P_NORMAL,
ACL_HBM_MEM_P2P_HUGE,
ACL_HBM_MEM_P2P_NORMAL,
} aclrtMemAttr;

typedef enum aclrtGroupAttr {
ACL_GROUP_AICORE_INT,
ACL_GROUP_AIV_INT,
ACL_GROUP_AIC_INT,
ACL_GROUP_SDMANUM_INT,
ACL_GROUP_ASQNUM_INT
} aclrtGroupAttr;

typedef struct tagRtGroupInfo aclrtGroupInfo;
@@ -472,7 +472,7 @@ ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stre
*/
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Queries an event's status
*
@@ -534,7 +534,9 @@ ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start,
*
* @see aclrtFree | acldvppMalloc | aclrtMallocCached
*/
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr,
size_t size,
aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
@@ -557,7 +559,9 @@ ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMal
*
* @see aclrtFree | aclrtMalloc
*/
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr,
size_t size,
aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
@@ -648,7 +652,10 @@ ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count,
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst,
size_t destMax,
const void *src,
size_t count,
aclrtMemcpyKind kind);

/**
@@ -695,31 +702,38 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count,
aclrtMemcpyKind kind, aclrtStream stream);
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst,
size_t destMax,
const void *src,
size_t count,
aclrtMemcpyKind kind,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Asynchronously initialize memory
* and set contents of memory to specified value
*
* @par Function
* The memory to be initialized is on the Host or device side,
* and the system determines whether
* it is host or device according to the address
*
* @param devPtr [IN] destination address pointer
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] set value
* @param count [IN] the number of bytes to set
* @param stream [IN] asynchronous task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count,
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr,
size_t maxCount,
int32_t value,
size_t count,
aclrtStream stream);

/**
@@ -865,8 +879,11 @@ ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo);
*
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo
*/
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupId,
aclrtGroupAttr attr, void *attrValue, size_t valueLen,
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo,
int32_t groupId,
aclrtGroupAttr attr,
void *attrValue,
size_t valueLen,
size_t *paramRetSize);

/**
@@ -929,4 +946,5 @@ ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, si
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_RT_H_
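A sketch of the re-wrapped async copy above: host-to-device copy on a stream, then a blocking wait. It assumes the host buffer was allocated with aclrtMallocHost so the async copy is legal, and that the stream create/destroy/synchronize calls are declared elsewhere in this header:

#include "acl/acl_rt.h"

/* Async host-to-device copy, then block until the stream drains. */
static aclError copy_to_device(void *dev, size_t devMax, const void *host, size_t count) {
    aclrtStream stream = NULL;
    aclError ret = aclrtCreateStream(&stream);
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    ret = aclrtMemcpyAsync(dev, devMax, host, count,
                           ACL_MEMCPY_HOST_TO_DEVICE, stream);
    if (ret == ACL_SUCCESS) {
        ret = aclrtSynchronizeStream(stream); /* see the @see note above */
    }
    (void)aclrtDestroyStream(stream);
    return ret;
}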


inc/external/acl/acl_tdt.h (+19, -12)

@@ -24,10 +24,10 @@ extern "C" {
#endif

enum acltdtTensorType {
ACL_TENSOR_DATA_UNDEFINED = -1,
ACL_TENSOR_DATA_TENSOR,
ACL_TENSOR_DATA_END_OF_SEQUENCE,
ACL_TENSOR_DATA_ABNORMAL
};

typedef struct acltdtDataItem acltdtDataItem;
@@ -64,7 +64,7 @@ ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *
*
* @retval null for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem);

/**
@@ -75,7 +75,7 @@ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataIt
*
* @retval 0 for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem);

/**
@@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataI
*
* @retval 0 for failed
* @retval OtherValues success
*/
ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem);

/**
@@ -118,8 +118,12 @@ ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataIte
*
* @see acltdtDestroyDataItem
*/
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum,
aclDataType dataType, void *data, size_t size);
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType,
const int64_t *dims,
size_t dimNum,
aclDataType dataType,
void *data,
size_t size);

/**
* @ingroup AscendCL
@@ -250,7 +254,8 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle);
*
* @see acltdtReceiveTensor
*/
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset,
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
const acltdtDataset *dataset,
int32_t timeout);

/**
@@ -266,11 +271,13 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
*
* @see acltdtSendTensor
*/
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset,
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle,
acltdtDataset *dataset,
int32_t timeout);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_TDT_H_
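A sketch of the queue-style transfer declared above: wrap one float tensor in a data item, add it to a dataset, and push it through a channel. The channel and dataset helpers are declared elsewhere in this header; device id 0 and "ch_name" are placeholders, the timeout semantics are assumed, and error handling is abbreviated:

#include "acl/acl_tdt.h"

/* Send a single tensor through a tdt channel. */
static aclError send_one_tensor(void *data, size_t size) {
    int64_t dims[1] = {(int64_t)(size / sizeof(float))};
    acltdtDataItem *item = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR,
                                                dims, 1, ACL_FLOAT, data, size);
    acltdtDataset *ds = acltdtCreateDataset();
    acltdtChannelHandle *ch = acltdtCreateChannel(0, "ch_name");
    if (item == NULL || ds == NULL || ch == NULL) {
        return 1; /* illustrative failure value */
    }
    (void)acltdtAddDataItem(ds, item);
    aclError ret = acltdtSendTensor(ch, ds, -1 /* timeout; assumed to block */);
    (void)acltdtDestroyChannel(ch);
    (void)acltdtDestroyDataset(ds);
    (void)acltdtDestroyDataItem(item);
    return ret;
}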


inc/external/acl/error_codes/ge_error_codes.h (+14, -0)

@@ -17,6 +17,20 @@
#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_
#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_

#if defined(_MSC_VER)
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY _declspec(dllexport)
#else
#define GE_FUNC_VISIBILITY
#endif
#else
#ifdef FUNC_VISIBILITY
#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_VISIBILITY
#endif
#endif

#include <stddef.h>

#ifdef __cplusplus
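The block added here mirrors the visibility pattern the ACL headers use: GE_FUNC_VISIBILITY expands to an export attribute only when FUNC_VISIBILITY is defined at build time. A sketch of a hypothetical declaration that would pick it up (GetGeErrorCount is not a real GE symbol):

/* Illustrative only; not a declaration from the real GE headers. */
#include "ge_error_codes.h"

/* Exported when compiled with -DFUNC_VISIBILITY (GCC/Clang) or with
 * FUNC_VISIBILITY defined under MSVC; default visibility otherwise. */
GE_FUNC_VISIBILITY size_t GetGeErrorCount(void);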


inc/external/acl/error_codes/rt_error_codes.h (+68, -67)

@@ -23,79 +23,80 @@
extern "C" {
#endif

static const int32_t ACL_RT_SUCCESS = 0; // success

static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type

static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore overflow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                  // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error

#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
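The three numeric bands above make it possible to triage a failure without a lookup table: 107xxx codes indicate invalid parameters or API misuse, 207xxx codes indicate missing features or exhausted resources, and 507xxx codes indicate runtime-internal faults. A minimal sketch of a band-based triage helper, assuming only the constants defined in this header (the function name is ours, not part of ACL):

#include <cstdint>

// Hypothetical helper (not part of ACL): map a runtime status code to its band.
static const char *RtErrorBand(int32_t code) {
  if (code == 0) return "success";  // ACL_RT_SUCCESS
  if (code >= 107000 && code <= 107999) return "invalid parameter or API misuse";
  if (code >= 207000 && code <= 207999) return "feature unsupported or resource exhausted";
  if (code >= 507000 && code <= 507999) return "runtime internal fault";
  return "unknown band";
}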

+ 138
- 41
inc/external/acl/ops/acl_cblas.h

@@ -23,9 +23,17 @@
extern "C" {
#endif

typedef enum aclTransType {
ACL_TRANS_N,
ACL_TRANS_T,
ACL_TRANS_NZ,
ACL_TRANS_NZ_T
} aclTransType;

typedef enum aclComputeType {
ACL_COMPUTE_HIGH_PRECISION,
ACL_COMPUTE_LOW_PRECISION
} aclComputeType;

/**
* @ingroup AscendCL
@@ -53,11 +61,12 @@ typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECIS
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n,
const void *alpha, const void *a, int lda, aclDataType dataTypeA,
const void *x, int incx, aclDataType dataTypeX,
const void *beta, void *y, int incy, aclDataType dataTypeY,
aclComputeType type, aclrtStream stream);
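The dataType arguments describe the element types of A, x and y independently, so the Ex entry point covers every supported precision combination. A hedged usage fragment, assuming devA, devX, devY are device buffers already filled via aclrtMalloc/aclrtMemcpy and that stream is a valid aclrtStream; the leading-dimension choice is an illustrative assumption:

// Sketch only: y = alpha * A * x + beta * y, all operands fp16.
int m = 64, n = 128;
aclFloat16 alpha = aclFloatToFloat16(1.0f);  // conversion helper from acl_base.h
aclFloat16 beta  = aclFloatToFloat16(0.0f);
aclError ret = aclblasGemvEx(ACL_TRANS_N, m, n,
                             &alpha, devA, n /* lda, layout assumption */, ACL_FLOAT16,
                             devX, 1 /* incx */, ACL_FLOAT16,
                             &beta, devY, 1 /* incy */, ACL_FLOAT16,
                             ACL_COMPUTE_HIGH_PRECISION, stream);
if (ret == ACL_SUCCESS) {
  ret = aclrtSynchronizeStream(stream);  // results are valid only after the stream drains
}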

/**
* @ingroup AscendCL
@@ -74,10 +83,15 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, co
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA,
int m,
int n,
aclDataType dataTypeA,
aclDataType dataTypeX,
aclDataType dataTypeY,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
@@ -101,9 +115,18 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, i
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA,
int m,
int n,
const aclFloat16 *alpha,
const aclFloat16 *a,
int lda,
const aclFloat16 *x,
int incx,
const aclFloat16 *beta,
aclFloat16 *y,
int incy,
aclComputeType type,
aclrtStream stream);

/**
@@ -119,7 +142,10 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, con
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA,
int m,
int n,
aclComputeType type,
aclopHandle **handle);

/**
@@ -145,9 +171,19 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, in
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA,
int m,
int n,
const int32_t *alpha,
const int8_t *a,
int lda,
const int8_t *x,
int incx,
const int32_t *beta,
int32_t *y,
int incy,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -162,7 +198,10 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, co
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA,
int m,
int n,
aclComputeType type,
aclopHandle **handle);

/**
@@ -194,11 +233,26 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, i
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const void *alpha,
const void *matrixA,
int lda,
aclDataType dataTypeA,
const void *matrixB,
int ldb,
aclDataType dataTypeB,
const void *beta,
void *matrixC,
int ldc,
aclDataType dataTypeC,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -220,10 +274,18 @@ ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType tra
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclDataType dataTypeA,
aclDataType dataTypeB,
aclDataType dataTypeC,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
@@ -251,10 +313,22 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, a
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const aclFloat16 *alpha,
const aclFloat16 *matrixA,
int lda,
const aclFloat16 *matrixB,
int ldb,
const aclFloat16 *beta,
aclFloat16 *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);
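A matching sketch for the half-precision GEMM above, under the same assumptions (device buffers devA/devB/devC already populated, stream valid): it computes C = alpha * A * B + beta * C for an m x k A, k x n B and m x n C. The handle-creating variant that follows exists so a repeatedly launched GEMM can pay the operator-setup cost once.

// Sketch only; dimensions and leading-dimension choices are illustrative assumptions.
int m = 32, n = 32, k = 64;
aclFloat16 alpha = aclFloatToFloat16(1.0f);
aclFloat16 beta  = aclFloatToFloat16(0.0f);
aclError ret = aclblasHgemm(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N,
                            m, n, k,
                            &alpha, devA, k /* lda, assumed */,
                            devB, n /* ldb, assumed */,
                            &beta, devC, n /* ldc, assumed */,
                            ACL_COMPUTE_HIGH_PRECISION, stream);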

/**
* @ingroup AscendCL
@@ -272,8 +346,13 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType tran
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
aclopHandle **handle);

/**
@@ -302,10 +381,23 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, ac
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const int32_t *alpha,
const int8_t *matrixA,
int lda,
const int8_t *matrixB,
int ldb,
const int32_t *beta,
int32_t *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -323,12 +415,17 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType tra
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
aclopHandle **handle);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_

+ 222
- 147
inc/external/acl/ops/acl_dvpp.h

@@ -53,109 +53,123 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output

// Supported Pixel Format
enum acldvppPixelFormat {
PIXEL_FORMAT_YUV_400 = 0, // 0
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
PIXEL_FORMAT_RGB_888 = 12, // 12
PIXEL_FORMAT_BGR_888 = 13, // 13
PIXEL_FORMAT_ARGB_8888 = 14, // 14
PIXEL_FORMAT_ABGR_8888 = 15, // 15
PIXEL_FORMAT_RGBA_8888 = 16, // 16
PIXEL_FORMAT_BGRA_8888 = 17, // 17
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
PIXEL_FORMAT_YVU_PLANAR_422,
PIXEL_FORMAT_YVU_PLANAR_444,
PIXEL_FORMAT_RGB_444 = 23,
PIXEL_FORMAT_BGR_444,
PIXEL_FORMAT_ARGB_4444,
PIXEL_FORMAT_ABGR_4444,
PIXEL_FORMAT_RGBA_4444,
PIXEL_FORMAT_BGRA_4444,
PIXEL_FORMAT_RGB_555,
PIXEL_FORMAT_BGR_555,
PIXEL_FORMAT_RGB_565,
PIXEL_FORMAT_BGR_565,
PIXEL_FORMAT_ARGB_1555,
PIXEL_FORMAT_ABGR_1555,
PIXEL_FORMAT_RGBA_1555,
PIXEL_FORMAT_BGRA_1555,
PIXEL_FORMAT_ARGB_8565,
PIXEL_FORMAT_ABGR_8565,
PIXEL_FORMAT_RGBA_8565,
PIXEL_FORMAT_BGRA_8565,
PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
PIXEL_FORMAT_RGB_BAYER_10BPP,
PIXEL_FORMAT_RGB_BAYER_12BPP,
PIXEL_FORMAT_RGB_BAYER_14BPP,
PIXEL_FORMAT_RGB_BAYER_16BPP,
PIXEL_FORMAT_BGR_888_PLANAR = 70,
PIXEL_FORMAT_HSV_888_PACKAGE,
PIXEL_FORMAT_HSV_888_PLANAR,
PIXEL_FORMAT_LAB_888_PACKAGE,
PIXEL_FORMAT_LAB_888_PLANAR,
PIXEL_FORMAT_S8C1,
PIXEL_FORMAT_S8C2_PACKAGE,
PIXEL_FORMAT_S8C2_PLANAR,
PIXEL_FORMAT_S16C1,
PIXEL_FORMAT_U8C1,
PIXEL_FORMAT_U16C1,
PIXEL_FORMAT_S32C1,
PIXEL_FORMAT_U32C1,
PIXEL_FORMAT_U64C1,
PIXEL_FORMAT_S64C1,
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
PIXEL_FORMAT_YVU_SEMIPLANAR_440,
PIXEL_FORMAT_FLOAT32,
PIXEL_FORMAT_BUTT,
PIXEL_FORMAT_UNKNOWN = 10000
};
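Most of the semiplanar 4:2:0 entries above share one memory layout: a full-resolution Y plane followed by an interleaved UV plane subsampled 2x2, which is why their buffers are sized at 1.5 bytes per pixel. A minimal sketch of that arithmetic, assuming strides already padded to the alignment DVPP expects (commonly 16 in width and 2 in height; the header itself does not state the alignment):

#include <cstdint>

// Hypothetical helper: byte size of a PIXEL_FORMAT_YUV_SEMIPLANAR_420 buffer.
static uint32_t Yuv420spBufferSize(uint32_t widthStride, uint32_t heightStride) {
  // Y plane (w*h) plus interleaved UV plane at quarter resolution (w*h/2).
  return widthStride * heightStride * 3U / 2U;
}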

// Stream Format
enum acldvppStreamFormat {
H265_MAIN_LEVEL = 0,
H264_BASELINE_LEVEL,
H264_MAIN_LEVEL,
H264_HIGH_LEVEL
};

// Supported Channel Mode
enum acldvppChannelMode {
DVPP_CHNMODE_VPC = 1,
DVPP_CHNMODE_JPEGD = 2,
DVPP_CHNMODE_JPEGE = 4
};
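The channel-mode values are distinct powers of two, so a channel that must serve several engines is described by OR-ing them together; a short sketch:

// Request a channel usable for both VPC and JPEG decode (== 3); the combined
// value would be handed to acldvppSetChannelDescMode, declared later in this header.
uint32_t mode = DVPP_CHNMODE_VPC | DVPP_CHNMODE_JPEGD;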

// Supported Border Type
enum acldvppBorderType {
BORDER_CONSTANT = 0,
BORDER_REPLICATE,
BORDER_REFLECT,
BORDER_REFLECT_101
};

// Venc parameter type
enum aclvencChannelDescParamType {
ACL_VENC_THREAD_ID_UINT64 = 0,
ACL_VENC_CALLBACK_PTR,
ACL_VENC_PIXEL_FORMAT_UINT32,
ACL_VENC_ENCODE_TYPE_UINT32,
ACL_VENC_PIC_WIDTH_UINT32,
ACL_VENC_PIC_HEIGHT_UINT32,
ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
ACL_VENC_BUF_ADDR_PTR,
ACL_VENC_BUF_SIZE_UINT32,
ACL_VENC_RC_MODE_UINT32,
ACL_VENC_SRC_RATE_UINT32,
ACL_VENC_MAX_BITRATE_UINT32,
ACL_VENC_MAX_IP_PROP_UINT32
};

// Jpeg picture format
enum acldvppJpegFormat {
ACL_JPEG_CSS_444 = 0,
ACL_JPEG_CSS_422,
ACL_JPEG_CSS_420,
ACL_JPEG_CSS_GRAY,
ACL_JPEG_CSS_440,
ACL_JPEG_CSS_411,
ACL_JPEG_CSS_UNKNOWN = 1000
};

/**
@@ -509,7 +523,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD
* @retval null for failed.
* @retval other success
*/
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left,
uint32_t right,
uint32_t top,
uint32_t bottom);
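A short sketch of the ROI lifecycle; the coordinates are illustrative, and the even-left/top, odd-right/bottom convention in the comment is an assumption about DVPP's usual constraint, not something this header states:

// Describe the rectangle x in [16, 271], y in [32, 287]
// (even left/top, odd right/bottom assumed).
acldvppRoiConfig *roi = acldvppCreateRoiConfig(16, 271, 32, 287);
if (roi != nullptr) {
  // ... pass roi to a crop or paste call, then release it:
  (void)acldvppDestroyRoiConfig(roi);
}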

/**
@@ -588,7 +604,10 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config,
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config,
uint32_t left,
uint32_t right,
uint32_t top,
uint32_t bottom);

/**
@@ -1077,8 +1096,7 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length, const void *param);

/**
* @ingroup AscendCL
@@ -1227,8 +1245,7 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc,
aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param);

/**
* @ingroup AscendCL
@@ -1528,7 +1545,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
int32_t *components);

/**
@@ -1545,8 +1565,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data,
uint32_t size,
uint32_t *width,
uint32_t *height,
int32_t *components,
acldvppJpegFormat *format);

/**
@@ -1561,7 +1584,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc,
const acldvppJpegeConfig *config,
uint32_t *size);

/**
* @ingroup AscendCL
@@ -1575,8 +1599,10 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1591,8 +1617,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data,
uint32_t dataSize,
uint32_t *width,
uint32_t *height,
int32_t *components);

/**
* @ingroup AscendCL
@@ -1606,8 +1635,10 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t d
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data,
uint32_t dataSize,
acldvppPixelFormat outputPixelFormat,
uint32_t *decSize);

/**
* @ingroup AscendCL
@@ -1671,8 +1702,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDe
* @see acldvppCreateChannel | acldvppCreatePicDesc
* | acldvppCreateResizeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppResizeConfig *resizeConfig,
aclrtStream stream);

/**
@@ -1708,8 +1741,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
aclrtStream stream);
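Putting the pieces together, a hedged end-to-end crop sketch (error handling elided; inPic and outPic are acldvppPicDesc pointers assumed to be bound to device memory beforehand, and stream is a valid aclrtStream):

acldvppChannelDesc *chan = acldvppCreateChannelDesc();
aclError ret = acldvppCreateChannel(chan);                  // bring the channel up
acldvppRoiConfig *crop = acldvppCreateRoiConfig(0, 255, 0, 255);

ret = acldvppVpcCropAsync(chan, inPic, outPic, crop, stream);
ret = aclrtSynchronizeStream(stream);                       // crop output is valid after this

(void)acldvppDestroyRoiConfig(crop);
(void)acldvppDestroyChannel(chan);
(void)acldvppDestroyChannelDesc(chan);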

/**
@@ -1734,9 +1769,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1759,9 +1797,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
acldvppRoiConfig *cropArea,
acldvppRoiConfig *pasteArea,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1786,11 +1827,14 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
acldvppBatchPicDesc *srcBatchPicDescs,
uint32_t *roiNums,
uint32_t size,
acldvppBatchPicDesc *dstBatchPicDescs,
acldvppRoiConfig *cropAreas[],
acldvppRoiConfig *pasteAreas[],
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1818,8 +1862,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1837,8 +1884,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelD
*
* @see acldvppCreateChannel | acldvppCreateJpegeConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
const void *data,
uint32_t *size,
acldvppJpegeConfig *config,
aclrtStream stream);

/**
@@ -1856,8 +1906,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelD
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc,
const void *data,
uint32_t size,
acldvppPicDesc *outputDesc,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1912,8 +1965,11 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
acldvppPicDesc *output,
aclvdecFrameConfig *config,
void *userData);

/**
* @ingroup AscendCL
@@ -1932,8 +1988,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, a
*
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame
*/
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc,
acldvppStreamDesc *input,
aclvdecFrameConfig *config,
void *userData);

/**
* @ingroup AscendCL
@@ -1954,8 +2012,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1977,8 +2037,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha
*
* @see acldvppCreateChannel | acldvppCreatePicDesc
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
void *reserve,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -1990,7 +2053,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc,
uint32_t mode);

/**
* @ingroup AscendCL
@@ -2025,7 +2089,8 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc,
uint32_t outMode);

/**
* @ingroup AscendCL
@@ -2122,7 +2187,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap);
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap,
uint32_t dim,
uint8_t **data,
uint32_t *len);
/**
* @ingroup AscendCL
@@ -2140,8 +2207,10 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, u
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppLutMap *lutMap,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2162,7 +2231,8 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig();
*
* @retval ACL_SUCCESS for success, other for failure
*/
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig,
uint32_t index,
double value);

/**
@@ -2307,8 +2377,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc,
const acldvppPicDesc *inputDesc,
acldvppPicDesc *outputDesc,
const acldvppBorderConfig *borderConfig,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2325,8 +2397,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc
*
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist
*/
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc,
acldvppPicDesc *srcPicDesc,
acldvppHist *hist,
void *reserve,
aclrtStream stream);

/**
* @ingroup AscendCL
@@ -2335,7 +2410,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel
* @retval null for failed.
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist();

/**
* @ingroup AscendCL
@@ -2392,7 +2467,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim,
*
* @see acldvppCreateHist | acldvppVpcCalcHistAsync
*/
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist);

/**
* @ingroup AscendCL
@@ -2415,4 +2490,4 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_

+ 6
- 8
inc/external/acl/ops/acl_fv.h

@@ -32,8 +32,8 @@ typedef struct aclfvSearchResult aclfvSearchResult;

// search operation type
enum aclfvSearchType {
SEARCH_1_N, // 1:N operation type
SEARCH_N_M // N:M operation type
};

/**
@@ -104,8 +104,7 @@ ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t
* @retval OtherValues success.
*/
ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset,
uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen);

/**
* @ingroup AscendCL
@@ -234,9 +233,8 @@ ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInp
* @retval null for failed. OtherValues success
*/
ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum,
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance,
uint32_t dataLen);

/**
* @ingroup AscendCL
@@ -350,4 +348,4 @@ ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_

+ 16
- 17
inc/external/hccl/hccl.h

@@ -27,7 +27,7 @@

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
* @brief Initialize HCCL.
@@ -66,15 +66,14 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root
* @param sendBuf A pointer identifying the input data address of the operator.
* @param recvBuf A pointer identifying the output data address of the operator.
* @param count An integer(u64) identifying the number of the output data.
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32.
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType,
HcclReduceOp op, HcclComm comm, aclrtStream stream);
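A hedged calling fragment: comm is assumed to come from one of the init calls above, sendBuf/recvBuf are device buffers holding count fp32 elements, and completion still requires draining the stream.

// Sketch only: sum-reduce count floats across all ranks in comm.
HcclResult ret = HcclAllReduce(sendBuf, recvBuf, count,
                               HCCL_DATA_TYPE_FP32, HCCL_REDUCE_SUM,
                               comm, stream);
if (ret == HCCL_SUCCESS) {
  (void)aclrtSynchronizeStream(stream);  // the reduction is complete only after this returns
}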

/**
* @brief Broadcast operator.
@@ -85,10 +84,10 @@ extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, Hc
* @param root An integer(u32) identifying the root rank in the operator.
* @param comm A pointer identifying the communication resource based on
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
aclrtStream stream);

/**
* @brief ReduceScatter operator.
@@ -100,10 +99,10 @@ extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
HcclReduceOp op, HcclComm comm, aclrtStream stream);

/**
* @brief AllGather operator.
@@ -114,10 +113,10 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
* @param comm A pointer identifying the communication resource based on.
* @param stream A pointer identifying the stream information.
* @return HcclResult
*/
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType,
HcclComm comm, aclrtStream stream);

/**
* @brief Destroy HCCL comm
@@ -130,5 +129,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm);

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_H_

+ 42
- 42
inc/external/hccl/hccl_types.h

@@ -16,10 +16,10 @@

/**
* @file hccl_types.h
* @brief HCCL data type definition
*
*/
#ifndef HCCL_TYPES_H_
#define HCCL_TYPES_H_

@@ -27,33 +27,33 @@

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
* @brief HCCL functions return value definition
*/
typedef enum {
HCCL_SUCCESS = 0, /**< success */
HCCL_E_PARA = 1, /**< parameter error */
HCCL_E_PTR = 2, /**< empty pointer */
HCCL_E_MEMORY = 3, /**< memory error */
HCCL_E_INTERNAL = 4, /**< internal error */
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
HCCL_E_UNAVAIL = 7, /**< resource unavailable */
HCCL_E_SYSCALL = 8, /**< call system interface error */
HCCL_E_TIMEOUT = 9, /**< timeout */
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
HCCL_E_RUNTIME = 15, /**< call runtime api fail */
HCCL_E_DRV = 16, /**< call driver api fail */
HCCL_E_PROFILING = 17, /**< call profiling api fail */
HCCL_E_CCE = 18, /**< call cce api fail */
HCCL_E_NETWORK = 19, /**< call network api fail */
HCCL_E_RESERVED /**< reserved */
} HcclResult;
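Because every HCCL entry point reports through this single enum, a check-and-bail macro is the customary calling pattern; a minimal sketch (the macro is ours, not part of HCCL):

#include <cstdio>

// Hypothetical convenience macro, not part of HCCL.
#define HCCL_CHECK(call)                                       \
  do {                                                         \
    HcclResult r_ = (call);                                    \
    if (r_ != HCCL_SUCCESS) {                                  \
      fprintf(stderr, "HCCL failure %d at %s:%d\n",            \
              static_cast<int>(r_), __FILE__, __LINE__);       \
      return r_;                                               \
    }                                                          \
  } while (0)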

/**
@@ -65,37 +65,37 @@ typedef void *HcclComm;
* @brief HCCL Reduction operation
*/
typedef enum {
HCCL_REDUCE_SUM = 0, /**< sum */
HCCL_REDUCE_PROD = 1, /**< prod */
HCCL_REDUCE_MAX = 2, /**< max */
HCCL_REDUCE_MIN = 3, /**< min */
HCCL_REDUCE_RESERVED /**< reserved */
} HcclReduceOp;

/**
* @brief HCCL data type
*/
typedef enum {
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
HCCL_DATA_TYPE_RESERVED /**< reserved */
} HcclDataType;

const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length

/**
* @brief HCCL root info
*/
typedef struct HcclRootInfoDef {
char internal[HCCL_ROOT_INFO_BYTES];
} HcclRootInfo;

#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HCCL_TYPES_H_
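The opaque 4108-byte root info above is the whole multi-process handshake: rank 0 produces it, ships the bytes over any out-of-band channel, and every rank then initializes against it. A sketch of that flow (ExchangeRootInfoOutOfBand is a hypothetical stand-in for the application's own transport, and HcclGetRootInfo is assumed to be declared in hccl.h):

// Sketch: bootstrap an HCCL communicator across nRanks processes.
HcclRootInfo rootInfo;
if (rank == 0) {
  HcclGetRootInfo(&rootInfo);                       // rank 0 mints the token
}
ExchangeRootInfoOutOfBand(&rootInfo, /*root=*/0);   // hypothetical app-provided transport

HcclComm comm;
HcclCommInitRootInfo(nRanks, &rootInfo, rank, &comm);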

+ 68
- 67
inc/external/runtime/rt_error_codes.h

@@ -23,79 +23,80 @@
extern "C" {
#endif

static const int32_t ACL_RT_SUCCESS = 0; // success

static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type

static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                  // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error

#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__

+ 10
- 5
third_party/fwkacllib/inc/ops/batch_ops.h

@@ -107,11 +107,13 @@ across multiple sessions . \n

REG_OP(Unbatch)
.INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(index, TensorType({DT_INT64}))
.INPUT(id, TensorType({DT_INT64}))
.OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.REQUIRED_ATTR(timeout_micros, Int)
.ATTR(container, String, "")
.ATTR(shared_name, String, "")
@@ -146,13 +148,16 @@ across multiple sessions . \n

REG_OP(UnbatchGrad)
.INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(index, TensorType({DT_INT64}))
.INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(id, TensorType({DT_INT64}))
.OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(container, String, "")
.ATTR(shared_name, String, "")
.OP_END_FACTORY_REG(UnbatchGrad)


+ 44
- 3
third_party/fwkacllib/inc/ops/data_flow_ops.h View File

@@ -1430,6 +1430,24 @@ REG_OP(OrderedMapClear)
.ATTR(shared_name, String, "")
.OP_END_FACTORY_REG(OrderedMapClear)

/**
*@brief FakeQueue, supporting the TensorFlow API FixedLengthRecordReader. \n

*@par Inputs:
*Including:
* @li resource: A Tensor of type DT_RESOURCE.

*@par Outputs:
*handle: A Tensor of type DT_STRING ref. \n

*@par Third-party framework compatibility
*Compatible with the TensorFlow operator FakeQueue.
*/
REG_OP(FakeQueue)
.INPUT(resource, TensorType({DT_RESOURCE}))
.OUTPUT(handle, TensorType({DT_STRING}))
.OP_END_FACTORY_REG(FakeQueue)

/**
*@brief Returns the number of incomplete elements in the underlying container. \n

@@ -2258,6 +2276,7 @@ REG_OP(LruCache)
.ATTR(shared_name, String, "LruCache")
.ATTR(cache_size, Int, 100000)
.ATTR(load_factor, Float, 1)
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(LruCache)

/**
@@ -2277,9 +2296,9 @@ REG_OP(CacheAdd)
.INPUT(cache, TensorType({DT_RESOURCE}))
.INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
.OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
.OUTPUT(swap_in_idx, TensorType({DT_INT64}))
.OUTPUT(swap_in_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
.OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
.OUTPUT(swap_out_idx, TensorType({DT_INT64}))
.OUTPUT(swap_out_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
.OP_END_FACTORY_REG(CacheAdd)

/**
@@ -2295,9 +2314,31 @@ REG_OP(CacheAdd)
REG_OP(CacheRemoteIndexToLocal)
.INPUT(cache, TensorType({DT_RESOURCE}))
.INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
.OUTPUT(local_idx, TensorType({DT_INT64}))
.OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
.OP_END_FACTORY_REG(CacheRemoteIndexToLocal)

/**
*@brief CacheAllIndexToLocal, get all ids in cache
*@par Inputs:
*cache: resource data
*@par Outputs:
*local_idx: id in cache.
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(CacheAllIndexToLocal)
.INPUT(cache, TensorType({DT_RESOURCE}))
.OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(CacheAllIndexToLocal)

REG_OP(DynamicGetNext)
.INPUT(x, TensorType::ALL())
.DYNAMIC_OUTPUT(y, TensorType::ALL())
.ATTR(output_types, ListType, {})
.ATTR(output_shapes, ListListInt, {{}, {}})
.ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile")
.ATTR(_getnext_inputs_shape_range, String, "")
.OP_END_FACTORY_REG(DynamicGetNext)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_

+ 80
- 0
third_party/fwkacllib/inc/ops/elewise_calculation_ops.h View File

@@ -3627,6 +3627,35 @@ REG_OP(Lerp)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(Lerp)

/**
*@brief Returns the number of elements for which abs(x1-x2) > atol+rtol*abs(x2) holds element-wise. \n

*
*@par Inputs:
*@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16
*@li x2: A tensor of the same type as "x1".
*
*@par Attributes:
* atol: Defaults to "1e-05".
* rtol: Defaults to "1e-03".
*
*@par Outputs:
* num: A tensor of type float32, giving the mismatch count.
* diff: A tensor of type float16.
*
*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*
*/
REG_OP(DataCompare)
.INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 }))
.INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 }))
.OUTPUT(num, TensorType({DT_FLOAT}))
.OUTPUT(diff, TensorType({DT_FLOAT16}))
.ATTR(atol, Float, 1e-5)
.ATTR(rtol, Float, 1e-3)
.OP_END_FACTORY_REG(DataCompare)
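
As a hedged illustration of the rule above (not part of the op registration), a host-side reference loop for the DataCompare count might look like this; the helper name DataCompareRef is hypothetical:

#include <cmath>
#include <cstdio>
#include <vector>

// Counts elements where |x1 - x2| > atol + rtol * |x2| (the rule stated above).
int DataCompareRef(const std::vector<float> &x1, const std::vector<float> &x2,
                   float atol = 1e-5f, float rtol = 1e-3f) {
  int num = 0;
  for (size_t i = 0; i < x1.size(); ++i) {
    if (std::fabs(x1[i] - x2[i]) > atol + rtol * std::fabs(x2[i])) {
      ++num;
    }
  }
  return num;
}

int main() {
  std::vector<float> a = {1.0f, 2.0f, 3.0f};
  std::vector<float> b = {1.0f, 2.1f, 3.0f};
  std::printf("mismatches: %d\n", DataCompareRef(a, b));  // prints: mismatches: 1
  return 0;
}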

/**
*@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0
*otherwise The input does not need to explicitly be a 2D vector.The "axis" attribute indicates the dimension along
@@ -3650,6 +3679,57 @@ REG_OP(HardMax)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(axis, Int, -1)
.OP_END_FACTORY_REG(HardMax)

/**
* @brief Computes the dot product (inner product) of two tensors. This function does not broadcast.

* @par Inputs:
* Two inputs, including:
 * @li input_x: A Tensor. The first tensor must be 1D. \n
 * @li input_y: A Tensor. The second tensor must be 1D. \n

 * @par Outputs:
 * @li output: A Tensor. Result of the two inputs; must be 1D. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch dot operator. \n
*/
REG_OP(Dot)
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
.INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
.OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
.OP_END_FACTORY_REG(Dot)
/**
*@brief Returns a new tensor with boolean elements representing \n
*whether each element of "x1" is "close" to the corresponding element of "x2". \n

*@par Inputs:
*Two inputs, including:
* @li x1: A tensor. Must be one of the following types:
* float16, float32, int32. \n
 * @li x2: A tensor with the same type and shape as "x1". \n

*@par Attributes:
*@li rtol: An optional float. Defaults to 1e-05. \n
*@li atol: An optional float. Defaults to 1e-08. \n
*@li equal_nan: An optional bool. Defaults to false. \n

*@par Outputs:
*y: A bool Tensor with the same shape as "x1". \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator isclose. \n
*/
REG_OP(IsClose)
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_BOOL}))
.ATTR(rtol, Float, 1e-05)
.ATTR(atol, Float, 1e-08)
.ATTR(equal_nan, Bool, false)
.OP_END_FACTORY_REG(IsClose)
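
A minimal scalar sketch of the comparison rule documented above, assuming the same semantics as torch.isclose; IsCloseRef is an illustrative name:

#include <cmath>

// |x1 - x2| <= atol + rtol * |x2|, with optional NaN equality.
bool IsCloseRef(float a, float b, float rtol = 1e-5f, float atol = 1e-8f,
                bool equal_nan = false) {
  if (std::isnan(a) || std::isnan(b)) {
    return equal_nan && std::isnan(a) && std::isnan(b);
  }
  return std::fabs(a - b) <= atol + rtol * std::fabs(b);
}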

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_

+ 16
- 0
third_party/fwkacllib/inc/ops/hcom_ops.h View File

@@ -238,6 +238,15 @@ REG_OP(HcomRemoteRead)
.REQUIRED_ATTR(dtype, Type)
.OP_END_FACTORY_REG(HcomRemoteRead)

/**
* @brief Performs Remote Ref Read of input tensors
* @par Inputs:
 * remote: A tensor describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length
* cache_var: The local base address
* local_offset: Skip step length
* @par Outputs:
* cache_var: The local base address
*/
REG_OP(HcomRemoteRefRead)
.INPUT(remote, TensorType({DT_UINT64}))
.INPUT(cache_var, TensorType({DT_UINT64}))
@@ -258,6 +267,13 @@ REG_OP(HcomRemoteWrite)
.INPUT(local, TensorType::ALL())
.OP_END_FACTORY_REG(HcomRemoteWrite)

/**
* @brief Performs Remote Write of input tensors
* @par Inputs:
* remote: A tensor. describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length
* @par Inputs:
* local: A Tensor. whose value is length / size_of(Type)
*/
REG_OP(HcomRemoteScatterWrite)
.INPUT(remote, TensorType({DT_INT64, DT_UINT64}))
.INPUT(local, TensorType::ALL())


+ 58
- 2
third_party/fwkacllib/inc/ops/image_ops.h View File

@@ -652,6 +652,62 @@ REG_OP(RGBToHSV)
/**
*@brief Generate a single randomly distorted bounding box for an image . \n

*@par Inputs:
*Input images must be a 4-D tensor. Inputs include:
*@li image_size: 1-D, containing [height, width, channels].
*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding
boxes associated with the image. \n

*@par Attributes:
*@li seed: If either seed or seed2 are set to non-zero, the random number
generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
*@li seed2: A second seed to avoid seed collision.
*@li min_object_covered: The cropped area of the image must contain at least
this fraction of any bounding box supplied. The value of this parameter should
be non-negative. In the case of 0, the cropped area does not need to overlap
any of the bounding boxes supplied .
*@li aspect_ratio_range: The cropped area of the image must have an aspect
ratio = width / height within this range.
*@li max_attempts: Number of attempts at generating a cropped region of the
image of the specified constraints. After max_attempts failures, return the
entire image.
*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes
supplied. If true, assume an implicit bounding box covering the whole input.
If false, raise an error . \n

*@par Outputs:
*@li begin: 1-D, containing [offset_height, offset_width, 0].
*@li size: 1-D, containing [target_height, target_width, -1].
*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n

*@attention Constraints:
*Input images can be of different types but output images are always float . \n

*@par Third-party framework compatibility
*Compatible with tensorflow SampleDistortedBoundingBox operator.
*/

REG_OP(SampleDistortedBoundingBox)
.INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
DT_INT32, DT_INT64 }))
.INPUT(bounding_boxes, TensorType({ DT_FLOAT }))
.OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
DT_INT32, DT_INT64 }))
.OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
DT_INT32, DT_INT64 }))
.OUTPUT(bboxes, TensorType({ DT_FLOAT }))
.ATTR(seed, Int, 0)
.ATTR(seed2, Int, 0)
.ATTR(min_object_covered, Float, 0.1f)
.ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f })
.ATTR(area_range, ListFloat, { 0.05f, 1.0f })
.ATTR(max_attempts, Int, 100)
.ATTR(use_image_if_no_bounding_boxes, Bool, false)
.OP_END_FACTORY_REG(SampleDistortedBoundingBox)

/**
*@brief Generate a single randomly distorted bounding box for an image . \n

*@par Inputs:
*Input images must be a 4-D tensor. Inputs include:
*@li image_size: 1-D, containing [height, width, channels].
@@ -1424,11 +1480,11 @@ REG_OP(Resize)

*@par Attributes:
*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image.
*@li ratio: An optional int. Defaults to 1. Downscaling ratio.
*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower but nicer upscaling of the chroma planes
*@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input.
*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted.
*@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n

*@par Outputs:
*image: A Tensor dtype of uint8.


+ 46
- 38
third_party/fwkacllib/inc/ops/linalg_ops.h View File

@@ -61,8 +61,8 @@ REG_OP(CholeskyGrad)

*@par Inputs:
*The input x has to be symmetric and positive definite.Inputs include:
*x:A Tensor. Must be one of the following types: double, float32. Shape
is [..., M, M] . \n
*x:A Tensor. Must be one of the following types: double, float32, float16,
complex64, complex128. Shape is [..., M, M] . \n

*@par Outputs:
*y:A Tensor. Has the same type as x . \n
@@ -76,8 +76,10 @@ form square matrices.
*/

REG_OP(Cholesky)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(Cholesky)

/**
@@ -87,8 +89,8 @@ of one or more square matrices . \n
*@par Inputs:
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
form square matrices. Inputs include:
*x:A Tensor. Must be one of the following types: double, float32. Shape is
[..., M, M] . \n
*x:A Tensor. Must be one of the following types: double, float32,
complex64, complex128. Shape is [..., M, M] . \n

*@par Outputs:
*@li y:A Tensor. Has the same type as x.
@@ -103,9 +105,9 @@ form square matrices. \n
*/

REG_OP(LogMatrixDeterminant)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(LogMatrixDeterminant)

/**
@@ -114,8 +116,8 @@ REG_OP(LogMatrixDeterminant)
*@par Inputs:
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
form square matrices. Inputs include:
*x:A Tensor. Must be one of the following types: double, float32. Shape is
[..., M, M] . \n
*x:A Tensor. Must be one of the following types: double, float32, complex64,
complex128. Shape is [..., M, M] . \n

*@par Outputs:
*y:A Tensor. Has the same type as x . \n
@@ -129,8 +131,8 @@ form square matrices.
*/

REG_OP(MatrixDeterminant)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(MatrixDeterminant)

/**
@@ -140,8 +142,7 @@ their adjoints (conjugate transposes) . \n
*@par Inputs:
*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions
form square matrices. Inputs include:
*x:A Tensor. Must be one of the following types: double, float. Shape is
[..., M, M] . \n
*x:A Tensor of input. Shape is [..., M, M] . \n

*@par Attributes:
*adjoint:An optional bool. Defaults to False.Boolean indicating whether to
@@ -159,8 +160,10 @@ form square matrices. \n
*/

REG_OP(MatrixInverse)
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(adjoint, Bool, false)
.OP_END_FACTORY_REG(MatrixInverse)

@@ -169,8 +172,7 @@ REG_OP(MatrixInverse)

*@par Inputs:
*The input rhs must have the same type as matrix. Inputs include:
*@li matrix:A Tensor. Must be one of the following types: double, float.
Shape is [..., M, M].
*@li matrix:A Tensor of input. Shape is [..., M, M].
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n

*@par Attributes:
@@ -189,9 +191,9 @@ dimensions form square matrices. \n
*/

REG_OP(MatrixSolve)
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(adjoint, Bool, false)
.OP_END_FACTORY_REG(MatrixSolve)

@@ -221,8 +223,10 @@ dimensions form square matrices. \n
*/

REG_OP(MatrixSolveLs)
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(l2, TensorType({DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(fast, Bool, true)
@@ -234,8 +238,7 @@ matrices by backsubstitution . \n

*@par Inputs:
*The input rhs must have the same type as matrix. Inputs include:
*@li matrix: A Tensor. Must be one of the following types: double, float.
Shape is [..., M, M].
*@li matrix: A Tensor. Shape is [..., M, M].
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n

*@par Attributes:
@@ -256,9 +259,12 @@ dimensions form square matrices. \n
*/

REG_OP(MatrixTriangularSolve)
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(lower, Bool, true)
.ATTR(adjoint, Bool, false)
.OP_END_FACTORY_REG(MatrixTriangularSolve)
@@ -268,8 +274,7 @@ REG_OP(MatrixTriangularSolve)

*@par Inputs:
*The input shape of x must be [..., M, N]. Inputs include:
*x:A Tensor whose shape is [..., M, N]. Must be one of the following types:
double, float . \n
*x:A Tensor whose shape is [..., M, N]. \n

*@par Attributes:
*full_matrices: An optional bool. Defaults to False. If true, compute
@@ -289,9 +294,12 @@ dimensions form matrices of size [M, N]. \n
*/

REG_OP(Qr)
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
.OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
.OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
DT_COMPLEX64, DT_COMPLEX128 }))
.OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
DT_COMPLEX64, DT_COMPLEX128 }))
.OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
DT_COMPLEX64, DT_COMPLEX128 }))
.ATTR(full_matrices, Bool, false)
.OP_END_FACTORY_REG(Qr)

@@ -384,8 +392,8 @@ of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n
*/

REG_OP(Lu)
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(p, TensorType({DT_INT32, DT_INT64}))
.REQUIRED_ATTR(output_idx_type, Type)
.OP_END_FACTORY_REG(Lu)
@@ -404,8 +412,8 @@ y: Shape is `[..., M, M]` . \n
*/

REG_OP(MatrixSquareRoot)
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(MatrixSquareRoot)

/**


+ 1
- 1
third_party/fwkacllib/inc/ops/list_ops.h View File

@@ -192,7 +192,7 @@ REG_OP(TensorListGetItem)
.INPUT(element_shape, TensorType({DT_INT32}))
.OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
.ATTR(element_dtype, Type, DT_INT32)
.OP_END_FACTORY_REG(TensorListGetItem)


+ 68
- 0
third_party/fwkacllib/inc/ops/math_ops.h View File

@@ -222,6 +222,24 @@ REG_OP(Bucketize)
.REQUIRED_ATTR(boundaries, ListFloat)
.OP_END_FACTORY_REG(Bucketize)

/**
*@brief Returns a new tensor with the truncated integer values of the elements of input. \n

*@par Inputs:
*One input, including:
* @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n

*@par Outputs:
*output_y: A tensor with the same type and shape as "input_x". \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator Trunc. \n
*/
REG_OP(Trunc)
.INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8}))
.OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8}))
.OP_END_FACTORY_REG(Trunc)
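
Rounding toward zero as described above matches std::trunc; a quick sanity check:

#include <cmath>
#include <cstdio>

int main() {
  // Trunc keeps the integer part and drops the fraction, rounding toward zero.
  std::printf("%.1f %.1f\n", std::trunc(2.7), std::trunc(-2.7));  // prints: 2.0 -2.0
  return 0;
}
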
/**
*@brief Computes the sum along sparse segments of a tensor . \n

@@ -645,6 +663,7 @@ REG_OP(NLLLoss)
.OUTPUT(y, TensorType({DT_FLOAT}))
.OUTPUT(total_weight, TensorType({DT_FLOAT}))
.ATTR(reduction, String, "mean")
.ATTR(ignore_index, Int, -100)
.OP_END_FACTORY_REG(NLLLoss)

/**
@@ -674,6 +693,7 @@ REG_OP(NLLLossGrad)
.INPUT(total_weight, TensorType({DT_FLOAT}))
.OUTPUT(x_grad, TensorType({DT_FLOAT}))
.ATTR(reduction, String, "mean")
.ATTR(ignore_index, Int, -100)
.OP_END_FACTORY_REG(NLLLossGrad)

/**
@@ -884,6 +904,54 @@ REG_OP(LpNorm)
.ATTR(keepdim, Bool, false)
.ATTR(epsilon, Float, 1e-12)
.OP_END_FACTORY_REG(LpNorm)

/**
 * @brief Returns a complex tensor built from a real part and an imaginary part.

 * @par Inputs:
 * @li real: An ND tensor of type float32 or double.
 * @li imag: An ND tensor of type float32 or double. \n
 *
 * @par Outputs:
 * @li out: An ND tensor of type complex64 or complex128. \n
*/
REG_OP(Complex)
.INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE}))
.OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.ATTR(Tout, Type, DT_COMPLEX64)
.OP_END_FACTORY_REG(Complex)

/**
 * @brief Returns the imaginary part of a complex tensor.

 * @par Inputs:
 * @li input: An ND tensor of type complex64 or complex128. \n
 *
 * @par Outputs:
 * @li output: An ND tensor of type float32 or double. \n
*/
REG_OP(Imag)
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(Tout, Type, DT_FLOAT)
.OP_END_FACTORY_REG(Imag)

/**
 * @brief Returns the element-wise argument (angle) of a complex tensor.

 * @par Inputs:
 * @li input: An ND tensor of type complex64 or complex128. \n
 *
 * @par Outputs:
 * @li output: An ND tensor of type float32 or double. \n
*/
REG_OP(Angle)
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
.ATTR(Tout, Type, DT_FLOAT)
.OP_END_FACTORY_REG(Angle)
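
The element semantics of Complex, Imag, and Angle map directly onto std::complex; a minimal sketch:

#include <complex>
#include <cstdio>

int main() {
  std::complex<float> z(3.0f, 4.0f);        // Complex(real = 3, imag = 4)
  std::printf("imag:  %f\n", z.imag());     // Imag  -> 4.0
  std::printf("angle: %f\n", std::arg(z));  // Angle -> atan2(4, 3) ~= 0.9273
  return 0;
}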

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_

+ 21
- 0
third_party/fwkacllib/inc/ops/matrix_calculation_ops.h View File

@@ -1022,6 +1022,27 @@ REG_OP(IndexAdd)
.ATTR(axis, Int, 0)
.OP_END_FACTORY_REG(IndexAdd)

/**
*@brief Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices "x". \n

*@par Inputs:
*One input, including:
*@li x: A Tensor. Must be one of the following types:
* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n

*@par Attributes:
*diagonal: An optional int. The diagonal to consider. Defaults to 0. \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

*@par Third-party framework compatibility
* Compatible with the Pytorch operator Triu.
*/
REG_OP(Triu)
.INPUT(x, TensorType::BasicType())
.ATTR(diagonal, Int, 0)
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(Triu)
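
For reference, the 2-D rule behind Triu keeps element (r, c) only when c - r >= diagonal; a hedged host-side sketch (TriuRef is an illustrative name):

#include <vector>

// Zeroes everything below the chosen diagonal of a row-major rows x cols matrix.
std::vector<float> TriuRef(const std::vector<float> &x, int rows, int cols,
                           int diagonal = 0) {
  std::vector<float> y(x.size(), 0.0f);
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      if (c - r >= diagonal) {
        y[r * cols + c] = x[r * cols + c];
      }
    }
  }
  return y;
}
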
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_

+ 104
- 0
third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h View File

@@ -144,6 +144,64 @@ REG_OP(BatchNorm)
/**
*@brief Performs batch normalization . \n

*@par Inputs:
* Five inputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported)
*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D
if input "x" is with format NDC1HWC0. Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D
if input "x" is with format NDC1HWC0. Specifies the offset.
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D
if input "x" is with format NDC1HWC0. Specifies the mean used for inference. Must be "None" if the
operation is used for training.
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be
6D if input "x" is with format NDC1HWC0. Specifies the variance used for inference. Must be "None"
if the operation is used for training . \n

*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
*@li data_format: An optional string, specifying the format of "x". Defaults to "NCDHW".
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n

*@par Outputs:
* Five outputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported)
*@li y: A 5D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D
if input "x" is with format NDC1HWC0. Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n

*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n

*@par Third-party framework compatibility
*@li Compatible with the TensorFlow operator fused_batch_norm.
*@li Compatible with the TensorFlow operator fused_batch_norm_v2.
*/
REG_OP(BatchNorm3D)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(offset, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NCDHW")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(BatchNorm3D)
/**
*@brief Performs batch normalization . \n

*@par Inputs:
* Five inputs, including: (NHWC or NCHW supported)
*@li x: A 4D Tensor of type float16 or float32.
@@ -242,6 +300,52 @@ REG_OP(BatchNormGrad)
/**
*@brief Performs the backpropagation of BatchNorm . \n

*@par Inputs:
* Five inputs, including:
*@li y_backprop: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient.
*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0.
*@li scale: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0.
*@li reserve_space_1: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm3D.
*@li reserve_space_2: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm3D . \n

*@par Attributes:
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
*@li data_format: An optional string. Defaults to "NCDHW".
*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n

*@par Outputs:
*@li x_backprop: A Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "x".
*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale".
*@li offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset".
*@li reserve_space_4: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output.
*@li reserve_space_5: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n

*@attention Constraints:
* The preceding layer of this operator must be operator BatchNorm3D . \n

*@see BatchNorm
*@par Third-party framework compatibility
* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad.
*/
REG_OP(BatchNorm3DGrad)
.INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(reserve_space_1, TensorType({DT_FLOAT}))
.INPUT(reserve_space_2, TensorType({DT_FLOAT}))
.OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
.OUTPUT(offset_backprop, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_4, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_5, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NCDHW")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(BatchNorm3DGrad)

/**
*@brief Performs the backpropagation of BatchNorm . \n

*@par Inputs:
* Five inputs, including:
*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient.


+ 8
- 8
third_party/fwkacllib/inc/ops/nn_calculation_ops.h View File

@@ -1059,7 +1059,7 @@ REG_OP(DeformableConv2D)

*@par Attributes:
* @li groups: Number of blocked connections from input channels to output
* channels. Reserved.
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A list of 5 integers. Specifies the dilation factor for each
@@ -1119,7 +1119,7 @@ REG_OP(Conv3D)
*@par Attributes:
* Three attributes:
* @li groups: Number of blocked connections from input channels to output
* channels. Reserved.
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A tuple/list of 5 integers, The dilation factor for each
@@ -1167,7 +1167,7 @@ REG_OP(Conv3DBackpropInput)
*@par Attributes:
* Three attributes:
* @li groups: Number of blocked connections from input channels to output
* channels. Reserved.
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A tuple/list of 5 integers, The dilation factor for each
@@ -1267,7 +1267,7 @@ REG_OP(LSTM)
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output
* channels. Reserved.
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.

@@ -1319,7 +1319,7 @@ REG_OP(Conv3DBackpropFilter)
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output
* channels. Reserved.
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.

@@ -1369,7 +1369,7 @@ REG_OP(Conv3DBackpropFilterD)
*@par Attributes:
* Five attributes:
* @li groups: Number of blocked connections from input channels to output
* channels. Reserved.
* channels.
* @li dilations: A tuple/list of 5 integers,
* The dilation factor for each dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
@@ -1422,7 +1422,7 @@ REG_OP(Conv3DTranspose)
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output
* channels. Reserved.
* channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW".
* Defaults to "NDHWC". Specify the data format of the input and output data.
* @li output_padding: The size will be added in the output shape.
@@ -1624,7 +1624,7 @@ REG_OP(Conv2DTransposeD)
* of the input.
* @li ksize: A tuple/list of 2 integers.kernel size.
*@par Attributes:
* Three attributes:
* Four attributes:
* @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension
* of input. Defaults to [1, 1, 1, 1]
* @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x.


+ 46
- 1
third_party/fwkacllib/inc/ops/nn_detect_ops.h View File

@@ -968,8 +968,9 @@ REG_OP(SPP)
* Three inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
* map.
*@li rois: A tensor of type float16 or float32, with shape
*@li rois: A tensor of type float16 or float32, with 3D shape
 * [batch, 5, roi_max_num], describing the ROIs.
 * roi_max_num must be less than or equal to 6000 and must be divisible by 16.
*@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying
* the number of ROIs per batch . \n

@@ -1604,6 +1605,50 @@ REG_OP(NonMaxSuppressionV7)
.ATTR(max_boxes_size, Int, 0)
.OP_END_FACTORY_REG(NonMaxSuppressionV7)

/**
*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n

*@par Inputs:
* Three inputs, including:
*@li features: A 5HD Tensor list of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
* and the value "5" covers the index of the image where the ROI is located and the coordinates "x0", "y0", "x1", and "y1".

*@par Attributes:
*@li finest_scale: An optional attribute of type int, specifying the scale for calculating the levels of "rois".
*@li roi_scale_factor: An optional attribute of type float32, specifying the rescaling of "rois" coordinates.
*@li spatial_scale: An optional attribute of type list float32, specifying the scaling ratio of "features"
* to the original image.
*@li pooled_height: An optional attribute of type int32, specifying the H dimension.
*@li pooled_width: An optional attribute of type int32, specifying the W dimension.
*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency
* of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois",
* which is a floating point number. Defaults to "0".
*@li pool_mode: An optional attribute of type string to indicate pooling mode. Defaults to "avg" . \n
*@li aligned: An optional attribute of type bool, specifying the align to corner. Defaults to true . \n

*@par Outputs:
* y: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16.
* The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height",
* "pooled_width", and "features", respectively.

*@par Third-party framework compatibility
*Compatible with mmdetection SingleRoIExtractor operator.
*/
REG_OP(RoiExtractor)
.DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(finest_scale, Int, 56)
.ATTR(roi_scale_factor, Float, 0)
.ATTR(spatial_scale, ListFloat, { 1.f/4, 1.f/8, 1.f/16, 1.f/32 })
.ATTR(pooled_height, Int, 7)
.ATTR(pooled_width, Int, 7)
.ATTR(sample_num, Int, 0)
.ATTR(pool_mode, String, "avg")
.ATTR(aligned, Bool, true)
.OP_END_FACTORY_REG(RoiExtractor)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_

+ 41
- 0
third_party/fwkacllib/inc/ops/nn_norm_ops.h View File

@@ -1233,6 +1233,47 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2)
.OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(reduction, String, "mean")
.OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2)
/**
 * @brief Calculate the PoissonNllLoss function.
 * target ~ Poisson(input); loss(input, target) = input - target * log(input) + log(target!) \n

 * @par Inputs:
 * Two inputs, including:
 * @li input_x: A tensor. Must be one of the following types:
 * float16, float32.
 * @li target: A tensor. Must be one of the following types:
 * float16, float32. \n

 * @par Attributes:
 * Four attributes, including:
 * @li log_input: An optional bool. Defaults to "True".
 * @li full: An optional bool. Defaults to "False".
 * @li eps: An optional float. Defaults to "1e-8".
 * @li reduction: An optional string. Defaults to "mean". \n

* @par Outputs:
 * loss: A Tensor with the same element type as the two inputs. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator PoissonNllLoss. \n
*/
REG_OP(PoissonNllLoss)
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(log_input, Bool, true)
.ATTR(full, Bool, false)
.ATTR(eps, Float, 1e-8)
.ATTR(reduction, String, "mean")
.OP_END_FACTORY_REG(PoissonNllLoss)
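
A hedged reference for the loss formula above with "mean" reduction; the eps guard and the Stirling term for full = true follow the PyTorch definition, and PoissonNllLossRef is an illustrative name:

#include <cmath>
#include <vector>

float PoissonNllLossRef(const std::vector<float> &x, const std::vector<float> &t,
                        bool log_input = true, bool full = false,
                        float eps = 1e-8f) {
  const float kTwoPi = 6.28318530718f;
  float sum = 0.0f;
  for (size_t i = 0; i < x.size(); ++i) {
    // log_input = true treats x as the log-rate: exp(x) - t * x.
    float loss = log_input ? std::exp(x[i]) - t[i] * x[i]
                           : x[i] - t[i] * std::log(x[i] + eps);
    if (full && t[i] > 1.0f) {  // Stirling approximation of log(target!)
      loss += t[i] * std::log(t[i]) - t[i] + 0.5f * std::log(kTwoPi * t[i]);
    }
    sum += loss;
  }
  return sum / static_cast<float>(x.size());  // reduction == "mean"
}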
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_

+ 28
- 1
third_party/fwkacllib/inc/ops/nn_ops.h View File

@@ -20,7 +20,34 @@
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
#include "graph/operator_reg.h"
#include "nn_pooling_ops.h"

namespace ge {
/**
* @brief Says whether the targets are in the top "k" predictions . \n

* @par Inputs:
* Three inputs, including:
* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor.
* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids.
* @li k: A 1D Tensor of the same type as "targets".
* Specifies the number of top elements to look at for computing precision . \n

* @par Outputs:
* precision: A Tensor of type bool . \n

* @attention Constraints:
 * @li "targets" must be a non-negative tensor.

* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator InTopKV2.
*/
REG_OP(InTopKV2)
.INPUT(predictions, TensorType({DT_FLOAT}))
.INPUT(targets, TensorType(IndexNumberType))
.INPUT(k, TensorType({IndexNumberType}))
.OUTPUT(precision, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(InTopKV2)
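
The predicate computed per batch row can be sketched on the host as follows: the target class is in the top k if fewer than k classes score strictly higher (InTopKRef is an illustrative name):

#include <vector>

bool InTopKRef(const std::vector<float> &predictions, int target, int k) {
  int higher = 0;  // `target` is assumed to be a valid class index
  for (float p : predictions) {
    if (p > predictions[target]) {
      ++higher;
    }
  }
  return higher < k;  // true when the target lands in the top k
}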
}// namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_

+ 43
- 1
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -223,7 +223,29 @@ REG_OP(Relu6Grad)
.INPUT(features, TensorType::RealNumberType())
.OUTPUT(backprops, TensorType::RealNumberType())
.OP_END_FACTORY_REG(Relu6Grad)

/**
*@brief Calculate the elu_grad_v2 function.
*Computes the elu backward element-wise: 1 if x > 0, otherwise elu(x) + alpha .
*@par Inputs:
*Two inputs, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32.
* @li activations: A tensor. Must be one of the following types:
* float16, float32.
*
*@par Outputs:
*y: A Tensor with the same type and shape as "grads".
*
*@par Attributes:
*@li alpha: scalar parameter, default value = 1.0
*/
REG_OP(EluGradV2)
.INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.ATTR(alpha, Float, 1.0)
.OP_END_FACTORY_REG(EluGradV2)
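
A scalar sketch of the gradient rule stated in the brief, assuming activations holds elu(x); EluGradV2Ref is an illustrative name:

// Pass the gradient through for positive activations; otherwise scale it by
// d/dx elu(x) = elu(x) + alpha.
float EluGradV2Ref(float grad, float activation, float alpha = 1.0f) {
  return activation > 0.0f ? grad : grad * (activation + alpha);
}
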
/**
* @brief Compute sigmoid of "x" element-wise . \n

@@ -842,6 +864,26 @@ REG_OP(SoftShrinkGrad)
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(lambd, Float, 0.5)
.OP_END_FACTORY_REG(SoftShrinkGrad)
/**
*@brief Calculate -ln(1+e^(-x)). \n

*@par Inputs:
*One input, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32. \n

*@par Outputs:
*One output, including:
* @li y: A tensor with the same type and shape as "x". \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator LogSigmoid. \n
*/
REG_OP(LogSigmoid)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */
.OP_END_FACTORY_REG(LogSigmoid)
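
Evaluating -ln(1 + e^(-x)) naively overflows for large negative x; a numerically stable host-side sketch:

#include <cmath>

float LogSigmoidRef(float x) {
  // For x >= 0 use -log1p(e^(-x)); otherwise rewrite as x - log1p(e^x).
  return x >= 0.0f ? -std::log1p(std::exp(-x)) : x - std::log1p(std::exp(x));
}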
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_

+ 153
- 2
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -37,7 +37,7 @@ namespace ge {
*@attention Constraints:
* This operator is a BatchNorm fusion operator for updating the moving
* averages for training.
* This operator is used in conjunction with BNTrainingUpdate.
* This operator is used in conjunction with BNTrainingReduce.
*/
REG_OP(BNTrainingReduce)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -45,6 +45,27 @@ REG_OP(BNTrainingReduce)
.OUTPUT(square_sum, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(BNTrainingReduce)

/**
*@brief Performs reduced batch normalization . \n

*@par Inputs:
*x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n

*@par Outputs:
*@li sum: A 3D Tensor of type float32 for SUM reduced "x".
*@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n

*@attention Constraints:
* This operator is a BatchNorm fusion operator for updating the moving
* averages for training.
* This operator is used in conjunction with BN3DTrainingUpdate.
*/
REG_OP(BN3DTrainingReduce)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(sum, TensorType({DT_FLOAT}))
.OUTPUT(square_sum, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(BN3DTrainingReduce)

/**
*@brief Performs the backpropagation of BatchNorm . \n

@@ -88,6 +109,49 @@ REG_OP(BNTrainingReduceGrad)
.ATTR(epsilon, Float, 0.0001)
.OP_END_FACTORY_REG(BNTrainingReduceGrad)

/**
*@brief Performs the backpropagation of BatchNorm . \n

*@par Inputs:
* Seven inputs, including:
*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for
* the gradient.
*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0.
*@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0,
* for the gradient of "scale".
*@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0,
* for the gradient of "offset".
*@li scale: A 6D Tensor of type float32, with format NDC1HWC0.
*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0,
* for the mean of "x".
*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0,
* for the variance of "x" . \n

*@par Attributes:
*epsilon: An optional float32. Defaults to "0.0001". A small float number
* added to the variance of "x" . \n

*@par Outputs:
*y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset
* of "x" . \n

*@attention Constraints:
* The preceding layer of this operator must be BN3DTrainingReduce . \n

*@see BN3DTrainingReduce
*/
REG_OP(BN3DTrainingReduceGrad)
.INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(diff_scale, TensorType({DT_FLOAT}))
.INPUT(diff_offset, TensorType({DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(batch_mean, TensorType({DT_FLOAT}))
.INPUT(batch_variance, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.OP_END_FACTORY_REG(BN3DTrainingReduceGrad)

/**
*@brief Performs reduced batch normalization . \n

@@ -120,7 +184,7 @@ REG_OP(BNTrainingReduceGrad)
*@attention Constraints:
*@li This operator is a BatchNorm fusion operator for updating the moving
averages for training.
*This operator is used in conjunction with BNTrainingReduce.
*This operator is used in conjunction with BNTrainingUpdate.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square
* root instruction.
*/
@@ -141,6 +205,59 @@ REG_OP(BNTrainingUpdate)
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(BNTrainingUpdate)

/**
*@brief Performs reduced batch normalization . \n

*@par Inputs:
* Seven inputs, including: (NDC1HWC0 supported)
*@li x: A 6D Tensor of type float16 or float32.
*@li sum: A 6D Tensor of type float32 for the output of operator
* BN3DTrainingReduce.
*@li square_sum: A 6D Tensor of type float32 for the output of operator
* BN3DTrainingReduce.
*@li scale: A 6D Tensor of type float32, for the scaling factor.
*@li offset: A 6D Tensor of type float32, for the scaling offset.
*@li mean: A 6D Tensor of type float32, for the updated mean.
*@li variance: A 6D Tensor of type float32, for the updated variance . \n

*@par Attributes:
*@li epsilon: A required float32, specifying the small value added to variance
* to avoid dividing by zero.
*@li factor: A required float32, specifying the weight for updating the mean
* and variance . \n

*@par Outputs:
* Five outputs, including: (NDC1HWC0 supported)
*@li y: A 6D Tensor of type float16 or float32, for normalized "x".
*@li mean: A 6D Tensor of type float32, for the updated mean.
*@li variance: A 6D Tensor of type float32, for the updated variance.
*@li batch_mean: A 6D Tensor of type float32, for the mean of "x".
*@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n

*@attention Constraints:
*@li This operator is a BatchNorm fusion operator for updating the moving
averages for training.
*This operator is used in conjunction with BN3DTrainingReduce.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square
* root instruction.
*/
REG_OP(BN3DTrainingUpdate)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(sum, TensorType({DT_FLOAT}))
.INPUT(square_sum, TensorType({DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(offset, TensorType({DT_FLOAT}))
.INPUT(mean, TensorType({DT_FLOAT}))
.INPUT(variance, TensorType({DT_FLOAT}))
.REQUIRED_ATTR(factor, Float)
.REQUIRED_ATTR(epsilon, Float)
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(mean, TensorType({DT_FLOAT}))
.OUTPUT(variance, TensorType({DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(BN3DTrainingUpdate)

/**
*@brief Performs batch normalization for inference . \n

@@ -284,6 +401,40 @@ REG_OP(BNTrainingUpdateGrad)
.OUTPUT(diff_offset, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(BNTrainingUpdateGrad)

/**
*@brief Performs the backpropagation of BatchNorm . \n

*@par Inputs:
* Four inputs, including:
*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0,
* for the gradient.
*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0.
*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0,
* for the mean of "x".
*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0,
* for the variance of "x" . \n

*@par Attributes:
*epsilon: An optional float32. Defaults to "0.0001". A small float number
* added to the variance of "x" . \n

*@par Outputs:
*@li diff_scale: A Tensor of type float32, with format NDC1HWC0,
* for the offset of "scale".
*@li diff_offset: A Tensor of type float32, with format NDC1HWC0,
* for the offset of "offset" . \n

*/
REG_OP(BN3DTrainingUpdateGrad)
.INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(batch_mean, TensorType({DT_FLOAT}))
.INPUT(batch_variance, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.OUTPUT(diff_scale, TensorType({DT_FLOAT}))
.OUTPUT(diff_offset, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(BN3DTrainingUpdateGrad)

/**
*@brief Performs the backpropagation of BatchNorm for inference . \n



+ 68
- 2
third_party/fwkacllib/inc/ops/selection_ops.h View File

@@ -857,8 +857,8 @@ REG_OP(SliceDV2)
* @li sorted = true
* @li It's unstable sorted indices on the platform of Ascend310

* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator TopK.
* @par Restrictions:
* Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead.
*/
REG_OP(TopKD)
.INPUT(x, TensorType::RealNumberType())
@@ -883,6 +883,44 @@ REG_OP(TopKD)
* Number of top elements to look for along the last dimension (along each row
* for matrices) . \n

* @par Attributes:
* @li sorted: An optional bool. Defaults to true.
* If true, the resulting "k" elements will be sorted by the values in descending
* order.
* @li dim: An optional int. Defaults to -1. For reserved use.
* @li largest: An optional bool. Defaults to true. For reserved use. \n

* @par Outputs:
* @li values: A Tensor, specifying the sorted data. Has the same type as
* "input".
* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n

* @see TopK()
* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator TopKV2.
*/
REG_OP(TopKV2)
.INPUT(x, TensorType::RealNumberType())
.INPUT(k, TensorType({DT_INT32}))
.OUTPUT(values, TensorType::RealNumberType())
.OUTPUT(indices, TensorType({DT_INT32}))
.ATTR(sorted, Bool, true)
.ATTR(dim, Int, -1)
.ATTR(largest, Bool, true)
.OP_END_FACTORY_REG(TopKV2)
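
The sorted = true behaviour over a 1-D row can be mirrored on the host with std::partial_sort; a hedged sketch (TopKRef is an illustrative name, and k <= x.size() is assumed):

#include <algorithm>
#include <numeric>
#include <utility>
#include <vector>

std::pair<std::vector<float>, std::vector<int>> TopKRef(
    const std::vector<float> &x, int k) {
  std::vector<int> idx(x.size());
  std::iota(idx.begin(), idx.end(), 0);
  // Order indices so the k largest values come first, in descending order.
  std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                    [&x](int a, int b) { return x[a] > x[b]; });
  std::vector<int> indices(idx.begin(), idx.begin() + k);
  std::vector<float> values(k);
  for (int i = 0; i < k; ++i) {
    values[i] = x[indices[i]];
  }
  return {values, indices};
}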

/**
* @brief Finds values and indices of the "k" largest elements for the last
* dimension . \n

* @par Inputs:
* Two inputs, including:
* @li x: A 1D or higher tensor of type BasicType, with the last dimension
* at least "k".
* @li k: A 0D Tensor of type int32.
* Number of top elements to look for along the last dimension (along each row
* for matrices) . \n

* @par Attributes:
* @li sorted: An optional bool. Defaults to true.
* If true, the resulting "k" elements will be sorted by the values in descending
@@ -2103,6 +2141,34 @@ REG_OP(StridedSliceV2)
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(StridedSliceV2)

/**
*@brief Fills the elements of the input tensor with value val by selecting the indices in the order given in index. \n

*@par Inputs:
*Three inputs, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32, int32. \n
*@li assist1: A tensor. Must be one of the following types:
* float16, float32, int32. \n
*@li assist2: A tensor. Must be one of the following types:
* float16, float32, int32. \n

* @par Attributes:
* @li dim: A required int. Used to select the dimension of this tensor. \n

*@par Outputs:
*y: A Tensor with the same type and shape as "x". \n

*@par Third-party framework compatibility
*Compatible with the Pytorch operator IndexFill. \n
*/
REG_OP(IndexFillD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(dim, Int)
.OP_END_FACTORY_REG(IndexFillD)
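
For the 2-D, dim = 0 case, the index_fill semantics this op mirrors can be sketched as overwriting whole rows; IndexFillRowsRef and its flat value argument are illustrative only, and the actual assist1/assist2 encoding is op-specific:

#include <vector>

// Overwrites the rows listed in `indices` of a row-major matrix with `value`.
void IndexFillRowsRef(std::vector<float> &x, int cols,
                      const std::vector<int> &indices, float value) {
  for (int r : indices) {
    for (int c = 0; c < cols; ++c) {
      x[r * cols + c] = value;
    }
  }
}
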
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_

+ 3
- 3
third_party/fwkacllib/inc/ops/sparse_ops.h View File

@@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad)
REG_OP(SparseTensorDenseMatMul)
.INPUT(x1_indices, TensorType({DT_INT32, DT_INT64}))
.INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \
DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16}))
DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64}))
.INPUT(x1_shape, TensorType({DT_INT64}))
.INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \
.INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \
DT_COMPLEX128, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \
DT_COMPLEX128, DT_FLOAT16}))
.ATTR(adjoint_a, Bool, false)
.ATTR(adjoint_b, Bool, false)


+ 96
- 0
third_party/fwkacllib/inc/ops/spectral_ops.h View File

@@ -26,6 +26,24 @@

namespace ge {

/**
*@brief Computes the inverse 1-dimensional discrete Fourier transform over the
inner-most dimension of `x`. \n

*@par Inputs:
*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n

*@par Outputs:
*@li y: A complex tensor of the same rank as `x`. \n

*@par Third-party framework compatibility
* Compatible with TensorFlow IFFT operator.
*/
REG_OP(IFFT)
.INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OP_END_FACTORY_REG(IFFT)

/**
*@brief Real-valued fast Fourier transform . \n

@@ -47,6 +65,84 @@ REG_OP(RFFT)
.OUTPUT(y, TensorType({DT_COMPLEX64}))
.OP_END_FACTORY_REG(RFFT)

/**
*@brief Inverse real-valued fast Fourier transform . \n

*@par Inputs:
*@li x: A complex64 tensor.
*@li fft_length: An int32 tensor of shape [1]. The FFT length . \n

*@par Outputs:
*@li y: A float32 tensor of the same rank as `input`. The inner-most
dimension of `input` is replaced with the `fft_length` samples of its inverse
1D Fourier transform . \n

*@par Third-party framework compatibility
* Compatible with TensorFlow IRFFT operator.
*/
REG_OP(IRFFT)
.INPUT(x, TensorType({DT_COMPLEX64}))
.INPUT(fft_length, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(IRFFT)


/**
*@brief 2D fast Fourier transform. \n

*@par Inputs:
*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n

*@par Outputs:
*@li y: A complex tensor of the same shape as `input`. The inner-most 2
dimensions of `input` are replaced with their 2D Fourier transform.\n

*@par Third-party framework compatibility
* Compatible with TensorFlow FFT2D operator.
*/
REG_OP(FFT2D)
.INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(FFT2D)

/**
*@brief Calculate the one-dimensional discrete Fourier transform on the
innermost dimension of the input. \n

*@par Inputs:
*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n

*@par Outputs:
*@li y: A complex tensor with the same shape as input. The innermost dimension
of the input is replaced by its 1-dimensional Fourier transform. \n

*@par Third-party framework compatibility
* Compatible with TensorFlow FFT operator.
*/
REG_OP(FFT)
.INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OP_END_FACTORY_REG(FFT)
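
The transform above can be checked against a naive O(n^2) DFT; this host-side sketch (DftRef is an illustrative name) is reference-only and makes no claim about the kernel implementation:

#include <cmath>
#include <complex>
#include <vector>

std::vector<std::complex<double>> DftRef(
    const std::vector<std::complex<double>> &x) {
  const double kPi = 3.14159265358979323846;
  const size_t n = x.size();
  std::vector<std::complex<double>> y(n);
  for (size_t k = 0; k < n; ++k) {
    for (size_t t = 0; t < n; ++t) {
      // y[k] = sum_t x[t] * e^(-2*pi*i*k*t/n)
      const double angle = -2.0 * kPi * static_cast<double>(k * t) /
                           static_cast<double>(n);
      y[k] += x[t] * std::complex<double>(std::cos(angle), std::sin(angle));
    }
  }
  return y;
}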

/**
*@brief Calculate the inverse 2-dimensional discrete Fourier transform over the
inner-most 2 dimensions of the input. \n

*@par Inputs:
*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n

*@par Outputs:
*@li y: A complex tensor with the same shape as input. The inner-most 2 dimensions
of the input are replaced by their inverse 2-dimensional Fourier transform. \n

*@par Third-party framework compatibility
* Compatible with TensorFlow IFFT2D operator.
*/
REG_OP(IFFT2D)
.INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OP_END_FACTORY_REG(IFFT2D)

} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_

+ 9
- 9
third_party/fwkacllib/inc/ops/split_combination_ops.h View File

@@ -62,8 +62,8 @@ REG_OP(Split)
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

*@par Attributes:
*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value.
*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n

*@par Outputs:
*y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n
@@ -94,12 +94,12 @@ REG_OP(SplitD)
*@par Inputs:
* Three inputs, including:
*@li x: An ND Tensor.
*Must be one of the following types:
*@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension.
*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n
*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
*@li size_splits: Must be one of the following types: int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension.
*@li split_dim: Must be of type int32. Specifies the dimension along which to split . \n

*@par Attributes:
*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
*num_split: A required int32. Specifies the number of output tensors. No default value . \n

*@par Outputs:
*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n
@@ -129,9 +129,9 @@ REG_OP(SplitV)
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

*@par Attributes:
*@li size_splits: A required list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension.
*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value.
*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
*@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension.
*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n

*@par Outputs:
*y: Dynamic output. A list of output tensors. Has the same type and format as "x" . \n
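A usage sketch under the same assumptions (generated class ge::op::SplitV; create_dynamic_output_y per GE's DYNAMIC_OUTPUT convention): splitting an input into three tensors of sizes 2, 3 and 3 along axis 1 might look like this. The Const values named in the comments are illustrative.

auto x = ge::op::Data("x");                        // ND input tensor
auto size_splits = ge::op::Const("size_splits");   // int32 tensor, e.g. {2, 3, 3}
auto split_dim = ge::op::Const("split_dim");       // scalar int32, e.g. 1
auto split_v = ge::op::SplitV("split_v")
                   .set_input_x(x)
                   .set_input_size_splits(size_splits)
                   .set_input_split_dim(split_dim)
                   .set_attr_num_split(3);         // must equal the entry count of size_splits
split_v.create_dynamic_output_y(3);                // declare the three dynamic outputs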


+ 1
- 1
third_party/fwkacllib/inc/ops/string_ops.h View File

@@ -488,7 +488,7 @@ include:
*/
REG_OP(AsString)
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \
DT_DOUBLE, DT_BOOL}))
DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_STRING}))
.ATTR(precision, Int, -1)
.ATTR(scientific, Bool, false)
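A hedged usage sketch of the widened AsString prototype (again assuming the generated ge::op::AsString class): converting a tensor to strings with three digits of precision in scientific notation.

auto x = ge::op::Data("x");              // with this change, complex64/complex128 inputs are accepted too
auto as_str = ge::op::AsString("as_string")
                  .set_input_x(x)
                  .set_attr_precision(3) // default is -1
                  .set_attr_scientific(true);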


+ 19
- 0
third_party/fwkacllib/inc/runtime/config.h View File

@@ -46,6 +46,12 @@ typedef enum tagRtChipType {
CHIP_END,
} rtChipType_t;

typedef enum tagRtAicpuScheType {
SCHEDULE_SOFTWARE = 0, /* Software Schedule */
SCHEDULE_SOFTWARE_OPT,
SCHEDULE_HARDWARE, /* HWTS Schedule */
} rtAicpuScheType;

typedef enum tagRtVersion {
VER_BEGIN = 0,
VER_NA = VER_BEGIN,
@@ -184,6 +190,19 @@ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);
*/
RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);


/**
* @ingroup
 * @brief get device feature capability by device id, such as task scheduling capability.
* @param [in] deviceId
* @param [in] moduleType
* @param [in] featureType
* @param [out] value
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
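A minimal sketch of how the new query might be used together with the scheduling enums introduced in this change (rtAicpuScheType above, FEATURE_TYPE_SCHE from dev.h below); the moduleType value 0 is an illustrative placeholder, not a documented constant.

int32_t value = 0;
rtError_t ret = rtGetDeviceCapability(0 /* deviceId */, 0 /* moduleType, placeholder */,
                                      FEATURE_TYPE_SCHE, &value);
if (ret == RT_ERROR_NONE && value == SCHEDULE_HARDWARE) {
  // this device's tasks are scheduled by HWTS hardware
}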


+ 5
- 0
third_party/fwkacllib/inc/runtime/dev.h View File

@@ -62,6 +62,11 @@ typedef enum tagRtFeatureType {
FEATURE_TYPE_RSV
} rtFeatureType_t;

typedef enum tagRtDeviceFeatureType {
FEATURE_TYPE_SCHE,
FEATURE_TYPE_END,
} rtDeviceFeatureType_t;

typedef enum tagMemcpyInfo {
MEMCPY_INFO_SUPPORT_ZEROCOPY = 0,
MEMCPY_INFO_RSV


+ 35
- 0
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData);
#define RT_FUSION_KERNEL_DUMPFLAG (0x04)
#define RT_KERNEL_CUSTOM_AICPU (0x08)

/**
* @ingroup rt_kernel
* @brief kernel mode
**/
#define RT_DEFAULT_KERNEL_MODE (0x00)
#define RT_NORMAL_KERNEL_MODE (0x01)
#define RT_ALL_KERNEL_MODE (0x02)

/**
* @ingroup rt_kernel
* @brief kernel L1 Fusion Dump bit flags
@@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData);
*/
RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle);

/**
* @ingroup rt_kernel
 * @brief register device binary with all kernels
* @param [in] bin device binary description
* @param [out] handle device binary handle
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle);

/**
* @ingroup rt_kernel
 * @brief register fast memory device binary
@@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u
RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream);

/**
* @ingroup rt_kernel
* @brief launch kernel with handle to device
 * @param [in] handle program handle returned by binary registration
 * @param [in] devFunc device function description.
 * @param [in] blockDim block dimensions
 * @param [in] args arguments address for kernel function
 * @param [in] argsSize arguments size
 * @param [in] smDesc shared memory description
 * @param [in] stream associated stream
 * @param [in] kernelInfo kernel info
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo);
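Taken together with rtRegisterAllKernel above, a hedged sketch of the handle-based launch path added in this change; the binary fields, kernel name, argument layout, and stream are all placeholders to be supplied by the caller.

rtDevBinary_t bin = {};       // magic, version, data and length filled by the caller
void *handle = nullptr;
rtStream_t stream = nullptr;  // assumed created earlier, e.g. via rtStreamCreate
if (rtRegisterAllKernel(&bin, &handle) == RT_ERROR_NONE) {
  uint8_t args[64] = {};      // kernel arguments; layout is kernel-specific
  rtError_t ret = rtKernelLaunchWithHandle(handle, "kernel_0" /* devFunc, placeholder */,
                                           1 /* blockDim */, args, sizeof(args),
                                           nullptr /* smDesc */, stream,
                                           nullptr /* kernelInfo */);
}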

/**
* @ingroup rt_kernel
* @brief launch kernel to device


+ 14
- 0
third_party/fwkacllib/inc/runtime/rt_model.h View File

@@ -50,6 +50,7 @@ typedef enum tagModelTaskType {
RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX,
RT_MODEL_TASK_STREAM_LABEL_GOTO,
RT_MODEL_TASK_MODEL_EXIT,
RT_MODEL_TASK_ALL_KERNEL,
} rtModelTaskType_t;

typedef enum tagModelStreamType {
@@ -127,6 +128,18 @@ typedef struct tagKernelTaskInfo {
uint16_t *argsOffset;
} rtKernelTaskInfo_t;

typedef struct tagAllKernelTaskInfo {
uint16_t blockDim;
uint16_t argsCount;
uint16_t argsSize;
uint16_t reserved;
void *devfunc;
void *handle;
uint8_t *smDesc;
uint8_t *args;
uint16_t *argsOffset;
} rtAllKernelTaskInfo_t;

typedef struct tagKernelTaskInfoEx {
uint32_t flags;
uint32_t argsSize;
@@ -251,6 +264,7 @@ typedef struct tagTaskInfo {
union {
rtKernelTaskInfoEx_t kernelTaskEx;
rtKernelTaskInfo_t kernelTask;
rtAllKernelTaskInfo_t allKernelTask;
rtEventTaskInfo_t eventTask;
rtStreamSwitchTaskInfo_t streamSwitchTask;
rtStreamActiveTaskInfo_t streamActiveTask;


+ 137
- 72
third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h View File

@@ -1,72 +1,137 @@
/**
* @file tune_api.h
*
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n
 * Description: header file of the mstune tuning APIs
*/
/** @defgroup mstune mstune tuning APIs */
#ifndef TUNE_API_H
#define TUNE_API_H
#include <vector>
#include <map>
#include <string>
#include "graph/graph.h"
#include "ge/ge_api.h"
/**
* @ingroup mstune
*
* mstune status
*/
enum MsTuneStatus {
MSTUNE_SUCCESS, /** tune success */
MSTUNE_FAILED, /** tune failed */
};
// Option key: for train options sets
const std::string MSTUNE_SELF_KEY = "mstune";
const std::string MSTUNE_GEINIT_KEY = "initialize";
const std::string MSTUNE_GESESS_KEY = "session";
/**
* @ingroup mstune
 * @par Description: command-line tuning
 *
 * @attention None
 * @param option [IN] tuning options
 * @param msg [OUT] message returned when tuning fails
 * @retval #MSTUNE_SUCCESS execution succeeded
 * @retval #MSTUNE_FAILED execution failed
 * @par Dependencies:
 * @li tune_api.cpp: the development package this API belongs to.
 * @li tune_api.h: the header file in which this API is declared.
 * @see None
 * @since
*/
MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg);
/**
* @ingroup mstune
 * @par Description: gradient tuning
 *
 * @attention None
 * @param tuningGraph [IN] graph to tune
 * @param dependGraph [IN] dependent graphs for tuning
 * @param session [IN] GE session
 * @param option [IN] option sets, containing tuning options and GE options
 * @retval #MSTUNE_SUCCESS execution succeeded
 * @retval #MSTUNE_FAILED execution failed
 * @par Dependencies:
 * @li tune_api.cpp: the development package this API belongs to.
 * @li tune_api.h: the header file in which this API is declared.
 * @see None
 * @since
*/
extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
#endif
/**
* @file tune_api.h
*
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n
 * Description: header file of the mstune tuning APIs
*/
/** @defgroup mstune mstune tuning APIs */
#ifndef TUNE_API_H
#define TUNE_API_H
#include <vector>
#include <map>
#include <string>
#include "graph/graph.h"
#include "ge/ge_api.h"

/**
* @ingroup mstune
*
* mstune status
*/
enum MsTuneStatus {
MSTUNE_SUCCESS, /** tune success */
MSTUNE_FAILED, /** tune failed */
};

// Option key: for train options sets
const std::string MSTUNE_SELF_KEY = "mstune";
const std::string MSTUNE_GEINIT_KEY = "initialize";
const std::string MSTUNE_GESESS_KEY = "session";

#ifdef __cplusplus
extern "C" {
#endif

struct RunnerInitConfig {
// online only
std::string profPath;
std::string parserPath;
// ncs only
std::vector<uint32_t> devList;
};

struct RunnerOpInfo {
std::string opName;
uint64_t opCostTime;
uint64_t aicoreCostTime;
// gradient_split only
std::string modelName;
std::string opType;
std::vector<uint64_t> start;
std::vector<uint64_t> end;
};

struct RunnerModelInfo {
uint64_t totalCostTime;
};

struct RunnerRunResult {
std::vector<RunnerModelInfo> modelInfo;
std::vector<RunnerOpInfo> opInfo;
};

struct RunnerResult {
uint64_t totalCostTime;
std::map<std::string, uint64_t> opCostTime;
std::map<std::string, uint64_t> aicoreCostTime;
};

struct RunnerDataBuf {
void *ptr = nullptr;
size_t size = 0;
};

struct AOEBufferData {
std::shared_ptr<uint8_t> data = nullptr;
uint64_t length;
};

struct RunnerConfig {
bool isProf;
uint32_t loop;
// offline only
std::vector<RunnerDataBuf> input;
std::vector<RunnerDataBuf> output;
std::string modelPath;
RunnerDataBuf modelData;
// online only
uint32_t devId;
std::vector<std::vector<ge::Tensor>> inputs;
std::vector<ge::Graph> dependGraph; // run graph (for training)
};
#ifdef __cplusplus
}
#endif
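As an illustration only (which fields are consumed depends on the runner implementation, which this header does not define), an offline profiling configuration might be filled like this; inputBuffer and inputSize stand for caller-owned memory.

RunnerConfig cfg;
cfg.isProf = true;             // collect profiling data
cfg.loop = 10;                 // run the model 10 times
cfg.modelPath = "./model.om";  // offline only; path is a placeholder
RunnerDataBuf in;
in.ptr = inputBuffer;          // caller-owned input memory (placeholder)
in.size = inputSize;
cfg.input.push_back(in);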

/**
* @ingroup mstune
 * @par Description: command-line tuning
 *
 * @attention None
 * @param option [IN] tuning options
 * @param msg [OUT] message returned when tuning fails
 * @retval #MSTUNE_SUCCESS execution succeeded
 * @retval #MSTUNE_FAILED execution failed
 * @par Dependencies:
 * @li tune_api.cpp: the development package this API belongs to.
 * @li tune_api.h: the header file in which this API is declared.
 * @see None
 * @since
*/
MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg);

/**
* @ingroup mstune
 * @par Description: gradient tuning
 *
 * @attention None
 * @param tuningGraph [IN] graph to tune
 * @param dependGraph [IN] dependent graphs for tuning
 * @param session [IN] GE session
 * @param option [IN] option sets, containing tuning options and GE options
 * @retval #MSTUNE_SUCCESS execution succeeded
 * @retval #MSTUNE_FAILED execution failed
 * @par Dependencies:
 * @li tune_api.cpp: the development package this API belongs to.
 * @li tune_api.h: the header file in which this API is declared.
 * @see None
 * @since
*/
extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);

#endif
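Finally, a minimal sketch of invoking the command-line tuning entry point declared above; the "work_path" option key is hypothetical and shown only to illustrate the map-based interface.

#include <map>
#include <string>
#include "tune_api.h"

int main() {
  std::map<std::string, std::string> options;
  options["work_path"] = "./tune_out";  // hypothetical key, for illustration only
  std::string msg;
  MsTuneStatus st = MsTuning(options, msg);
  if (st != MSTUNE_SUCCESS) {
    // msg carries the information returned on tuning failure
  }
  return st == MSTUNE_SUCCESS ? 0 : 1;
}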
