diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h
index ef5b4772..eae87835 100644
--- a/inc/external/acl/acl.h
+++ b/inc/external/acl/acl.h
@@ -26,9 +26,9 @@ extern "C" {
 #endif

 // Current version is 1.0.0
-#define ACL_MAJOR_VERSION 1
-#define ACL_MINOR_VERSION 0
-#define ACL_PATCH_VERSION 0
+#define ACL_MAJOR_VERSION    1
+#define ACL_MINOR_VERSION    0
+#define ACL_PATCH_VERSION    0

 /**
  * @ingroup AscendCL
@@ -70,4 +70,4 @@ ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *min
 }
 #endif

-#endif // INC_EXTERNAL_ACL_ACL_H_
+#endif  // INC_EXTERNAL_ACL_ACL_H_
diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h
index b3111860..0b520002 100644
--- a/inc/external/acl/acl_base.h
+++ b/inc/external/acl/acl_base.h
@@ -111,6 +111,7 @@ static const int ACL_ERROR_DUMP_NOT_RUN = 100045;
 static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046;
 static const int ACL_ERROR_PROF_API_CONFLICT = 148047;
 static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048;
+static const int ACL_ERROR_INVALID_OPP_PATH = 148049;

 static const int ACL_ERROR_BAD_ALLOC = 200000;
 static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
@@ -134,42 +135,42 @@ static const int ACL_ERROR_PROFILING_FAILURE = 500005;
 #define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE

 typedef enum {
-  ACL_DT_UNDEFINED = -1,
-  ACL_FLOAT = 0,
-  ACL_FLOAT16 = 1,
-  ACL_INT8 = 2,
-  ACL_INT32 = 3,
-  ACL_UINT8 = 4,
-  ACL_INT16 = 6,
-  ACL_UINT16 = 7,
-  ACL_UINT32 = 8,
-  ACL_INT64 = 9,
-  ACL_UINT64 = 10,
-  ACL_DOUBLE = 11,
-  ACL_BOOL = 12,
-  ACL_STRING = 13,
+    ACL_DT_UNDEFINED = -1,
+    ACL_FLOAT = 0,
+    ACL_FLOAT16 = 1,
+    ACL_INT8 = 2,
+    ACL_INT32 = 3,
+    ACL_UINT8 = 4,
+    ACL_INT16 = 6,
+    ACL_UINT16 = 7,
+    ACL_UINT32 = 8,
+    ACL_INT64 = 9,
+    ACL_UINT64 = 10,
+    ACL_DOUBLE = 11,
+    ACL_BOOL = 12,
+    ACL_STRING = 13,
 } aclDataType;

 typedef enum {
-  ACL_FORMAT_UNDEFINED = -1,
-  ACL_FORMAT_NCHW = 0,
-  ACL_FORMAT_NHWC = 1,
-  ACL_FORMAT_ND = 2,
-  ACL_FORMAT_NC1HWC0 = 3,
-  ACL_FORMAT_FRACTAL_Z = 4,
-  ACL_FORMAT_NC1HWC0_C04 = 12,
-  ACL_FORMAT_NDHWC = 27,
-  ACL_FORMAT_FRACTAL_NZ = 29,
-  ACL_FORMAT_NCDHW = 30,
-  ACL_FORMAT_NDC1HWC0 = 32,
-  ACL_FRACTAL_Z_3D = 33
+    ACL_FORMAT_UNDEFINED = -1,
+    ACL_FORMAT_NCHW = 0,
+    ACL_FORMAT_NHWC = 1,
+    ACL_FORMAT_ND = 2,
+    ACL_FORMAT_NC1HWC0 = 3,
+    ACL_FORMAT_FRACTAL_Z = 4,
+    ACL_FORMAT_NC1HWC0_C04 = 12,
+    ACL_FORMAT_NDHWC = 27,
+    ACL_FORMAT_FRACTAL_NZ = 29,
+    ACL_FORMAT_NCDHW = 30,
+    ACL_FORMAT_NDC1HWC0 = 32,
+    ACL_FRACTAL_Z_3D = 33
 } aclFormat;

 typedef enum {
-  ACL_DEBUG = 0,
-  ACL_INFO = 1,
-  ACL_WARNING = 2,
-  ACL_ERROR = 3,
+    ACL_DEBUG = 0,
+    ACL_INFO = 1,
+    ACL_WARNING = 2,
+    ACL_ERROR = 3,
 } aclLogLevel;

 /**
@@ -304,7 +305,9 @@ ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);
 * @retval aclTensorDesc pointer.
 * @retval nullptr if param is invalid or run out of memory
 */
-ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims,
+ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType,
+                                                       int numDims,
+                                                       const int64_t *dims,
                                                        aclFormat format);
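For reference, the factory reflowed above is normally paired with `aclDestroyTensorDesc` from the same header. A minimal sketch (dims and format are illustrative, error handling elided):

```c
#include "acl/acl_base.h"

// Describe a 1x3x224x224 NCHW float tensor, then release the descriptor.
void describe_input(void) {
    const int64_t dims[] = {1, 3, 224, 224};
    aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT, 4, dims, ACL_FORMAT_NCHW);
    if (desc != NULL) {
        aclDestroyTensorDesc(desc);
    }
}
```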

 /**
@@ -326,7 +329,8 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
+ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc,
+                                                    size_t dimsCount,
                                                     int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);

 /**
@@ -423,7 +427,9 @@ ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, si
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum,
+ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc,
+                                                      size_t index,
+                                                      size_t dimRangeNum,
                                                       int64_t *dimRange);

 /**
@@ -460,7 +466,7 @@ ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
-                                                      aclTensorDesc **dstDesc);
+                                                      aclTensorDesc **dstDesc);

 /**
  * @ingroup AscendCL
@@ -548,7 +554,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int nu
 *
 * @retval null for failed.
 * @retval OtherValues success.
- */
+*/
 ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);

 /**
@@ -559,7 +565,7 @@ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc,
 *
 * @retval null for failed
 * @retval OtherValues success
- */
+*/
 ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);

 /**
@@ -599,12 +605,13 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBu
 * @param ... [IN] the value of current log
 */
 ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
-                                   const char *fmt, ...);
+                                   const char *fmt, ...);

-#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+#define ACL_APP_LOG(level, fmt, ...) \
+    aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
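The wrapped macro keeps call sites unchanged while capturing the caller's function, file and line. A minimal usage sketch (the level and message are illustrative):

```c
#include "acl/acl_base.h"

// ACL_APP_LOG forwards to aclAppLog with __FUNCTION__/__FILE__/__LINE__,
// so the call site only supplies a level and a printf-style format.
static void report_failure(int ret) {
    ACL_APP_LOG(ACL_ERROR, "operation failed, ret = %d", ret);
}
```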

 #ifdef __cplusplus
 }
 #endif

-#endif // INC_EXTERNAL_ACL_ACL_BASE_H_
+#endif  // INC_EXTERNAL_ACL_ACL_BASE_H_
diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h
index 5886d857..4f3e257f 100644
--- a/inc/external/acl/acl_mdl.h
+++ b/inc/external/acl/acl_mdl.h
@@ -27,19 +27,19 @@ extern "C" {
 #endif

-#define ACL_MAX_DIM_CNT 128
-#define ACL_MAX_TENSOR_NAME_LEN 128
-#define ACL_MAX_BATCH_NUM 128
-#define ACL_MAX_HW_NUM 128
-#define ACL_MAX_SHAPE_COUNT 128
-#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF
-
-#define ACL_MDL_LOAD_FROM_FILE 1
-#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2
-#define ACL_MDL_LOAD_FROM_MEM 3
-#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4
-#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5
-#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6
+#define ACL_MAX_DIM_CNT          128
+#define ACL_MAX_TENSOR_NAME_LEN  128
+#define ACL_MAX_BATCH_NUM        128
+#define ACL_MAX_HW_NUM           128
+#define ACL_MAX_SHAPE_COUNT      128
+#define ACL_INVALID_NODE_INDEX   0xFFFFFFFF
+
+#define ACL_MDL_LOAD_FROM_FILE           1
+#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM  2
+#define ACL_MDL_LOAD_FROM_MEM            3
+#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM   4
+#define ACL_MDL_LOAD_FROM_FILE_WITH_Q    5
+#define ACL_MDL_LOAD_FROM_MEM_WITH_Q     6

 #define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
 #define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"
@@ -51,123 +51,123 @@ typedef struct aclAippExtendInfo aclAippExtendInfo;
 typedef struct aclmdlConfigHandle aclmdlConfigHandle;

 typedef enum {
-  ACL_YUV420SP_U8 = 1,
-  ACL_XRGB8888_U8,
-  ACL_RGB888_U8,
-  ACL_YUV400_U8,
-  ACL_NC1HWC0DI_FP16,
-  ACL_NC1HWC0DI_S8,
-  ACL_ARGB8888_U8,
-  ACL_YUYV_U8,
-  ACL_YUV422SP_U8,
-  ACL_AYUV444_U8,
-  ACL_RAW10,
-  ACL_RAW12,
-  ACL_RAW16,
-  ACL_RAW24,
-  ACL_AIPP_RESERVED = 0xffff,
+    ACL_YUV420SP_U8 = 1,
+    ACL_XRGB8888_U8,
+    ACL_RGB888_U8,
+    ACL_YUV400_U8,
+    ACL_NC1HWC0DI_FP16,
+    ACL_NC1HWC0DI_S8,
+    ACL_ARGB8888_U8,
+    ACL_YUYV_U8,
+    ACL_YUV422SP_U8,
+    ACL_AYUV444_U8,
+    ACL_RAW10,
+    ACL_RAW12,
+    ACL_RAW16,
+    ACL_RAW24,
+    ACL_AIPP_RESERVED = 0xffff,
 } aclAippInputFormat;

 typedef enum {
-  ACL_MDL_PRIORITY_INT32 = 0,
-  ACL_MDL_LOAD_TYPE_SIZET,
-  ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */
-  ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */
-  ACL_MDL_MEM_SIZET,
-  ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */
-  ACL_MDL_WEIGHT_SIZET,
-  ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */
-  ACL_MDL_WORKSPACE_SIZET,
-  ACL_MDL_INPUTQ_NUM_SIZET,
-  ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */
-  ACL_MDL_OUTPUTQ_NUM_SIZET,
-  ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */
+    ACL_MDL_PRIORITY_INT32 = 0,
+    ACL_MDL_LOAD_TYPE_SIZET,
+    ACL_MDL_PATH_PTR,            /**< pointer to model load path with deep copy */
+    ACL_MDL_MEM_ADDR_PTR,        /**< pointer to model memory with shallow copy */
+    ACL_MDL_MEM_SIZET,
+    ACL_MDL_WEIGHT_ADDR_PTR,     /**< pointer to weight memory of model with shallow copy */
+    ACL_MDL_WEIGHT_SIZET,
+    ACL_MDL_WORKSPACE_ADDR_PTR,  /**< pointer to workspace memory of model with shallow copy */
+    ACL_MDL_WORKSPACE_SIZET,
+    ACL_MDL_INPUTQ_NUM_SIZET,
+    ACL_MDL_INPUTQ_ADDR_PTR,     /**< pointer to inputQ with shallow copy */
+    ACL_MDL_OUTPUTQ_NUM_SIZET,
+    ACL_MDL_OUTPUTQ_ADDR_PTR     /**< pointer to outputQ with shallow copy */
 } aclmdlConfigAttr;
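The deep/shallow-copy notes above matter when attribute values go out of scope before loading. A minimal sketch of loading by path through the config handle, using `aclmdlCreateConfigHandle`, `aclmdlSetConfigOpt` and `aclmdlLoadWithConfig` (reflowed later in this diff); the path and the exact value-passing convention per attribute are assumptions, so treat this as illustrative only:

```c
#include <string.h>
#include "acl/acl_mdl.h"

// Sketch: configure a from-file load and hand the handle to the loader.
// ACL_MDL_PATH_PTR is documented as deep-copied, so a local string is safe.
aclError load_by_config(uint32_t *modelId) {
    aclmdlConfigHandle *handle = aclmdlCreateConfigHandle();
    if (handle == NULL) {
        return ACL_ERROR_BAD_ALLOC;
    }
    const char *modelPath = "./model.om";  // illustrative path
    size_t loadType = ACL_MDL_LOAD_FROM_FILE;
    aclError ret = aclmdlSetConfigOpt(handle, ACL_MDL_LOAD_TYPE_SIZET, &loadType, sizeof(loadType));
    if (ret == ACL_SUCCESS) {
        ret = aclmdlSetConfigOpt(handle, ACL_MDL_PATH_PTR, modelPath, strlen(modelPath) + 1);
    }
    if (ret == ACL_SUCCESS) {
        ret = aclmdlLoadWithConfig(handle, modelId);
    }
    (void)aclmdlDestroyConfigHandle(handle);
    return ret;
}
```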

 typedef enum {
-  ACL_DATA_WITHOUT_AIPP = 0,
-  ACL_DATA_WITH_STATIC_AIPP,
-  ACL_DATA_WITH_DYNAMIC_AIPP,
-  ACL_DYNAMIC_AIPP_NODE
+    ACL_DATA_WITHOUT_AIPP = 0,
+    ACL_DATA_WITH_STATIC_AIPP,
+    ACL_DATA_WITH_DYNAMIC_AIPP,
+    ACL_DYNAMIC_AIPP_NODE
 } aclmdlInputAippType;

 typedef struct aclmdlIODims {
-  char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */
-  size_t dimCount; /**< dim array count */
-  int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */
+    char name[ACL_MAX_TENSOR_NAME_LEN];  /**< tensor name */
+    size_t dimCount;                     /**< dim array count */
+    int64_t dims[ACL_MAX_DIM_CNT];       /**< dim data array */
 } aclmdlIODims;

 typedef struct aclAippDims {
-  aclmdlIODims srcDims; /**< input dims before model transform */
-  size_t srcSize; /**< input size before model transform */
-  aclmdlIODims aippOutdims; /**< aipp output dims */
-  size_t aippOutSize; /**< aipp output size */
+    aclmdlIODims srcDims;      /**< input dims before model transform */
+    size_t srcSize;            /**< input size before model transform */
+    aclmdlIODims aippOutdims;  /**< aipp output dims */
+    size_t aippOutSize;        /**< aipp output size */
 } aclAippDims;

 typedef struct aclmdlBatch {
-  size_t batchCount; /**< batch array count */
-  uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */
+    size_t batchCount;                  /**< batch array count */
+    uint64_t batch[ACL_MAX_BATCH_NUM];  /**< batch data array */
 } aclmdlBatch;

 typedef struct aclmdlHW {
-  size_t hwCount; /**< height&width array count */
-  uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */
+    size_t hwCount;                  /**< height&width array count */
+    uint64_t hw[ACL_MAX_HW_NUM][2];  /**< height&width data array */
 } aclmdlHW;

 typedef struct aclAippInfo {
-  aclAippInputFormat inputFormat;
-  int32_t srcImageSizeW;
-  int32_t srcImageSizeH;
-  int8_t cropSwitch;
-  int32_t loadStartPosW;
-  int32_t loadStartPosH;
-  int32_t cropSizeW;
-  int32_t cropSizeH;
-  int8_t resizeSwitch;
-  int32_t resizeOutputW;
-  int32_t resizeOutputH;
-  int8_t paddingSwitch;
-  int32_t leftPaddingSize;
-  int32_t rightPaddingSize;
-  int32_t topPaddingSize;
-  int32_t bottomPaddingSize;
-  int8_t cscSwitch;
-  int8_t rbuvSwapSwitch;
-  int8_t axSwapSwitch;
-  int8_t singleLineMode;
-  int32_t matrixR0C0;
-  int32_t matrixR0C1;
-  int32_t matrixR0C2;
-  int32_t matrixR1C0;
-  int32_t matrixR1C1;
-  int32_t matrixR1C2;
-  int32_t matrixR2C0;
-  int32_t matrixR2C1;
-  int32_t matrixR2C2;
-  int32_t outputBias0;
-  int32_t outputBias1;
-  int32_t outputBias2;
-  int32_t inputBias0;
-  int32_t inputBias1;
-  int32_t inputBias2;
-  int32_t meanChn0;
-  int32_t meanChn1;
-  int32_t meanChn2;
-  int32_t meanChn3;
-  float minChn0;
-  float minChn1;
-  float minChn2;
-  float minChn3;
-  float varReciChn0;
-  float varReciChn1;
-  float varReciChn2;
-  float varReciChn3;
-  aclFormat srcFormat;
-  aclDataType srcDatatype;
-  size_t srcDimNum;
-  size_t shapeCount;
-  aclAippDims outDims[ACL_MAX_SHAPE_COUNT];
-  aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */
+    aclAippInputFormat inputFormat;
+    int32_t srcImageSizeW;
+    int32_t srcImageSizeH;
+    int8_t cropSwitch;
+    int32_t loadStartPosW;
+    int32_t loadStartPosH;
+    int32_t cropSizeW;
+    int32_t cropSizeH;
+    int8_t resizeSwitch;
+    int32_t resizeOutputW;
+    int32_t resizeOutputH;
+    int8_t paddingSwitch;
+    int32_t leftPaddingSize;
+    int32_t rightPaddingSize;
+    int32_t topPaddingSize;
+    int32_t bottomPaddingSize;
+    int8_t cscSwitch;
+    int8_t rbuvSwapSwitch;
+    int8_t axSwapSwitch;
+    int8_t singleLineMode;
+    int32_t matrixR0C0;
+    int32_t matrixR0C1;
+    int32_t matrixR0C2;
+    int32_t matrixR1C0;
+    int32_t matrixR1C1;
+    int32_t matrixR1C2;
+    int32_t matrixR2C0;
+    int32_t matrixR2C1;
+    int32_t matrixR2C2;
+    int32_t outputBias0;
+    int32_t outputBias1;
+    int32_t outputBias2;
+    int32_t inputBias0;
+    int32_t inputBias1;
+    int32_t inputBias2;
+    int32_t meanChn0;
+    int32_t meanChn1;
+    int32_t meanChn2;
+    int32_t meanChn3;
+    float minChn0;
+    float minChn1;
+    float minChn2;
+    float minChn3;
+    float varReciChn0;
+    float varReciChn1;
+    float varReciChn2;
+    float varReciChn3;
+    aclFormat srcFormat;
+    aclDataType srcDatatype;
+    size_t srcDimNum;
+    size_t shapeCount;
+    aclAippDims outDims[ACL_MAX_SHAPE_COUNT];
+    aclAippExtendInfo *aippExtend;  /**< reserved parameters, current version needs to be null */
 } aclAippInfo;

 /**
@@ -339,7 +339,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId);
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize,
+                                               uint32_t *modelId);

 /**
  * @ingroup AscendCL
@@ -361,8 +362,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSi
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr,
-                                                       size_t workSize, void *weightPtr, size_t weightSize);
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath,
+                                                       uint32_t *modelId, void *workPtr, size_t workSize,
+                                                       void *weightPtr, size_t weightSize);

 /**
  * @ingroup AscendCL
@@ -385,9 +387,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, ui
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId,
-                                                      void *workPtr, size_t workSize, void *weightPtr,
-                                                      size_t weightSize);
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize,
+                                                      uint32_t *modelId, void *workPtr, size_t workSize,
+                                                      void *weightPtr, size_t weightSize);

 /**
  * @ingroup AscendCL
@@ -422,8 +424,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId,
-                                                    const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ,
-                                                    size_t outputQNum);
+                                                    const uint32_t *inputQ, size_t inputQNum,
+                                                    const uint32_t *outputQ, size_t outputQNum);

 /**
  * @ingroup AscendCL
@@ -453,8 +455,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset
 * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
 * aclmdlLoadFromMemWithMem
 */
-ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output,
-                                                aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input,
+                                                aclmdlDataset *output, aclrtStream stream);
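A minimal sketch of the load/execute pair reflowed above: load an .om file, run one synchronous inference, then unload. Dataset construction and `aclmdlUnload` come from the same header and are assumed here:

```c
#include "acl/acl_mdl.h"

// Sketch: one synchronous inference round-trip. The caller owns the
// input/output datasets; error handling is collapsed for brevity.
aclError run_once(const char *omPath, const aclmdlDataset *input, aclmdlDataset *output) {
    uint32_t modelId = 0;
    aclError ret = aclmdlLoadFromFile(omPath, &modelId);
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    ret = aclmdlExecute(modelId, input, output);
    (void)aclmdlUnload(modelId);
    return ret;
}
```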

 /**
  * @ingroup AscendCL
@@ -829,11 +831,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet,
 * @retval OtherValues Failure
 *
 * @see aclmdlCreateAIPP
- */
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0,
-                                                    int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0,
-                                                    int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0,
-                                                    int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
+*/
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch,
+                                                    int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2,
+                                                    int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2,
+                                                    int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
                                                     uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, uint8_t cscOutputBiasR2,
                                                     uint8_t cscInputBiasR0, uint8_t cscInputBiasR1, uint8_t cscInputBiasR2);

 /**
  * @ingroup AscendCL
@@ -849,7 +851,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, in
 * @retval OtherValues Failure
 *
 * @see aclmdlCreateAIPP
- */
+*/
 ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch);

 /**
@@ -863,7 +865,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSe
 * @retval OtherValues Failure
 *
 * @see aclmdlCreateAIPP
- */
+*/
 ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch);

 /**
@@ -878,7 +880,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet,
 * @retval OtherValues Failure
 *
 * @see aclmdlCreateAIPP
- */
+*/
 ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW,
                                                        int32_t srcImageSizeH);

 /**
@@ -898,10 +900,14 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet,
 * @retval OtherValues Failure
 *
 * @see aclmdlCreateAIPP
- */
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW,
-                                                    int32_t scfInputSizeH, int32_t scfOutputSizeW,
-                                                    int32_t scfOutputSizeH, uint64_t batchIndex);
+*/
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet,
+                                                    int8_t scfSwitch,
+                                                    int32_t scfInputSizeW,
+                                                    int32_t scfInputSizeH,
+                                                    int32_t scfOutputSizeW,
+                                                    int32_t scfOutputSizeH,
+                                                    uint64_t batchIndex);

 /**
  * @ingroup AscendCL
@@ -919,9 +925,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, in
 * @retval OtherValues Failure
 *
 * @see aclmdlCreateAIPP
- */
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW,
-                                                     int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH,
+*/
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet,
+                                                     int8_t cropSwitch,
+                                                     int32_t cropStartPosW,
+                                                     int32_t cropStartPosH,
+                                                     int32_t cropSizeW,
+                                                     int32_t cropSizeH,
                                                      uint64_t batchIndex);

 /**
@@ -940,7 +950,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, i
 * @retval OtherValues Failure
 *
 * @see aclmdlCreateAIPP
- */
+*/
 ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch,
                                                         int32_t paddingSizeTop, int32_t paddingSizeBottom,
                                                         int32_t paddingSizeLeft, int32_t paddingSizeRight,

 /**
@@ -961,10 +971,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet
 * @retval OtherValues Failure
 *
 * @see aclmdlCreateAIPP
- */
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0,
-                                                       int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2,
-                                                       int16_t dtcPixelMeanChn3, uint64_t batchIndex);
+*/
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
+                                                       int16_t dtcPixelMeanChn0,
+                                                       int16_t dtcPixelMeanChn1,
+                                                       int16_t dtcPixelMeanChn2,
+                                                       int16_t dtcPixelMeanChn3,
+                                                       uint64_t batchIndex);
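A minimal sketch tying the dynamic-AIPP setters together: allocate a parameter set, pick the input format and source image size, and attach it to one model input. `aclmdlCreateAIPP`/`aclmdlDestroyAIPP` are the creation pair referenced by the @see tags; the batch count, format and sizes are illustrative:

```c
#include "acl/acl_mdl.h"

// Sketch: configure dynamic AIPP for input `index` of a loaded model.
aclError attach_aipp(uint32_t modelId, aclmdlDataset *dataset, size_t index) {
    aclmdlAIPP *aipp = aclmdlCreateAIPP(1);  // parameter set for one batch
    if (aipp == NULL) {
        return ACL_ERROR_BAD_ALLOC;
    }
    aclError ret = aclmdlSetAIPPInputFormat(aipp, ACL_YUV420SP_U8);
    if (ret == ACL_SUCCESS) {
        ret = aclmdlSetAIPPSrcImageSize(aipp, 224, 224);
    }
    if (ret == ACL_SUCCESS) {
        ret = aclmdlSetInputAIPP(modelId, dataset, index, aipp);
    }
    (void)aclmdlDestroyAIPP(aipp);
    return ret;
}
```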

 /**
  * @ingroup AscendCL
@@ -981,10 +994,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
 * @retval OtherValues Failure
 *
 * @see aclmdlCreateAIPP
- */
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0,
-                                                      float dtcPixelMinChn1, float dtcPixelMinChn2,
-                                                      float dtcPixelMinChn3, uint64_t batchIndex);
+*/
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
+                                                      float dtcPixelMinChn0,
+                                                      float dtcPixelMinChn1,
+                                                      float dtcPixelMinChn2,
+                                                      float dtcPixelMinChn3,
+                                                      uint64_t batchIndex);

 /**
  * @ingroup AscendCL
@@ -1001,10 +1017,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
 * @retval OtherValues Failure
 *
 * @see aclmdlCreateAIPP
- */
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0,
-                                                       float dtcPixelVarReciChn1, float dtcPixelVarReciChn2,
-                                                       float dtcPixelVarReciChn3, uint64_t batchIndex);
+*/
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
+                                                       float dtcPixelVarReciChn0,
+                                                       float dtcPixelVarReciChn1,
+                                                       float dtcPixelVarReciChn2,
+                                                       float dtcPixelVarReciChn3,
+                                                       uint64_t batchIndex);

 /**
  * @ingroup AscendCL
@@ -1020,8 +1039,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
 *
 * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
 * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
- */
-ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+*/
+ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId,
+                                                aclmdlDataset *dataset,
+                                                size_t index,
                                                 const aclmdlAIPP *aippParmsSet);

 /**
@@ -1038,8 +1059,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset
 *
 * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
 * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
- */
-ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+*/
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId,
+                                                       aclmdlDataset *dataset,
+                                                       size_t index,
                                                        const aclmdlAIPP *aippParmsSet);

 /**
@@ -1057,8 +1080,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlD
 *
 * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
 * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
- */
-ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type,
+*/
+ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId,
+                                               size_t index,
+                                               aclmdlInputAippType *type,
                                                size_t *dynamicAttachedDataIndex);

 /**
@@ -1075,7 +1100,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a
 *
 * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
 * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
- */
+*/
 ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo);

 /**
@@ -1094,11 +1119,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t ind
 *
 * @retval ACL_SUCCESS The function is successfully executed
 * @retval OtherValues Failure
- */
-ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId,
-                                                      char *opName, size_t opNameLen, aclTensorDesc **inputDesc,
-                                                      size_t *numInputs, aclTensorDesc **outputDesc,
-                                                      size_t *numOutputs);
+*/
+ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId,
+    uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs,
+    aclTensorDesc **outputDesc, size_t *numOutputs);

 /**
  * @ingroup AscendCL
@@ -1106,7 +1130,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
- */
+*/
 ACL_FUNC_VISIBILITY aclError aclmdlInitDump();

 /**
@@ -1117,7 +1141,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlInitDump();
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
- */
+*/
 ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);

 /**
@@ -1126,7 +1150,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
- */
+*/
 ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();

 /**
@@ -1138,7 +1162,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
- */
+*/
 ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId);

 /**
@@ -1148,7 +1172,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *hand
 * @retval the aclmdlConfigHandle pointer
 *
 * @see aclmdlDestroyConfigHandle
- */
+*/
 ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle();

 /**
@@ -1177,10 +1201,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr,
-                                                const void *attrValue, size_t valueSize);
+                                                const void *attrValue, size_t valueSize);

 #ifdef __cplusplus
 }
 #endif

-#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_
+#endif  // INC_EXTERNAL_ACL_ACL_MODEL_H_
diff --git a/inc/external/acl/acl_op.h b/inc/external/acl/acl_op.h
index d2e59bfb..b1be0d6e 100644
--- a/inc/external/acl/acl_op.h
+++ b/inc/external/acl/acl_op.h
@@ -33,9 +33,9 @@ typedef void (*aclDataDeallocator)(void *data, size_t length);
 static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;

 typedef enum aclEngineType {
-  ACL_ENGINE_SYS,
-  ACL_ENGINE_AICORE,
-  ACL_ENGINE_VECTOR,
+    ACL_ENGINE_SYS,
+    ACL_ENGINE_AICORE,
+    ACL_ENGINE_VECTOR,
 } aclopEngineType;

 /**
@@ -148,7 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
-                                                  const uint8_t *values);
+                                                  const uint8_t *values);

 /**
  * @ingroup AscendCL
@@ -163,7 +163,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *a
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
-                                                 const int64_t *values);
+                                                 const int64_t *values);

 /**
  * @ingroup AscendCL
@@ -178,7 +178,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *at
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
-                                                   const float *values);
+                                                   const float *values);

 /**
  * @ingroup AscendCL
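A minimal sketch of the attribute setters above. `aclopCreateAttr`, `aclopSetAttrBool` and `aclopDestroyAttr` come from the same header (they are not part of these hunks); the attribute names and values are illustrative:

```c
#include "acl/acl_op.h"

// Sketch: build an attribute set for a single-op call, releasing it on error.
aclopAttr *make_attr(void) {
    aclopAttr *attr = aclopCreateAttr();
    if (attr == NULL) {
        return NULL;
    }
    const int64_t strides[] = {1, 1, 2, 2};
    if (aclopSetAttrListInt(attr, "strides", 4, strides) != ACL_SUCCESS ||
        aclopSetAttrBool(attr, "use_bias", 1) != ACL_SUCCESS) {
        aclopDestroyAttr(attr);
        return NULL;
    }
    return attr;
}
```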
@@ -193,7 +193,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
-                                                    const char **values);
+                                                    const char **values);

 /**
  * @ingroup AscendCL
@@ -208,8 +208,11 @@
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists,
-                                                     const int *numValues, const int64_t *const values[]);
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr,
+                                                     const char *attrName,
+                                                     int numLists,
+                                                     const int *numValues,
+                                                     const int64_t *const values[]);

 /**
  * @ingroup AscendCL
@@ -239,10 +242,15 @@
 * @retval OtherValues Failure
 */
 ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead")
-ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
-                                          const aclDataBuffer *const inputs[], int numOutputs,
-                                          const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
-                                          const aclopAttr *attr, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType,
+                                          int numInputs,
+                                          const aclTensorDesc *const inputDesc[],
+                                          const aclDataBuffer *const inputs[],
+                                          int numOutputs,
+                                          const aclTensorDesc *const outputDesc[],
+                                          aclDataBuffer *const outputs[],
+                                          const aclopAttr *attr,
+                                          aclrtStream stream);

 /**
  * @ingroup AscendCL
@@ -272,9 +280,15 @@ ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, con
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
-                                            aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
-                                            aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType,
+                                            int numInputs,
+                                            aclTensorDesc *inputDesc[],
+                                            aclDataBuffer *inputs[],
+                                            int numOutputs,
+                                            aclTensorDesc *outputDesc[],
+                                            aclDataBuffer *outputs[],
+                                            aclopAttr *attr,
+                                            aclrtStream stream);
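A minimal sketch of the reflowed `aclopExecuteV2`: run one single-input, single-output operator. Descriptors, buffers, attributes and the stream are assumed to be created by the caller; the op type is illustrative:

```c
#include "acl/acl_op.h"

// Sketch: wrap caller-owned resources into the array form the API expects.
aclError run_single_op(aclTensorDesc *inDesc, aclDataBuffer *in,
                       aclTensorDesc *outDesc, aclDataBuffer *out,
                       aclopAttr *attr, aclrtStream stream) {
    aclTensorDesc *inputDesc[] = {inDesc};
    aclDataBuffer *inputs[] = {in};
    aclTensorDesc *outputDesc[] = {outDesc};
    aclDataBuffer *outputs[] = {out};
    return aclopExecuteV2("Cast", 1, inputDesc, inputs, 1, outputDesc, outputs, attr, stream);
}
```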

 /**
  * @ingroup AscendCL
@@ -292,9 +306,12 @@ ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, a
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs,
-                                               const aclTensorDesc *const inputDesc[], int numOutputs,
-                                               const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
+ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType,
+                                               int numInputs,
+                                               const aclTensorDesc *const inputDesc[],
+                                               int numOutputs,
+                                               const aclTensorDesc *const outputDesc[],
+                                               const aclopAttr *opAttr,
                                                aclopHandle **handle);

 /**
@@ -326,9 +343,12 @@ ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);
 *
 * @see aclopCreateHandle | aclCreateDataBuffer
 */
-ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs,
-                                                 const aclDataBuffer *const inputs[], int numOutputs,
-                                                 aclDataBuffer *const outputs[], aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle,
+                                                 int numInputs,
+                                                 const aclDataBuffer *const inputs[],
+                                                 int numOutputs,
+                                                 aclDataBuffer *const outputs[],
+                                                 aclrtStream stream);

 /**
  * @ingroup AscendCL
@@ -344,8 +364,11 @@ ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInp
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer,
-                                       const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate,
+ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc,
+                                       const aclDataBuffer *srcBuffer,
+                                       const aclTensorDesc *dstDesc,
+                                       aclDataBuffer *dstBuffer,
+                                       uint8_t truncate,
                                        aclrtStream stream);

 /**
@@ -360,9 +383,12 @@ ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDa
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate,
+ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc,
+                                                      aclTensorDesc *dstDesc,
+                                                      uint8_t truncate,
                                                       aclopHandle **handle);
+
 /**
  * @ingroup AscendCL
  * @brief create kernel
@@ -381,10 +407,15 @@ ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, ac
 *
 * @see aclopCompile
 */
-ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName,
-                                               void *binData, int binSize, aclopEngineType enginetype,
+ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType,
+                                               const char *kernelId,
+                                               const char *kernelName,
+                                               void *binData,
+                                               int binSize,
+                                               aclopEngineType enginetype,
                                                aclDataDeallocator deallocator);
+
 /**
  * @ingroup AscendCL
  * @brief create kernel
@@ -399,8 +430,11 @@ ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *k
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs,
-                                     const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
+typedef aclError (*aclopCompileFunc)(int numInputs,
+                                     const aclTensorDesc *const inputDesc[],
+                                     int numOutputs,
+                                     const aclTensorDesc *const outputDesc[],
+                                     const aclopAttr *opAttr,
                                      aclopKernelDesc *aclopKernelDesc);

 /**
@@ -441,8 +475,11 @@ ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim,
-                                                const void *args, uint32_t argSize);
+ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc,
+                                                const char *kernelId,
+                                                uint32_t blockDim,
+                                                const void *args,
+                                                uint32_t argSize);

 /**
  * @ingroup AscendCL
@@ -473,9 +510,12 @@ ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kerne
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs,
-                                               const aclTensorDesc *const inputDesc[], int numOutputs,
-                                               const aclTensorDesc *const outputDesc[], const aclopAttr *attr);
+ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType,
+                                               int numInputs,
+                                               const aclTensorDesc *const inputDesc[],
+                                               int numOutputs,
+                                               const aclTensorDesc *const outputDesc[],
+                                               const aclopAttr *attr);

 /**
  * @ingroup AscendCL
@@ -493,12 +533,17 @@ ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
-                                             aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
+ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType,
+                                             int numInputs,
+                                             aclTensorDesc *inputDesc[],
+                                             aclDataBuffer *inputs[],
+                                             int numOutputs,
+                                             aclTensorDesc *outputDesc[],
                                              aclopAttr *attr);
+
 #ifdef __cplusplus
 }
 #endif

-#endif // INC_EXTERNAL_ACL_ACL_OP_H_
+#endif  // INC_EXTERNAL_ACL_ACL_OP_H_
diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h
index adae90c7..6bbb855c 100644
--- a/inc/external/acl/acl_op_compiler.h
+++ b/inc/external/acl/acl_op_compiler.h
@@ -24,18 +24,21 @@ extern "C" {
 #endif

-typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType;
+typedef enum aclCompileType {
+    ACL_COMPILE_SYS,
+    ACL_COMPILE_UNREGISTERED
+} aclopCompileType;

 typedef enum {
-  ACL_PRECISION_MODE,
-  ACL_AICORE_NUM,
-  ACL_AUTO_TUNE_MODE,
-  ACL_OP_SELECT_IMPL_MODE,
-  ACL_OPTYPELIST_FOR_IMPLMODE,
-  ACL_OP_DEBUG_LEVEL,
-  ACL_DEBUG_DIR,
-  ACL_OP_COMPILER_CACHE_MODE,
-  ACL_OP_COMPILER_CACHE_DIR
+    ACL_PRECISION_MODE,
+    ACL_AICORE_NUM,
+    ACL_AUTO_TUNE_MODE,
+    ACL_OP_SELECT_IMPL_MODE,
+    ACL_OPTYPELIST_FOR_IMPLMODE,
+    ACL_OP_DEBUG_LEVEL,
+    ACL_DEBUG_DIR,
+    ACL_OP_COMPILER_CACHE_MODE,
+    ACL_OP_COMPILER_CACHE_DIR
 } aclCompileOpt;

 /**
@@ -56,10 +59,15 @@ typedef enum {
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
-                                          int numOutputs, const aclTensorDesc *const outputDesc[],
-                                          const aclopAttr *attr, aclopEngineType engineType,
-                                          aclopCompileType compileFlag, const char *opPath);
+ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType,
+                                          int numInputs,
+                                          const aclTensorDesc *const inputDesc[],
+                                          int numOutputs,
+                                          const aclTensorDesc *const outputDesc[],
+                                          const aclopAttr *attr,
+                                          aclopEngineType engineType,
+                                          aclopCompileType compileFlag,
+                                          const char *opPath);

 /**
  * @ingroup AscendCL
@@ -82,10 +90,11 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, con
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(
-    const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
-    int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
-    aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType,
+    int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
+    int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
+    const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag,
+    const char *opPath, aclrtStream stream);

 /**
  * @ingroup AscendCL
@@ -103,4 +112,4 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val
 }
 #endif

-#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
+#endif  // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h
index 990c70cf..d2675124 100644
--- a/inc/external/acl/acl_prof.h
+++ b/inc/external/acl/acl_prof.h
@@ -23,21 +23,21 @@ extern "C" {
 #endif

-#define ACL_PROF_ACL_API 0x0001
-#define ACL_PROF_TASK_TIME 0x0002
-#define ACL_PROF_AICORE_METRICS 0x0004
-#define ACL_PROF_AICPU 0x0008
+#define ACL_PROF_ACL_API         0x0001
+#define ACL_PROF_TASK_TIME       0x0002
+#define ACL_PROF_AICORE_METRICS  0x0004
+#define ACL_PROF_AICPU           0x0008

-#define ACL_PROF_MAX_OP_NAME_LEN 257
-#define ACL_PROF_MAX_OP_TYPE_LEN 65
+#define ACL_PROF_MAX_OP_NAME_LEN  257
+#define ACL_PROF_MAX_OP_TYPE_LEN  65

 typedef enum {
-  ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
-  ACL_AICORE_PIPE_UTILIZATION = 1,
-  ACL_AICORE_MEMORY_BANDWIDTH = 2,
-  ACL_AICORE_L0B_AND_WIDTH = 3,
-  ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
-  ACL_AICORE_NONE = 0xFF
+    ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
+    ACL_AICORE_PIPE_UTILIZATION = 1,
+    ACL_AICORE_MEMORY_BANDWIDTH = 2,
+    ACL_AICORE_L0B_AND_WIDTH = 3,
+    ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
+    ACL_AICORE_NONE = 0xFF
 } aclprofAicoreMetrics;

 typedef struct aclprofConfig aclprofConfig;
@@ -98,8 +98,7 @@ ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig);
 * @see aclprofDestroyConfig
 */
 ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
-                                                       aclprofAicoreMetrics aicoreMetrics,
-                                                       aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);
+    aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);

 /**
  * @ingroup AscendCL
@@ -139,7 +138,8 @@ ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig);
 *
 * @see aclprofModelUnSubscribe
 */
-ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig);
+ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId,
+                                                   const aclprofSubscribeConfig *profSubscribeConfig);

 /**
  * @ingroup AscendCL
@@ -167,7 +167,7 @@ ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId);
 * @see aclprofDestroySubscribeConfig
 */
 ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch,
-                                                                         aclprofAicoreMetrics aicoreMetrics, void *fd);
+                                                                         aclprofAicoreMetrics aicoreMetrics, void *fd);

 /**
  * @ingroup AscendCL
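A minimal sketch of the configuration APIs reflowed above: profile device 0 with API and task-time collection around some workload, then release the config. `aclprofDestroyConfig` is the release function named in the @see tag; the surrounding `aclprofInit`/`aclprofFinalize` setup is assumed to happen elsewhere, and the data-type mask is illustrative:

```c
#include "acl/acl_prof.h"

// Sketch: bracket a workload with aclprofStart/aclprofStop on device 0.
aclError profile_device0(void (*workload)(void)) {
    uint32_t deviceId = 0;
    aclprofConfig *config = aclprofCreateConfig(&deviceId, 1, ACL_AICORE_NONE, NULL,
                                                ACL_PROF_ACL_API | ACL_PROF_TASK_TIME);
    if (config == NULL) {
        return ACL_ERROR_PROFILING_FAILURE;
    }
    aclError ret = aclprofStart(config);
    if (ret == ACL_SUCCESS) {
        workload();
        ret = aclprofStop(config);
    }
    (void)aclprofDestroyConfig(config);
    return ret;
}
```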
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType,
-                                              size_t opTypeLen);
+ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index,
+                                              char *opType, size_t opTypeLen);

 /**
  * @ingroup AscendCL
@@ -235,8 +235,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoL
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName,
-                                              size_t opNameLen);
+ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index,
+                                              char *opName, size_t opNameLen);

 /**
  * @ingroup AscendCL
@@ -293,4 +293,4 @@ ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLe
 }
 #endif

-#endif // INC_EXTERNAL_ACL_PROF_H_
+#endif  // INC_EXTERNAL_ACL_PROF_H_
diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h
index eb6b4240..6fd2da6e 100644
--- a/inc/external/acl/acl_rt.h
+++ b/inc/external/acl/acl_rt.h
@@ -26,62 +26,62 @@ extern "C" {
 #endif

 typedef enum aclrtRunMode {
-  ACL_DEVICE,
-  ACL_HOST,
+    ACL_DEVICE,
+    ACL_HOST,
 } aclrtRunMode;

 typedef enum aclrtTsId {
-  ACL_TS_ID_AICORE = 0,
-  ACL_TS_ID_AIVECTOR = 1,
-  ACL_TS_ID_RESERVED = 2,
+    ACL_TS_ID_AICORE = 0,
+    ACL_TS_ID_AIVECTOR = 1,
+    ACL_TS_ID_RESERVED = 2,
 } aclrtTsId;

 typedef enum aclrtEventStatus {
-  ACL_EVENT_STATUS_COMPLETE = 0,
-  ACL_EVENT_STATUS_NOT_READY = 1,
-  ACL_EVENT_STATUS_RESERVED = 2,
+    ACL_EVENT_STATUS_COMPLETE = 0,
+    ACL_EVENT_STATUS_NOT_READY = 1,
+    ACL_EVENT_STATUS_RESERVED = 2,
 } aclrtEventStatus;

 typedef enum aclrtCallbackBlockType {
-  ACL_CALLBACK_NO_BLOCK,
-  ACL_CALLBACK_BLOCK,
+    ACL_CALLBACK_NO_BLOCK,
+    ACL_CALLBACK_BLOCK,
 } aclrtCallbackBlockType;

 typedef enum aclrtMemcpyKind {
-  ACL_MEMCPY_HOST_TO_HOST,
-  ACL_MEMCPY_HOST_TO_DEVICE,
-  ACL_MEMCPY_DEVICE_TO_HOST,
-  ACL_MEMCPY_DEVICE_TO_DEVICE,
+    ACL_MEMCPY_HOST_TO_HOST,
+    ACL_MEMCPY_HOST_TO_DEVICE,
+    ACL_MEMCPY_DEVICE_TO_HOST,
+    ACL_MEMCPY_DEVICE_TO_DEVICE,
 } aclrtMemcpyKind;

 typedef enum aclrtMemMallocPolicy {
-  ACL_MEM_MALLOC_HUGE_FIRST,
-  ACL_MEM_MALLOC_HUGE_ONLY,
-  ACL_MEM_MALLOC_NORMAL_ONLY,
-  ACL_MEM_MALLOC_HUGE_FIRST_P2P,
-  ACL_MEM_MALLOC_HUGE_ONLY_P2P,
-  ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
+    ACL_MEM_MALLOC_HUGE_FIRST,
+    ACL_MEM_MALLOC_HUGE_ONLY,
+    ACL_MEM_MALLOC_NORMAL_ONLY,
+    ACL_MEM_MALLOC_HUGE_FIRST_P2P,
+    ACL_MEM_MALLOC_HUGE_ONLY_P2P,
+    ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
 } aclrtMemMallocPolicy;

 typedef enum aclrtMemAttr {
-  ACL_DDR_MEM,
-  ACL_HBM_MEM,
-  ACL_DDR_MEM_HUGE,
-  ACL_DDR_MEM_NORMAL,
-  ACL_HBM_MEM_HUGE,
-  ACL_HBM_MEM_NORMAL,
-  ACL_DDR_MEM_P2P_HUGE,
-  ACL_DDR_MEM_P2P_NORMAL,
-  ACL_HBM_MEM_P2P_HUGE,
-  ACL_HBM_MEM_P2P_NORMAL,
+    ACL_DDR_MEM,
+    ACL_HBM_MEM,
+    ACL_DDR_MEM_HUGE,
+    ACL_DDR_MEM_NORMAL,
+    ACL_HBM_MEM_HUGE,
+    ACL_HBM_MEM_NORMAL,
+    ACL_DDR_MEM_P2P_HUGE,
+    ACL_DDR_MEM_P2P_NORMAL,
+    ACL_HBM_MEM_P2P_HUGE,
+    ACL_HBM_MEM_P2P_NORMAL,
 } aclrtMemAttr;

 typedef enum aclrtGroupAttr {
-  ACL_GROUP_AICORE_INT,
-  ACL_GROUP_AIV_INT,
-  ACL_GROUP_AIC_INT,
-  ACL_GROUP_SDMANUM_INT,
-  ACL_GROUP_ASQNUM_INT
+    ACL_GROUP_AICORE_INT,
+    ACL_GROUP_AIV_INT,
+    ACL_GROUP_AIC_INT,
+    ACL_GROUP_SDMANUM_INT,
+    ACL_GROUP_ASQNUM_INT
 } aclrtGroupAttr;

 typedef struct tagRtGroupInfo aclrtGroupInfo;
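A minimal sketch combining the enums above with `aclrtMalloc`/`aclrtMemcpy`/`aclrtFree` (reflowed in the hunks that follow): allocate device memory with the huge-page-first policy and copy a host buffer to it:

```c
#include "acl/acl_rt.h"

// Sketch: host-to-device upload; the buffer and length are caller-provided.
aclError upload(const void *hostBuf, size_t len, void **devPtr) {
    aclError ret = aclrtMalloc(devPtr, len, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_SUCCESS) {
        return ret;
    }
    ret = aclrtMemcpy(*devPtr, len, hostBuf, len, ACL_MEMCPY_HOST_TO_DEVICE);
    if (ret != ACL_SUCCESS) {
        (void)aclrtFree(*devPtr);
        *devPtr = NULL;
    }
    return ret;
}
```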

@@ -472,7 +472,7 @@ ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stre
 */
 ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream);

-/**
+ /**
  * @ingroup AscendCL
  * @brief Queries an event's status
 *
@@ -534,7 +534,9 @@ ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start,
 *
 * @see aclrtFree | acldvppMalloc | aclrtMallocCached
 */
-ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
+ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr,
+                                         size_t size,
+                                         aclrtMemMallocPolicy policy);

 /**
  * @ingroup AscendCL
@@ -557,7 +559,9 @@ ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMal
 *
 * @see aclrtFree | aclrtMalloc
 */
-ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
+ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr,
+                                               size_t size,
+                                               aclrtMemMallocPolicy policy);

 /**
  * @ingroup AscendCL
@@ -648,7 +652,10 @@ ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr);
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count,
+ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst,
+                                         size_t destMax,
+                                         const void *src,
+                                         size_t count,
                                          aclrtMemcpyKind kind);

 /**
@@ -695,31 +702,38 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t
 *
 * @see aclrtSynchronizeStream
 */
-ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count,
-                                              aclrtMemcpyKind kind, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst,
+                                              size_t destMax,
+                                              const void *src,
+                                              size_t count,
+                                              aclrtMemcpyKind kind,
+                                              aclrtStream stream);

 /**
- * @ingroup AscendCL
- * @brief Asynchronous initialize memory
- * and set contents of memory to specified value async
- *
- * @par Function
+* @ingroup AscendCL
+* @brief Asynchronously initialize memory
+* and set contents of memory to specified value async
+*
+* @par Function
 * The memory to be initialized is on the Host or device side,
 * and the system determines whether
 * it is host or device according to the address
 *
- * @param devPtr [IN] destination address pointer
- * @param maxCount [IN] Max length of destination address memory
- * @param value [IN] set value
- * @param count [IN] the number of byte to set
- * @param stream [IN] asynchronized task stream
- *
- * @retval ACL_SUCCESS The function is successfully executed.
- * @retval OtherValues Failure
- *
- * @see aclrtSynchronizeStream
- */
-ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count,
+* @param devPtr [IN] destination address pointer
+* @param maxCount [IN] Max length of destination address memory
+* @param value [IN] set value
+* @param count [IN] the number of bytes to set
+* @param stream [IN] asynchronous task stream
+*
+* @retval ACL_SUCCESS The function is successfully executed.
+* @retval OtherValues Failure
+*
+* @see aclrtSynchronizeStream
+*/
+ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr,
+                                              size_t maxCount,
+                                              int32_t value,
+                                              size_t count,
                                               aclrtStream stream);

 /**
@@ -865,8 +879,11 @@ ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo);
 *
 * @see aclrtGetGroupCount | aclrtGetAllGroupInfo
 */
-ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupId,
-                                                     aclrtGroupAttr attr, void *attrValue, size_t valueLen,
+ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo,
+                                                     int32_t groupId,
+                                                     aclrtGroupAttr attr,
+                                                     void *attrValue,
+                                                     size_t valueLen,
                                                      size_t *paramRetSize);

 /**
@@ -929,4 +946,5 @@ ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, si
 }
 #endif

-#endif // INC_EXTERNAL_ACL_ACL_RT_H_
+#endif  // INC_EXTERNAL_ACL_ACL_RT_H_
+
diff --git a/inc/external/acl/acl_tdt.h b/inc/external/acl/acl_tdt.h
index c357518d..61995121 100644
--- a/inc/external/acl/acl_tdt.h
+++ b/inc/external/acl/acl_tdt.h
@@ -24,10 +24,10 @@ extern "C" {
 #endif

 enum acltdtTensorType {
-  ACL_TENSOR_DATA_UNDEFINED = -1,
-  ACL_TENSOR_DATA_TENSOR,
-  ACL_TENSOR_DATA_END_OF_SEQUENCE,
-  ACL_TENSOR_DATA_ABNORMAL
+    ACL_TENSOR_DATA_UNDEFINED = -1,
+    ACL_TENSOR_DATA_TENSOR,
+    ACL_TENSOR_DATA_END_OF_SEQUENCE,
+    ACL_TENSOR_DATA_ABNORMAL
 };

 typedef struct acltdtDataItem acltdtDataItem;
@@ -64,7 +64,7 @@ ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *
 *
 * @retval null for failed
 * @retval OtherValues success
- */
+*/
 ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem);

 /**
@@ -75,7 +75,7 @@ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataIt
 *
 * @retval 0 for failed
 * @retval OtherValues success
- */
+*/
 ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem);

 /**
@@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataI
 *
 * @retval 0 for failed
 * @retval OtherValues success
- */
+*/
 ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem);

 /**
@@ -118,8 +118,12 @@ ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataIte
 *
 * @see acltdtDestroyDataItem
 */
-ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum,
-                                                         aclDataType dataType, void *data, size_t size);
+ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType,
+                                                         const int64_t *dims,
+                                                         size_t dimNum,
+                                                         aclDataType dataType,
+                                                         void *data,
+                                                         size_t size);

 /**
  * @ingroup AscendCL
@@ -250,7 +254,8 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle);
 *
 * @see acltdtReceiveTensor
 */
-ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset,
+ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
+                                              const acltdtDataset *dataset,
                                               int32_t timeout);

 /**
@@ -266,11 +271,13 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle,
 *
 * @see acltdtSendTensor
 */
-ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset,
+ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle,
+                                                 acltdtDataset *dataset,
                                                  int32_t timeout);

 #ifdef __cplusplus
 }
 #endif

-#endif // INC_EXTERNAL_ACL_ACL_TDT_H_
+#endif  // INC_EXTERNAL_ACL_ACL_TDT_H_
+
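A minimal sketch of the send path reflowed above: wrap one host tensor in a data item, add it to a dataset and push it into an existing channel. The dataset helpers (`acltdtCreateDataset`, `acltdtAddDataItem`, `acltdtDestroyDataset`) and `acltdtDestroyDataItem` come from the same header; the dims and the blocking timeout value are assumptions:

```c
#include "acl/acl_tdt.h"

// Sketch: send one float tensor through a caller-created channel.
aclError send_one(const acltdtChannelHandle *channel, void *data, size_t size) {
    const int64_t dims[] = {1, 224, 224, 3};  // illustrative shape
    acltdtDataItem *item = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR, dims, 4,
                                                ACL_FLOAT, data, size);
    acltdtDataset *dataset = acltdtCreateDataset();
    aclError ret = (item != NULL && dataset != NULL) ? ACL_SUCCESS : ACL_ERROR_BAD_ALLOC;
    if (ret == ACL_SUCCESS) {
        ret = acltdtAddDataItem(dataset, item);
    }
    if (ret == ACL_SUCCESS) {
        ret = acltdtSendTensor(channel, dataset, -1);  // assumed: -1 blocks until sent
    }
    if (dataset != NULL) (void)acltdtDestroyDataset(dataset);
    if (item != NULL) (void)acltdtDestroyDataItem(item);
    return ret;
}
```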
diff --git a/inc/external/acl/error_codes/ge_error_codes.h b/inc/external/acl/error_codes/ge_error_codes.h
index 041fc7ae..b477a18c 100644
--- a/inc/external/acl/error_codes/ge_error_codes.h
+++ b/inc/external/acl/error_codes/ge_error_codes.h
@@ -17,6 +17,20 @@
 #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_
 #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_

+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#endif
+
 #include

 #ifdef __cplusplus
diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h
index d2373525..47f16d9f 100644
--- a/inc/external/acl/error_codes/rt_error_codes.h
+++ b/inc/external/acl/error_codes/rt_error_codes.h
@@ -23,79 +23,80 @@ extern "C" {
 #endif

-static const int32_t ACL_RT_SUCCESS = 0; // success
+static const int32_t ACL_RT_SUCCESS = 0;                             // success

-static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
-static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
-static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
-static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
-static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
-static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
-static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
-static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
-static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
-static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
-static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
-static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
-static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
-static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
-static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
-static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
-static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
-static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
-static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
+static const int32_t ACL_ERROR_RT_PARAM_INVALID            = 107000; // param invalid
+static const int32_t ACL_ERROR_RT_INVALID_DEVICEID         = 107001; // invalid device id
+static const int32_t ACL_ERROR_RT_CONTEXT_NULL             = 107002; // current context null
+static const int32_t ACL_ERROR_RT_STREAM_CONTEXT           = 107003; // stream not in current context
+static const int32_t ACL_ERROR_RT_MODEL_CONTEXT            = 107004; // model not in current context
+static const int32_t ACL_ERROR_RT_STREAM_MODEL             = 107005; // stream not in model
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID  = 107006; // event timestamp invalid
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
+static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED           = 107008; // memory address unaligned
+static const int32_t ACL_ERROR_RT_FILE_OPEN                = 107009; // open file failed
+static const int32_t ACL_ERROR_RT_FILE_WRITE               = 107010; // write file failed
+static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE         = 107011; // error subscribe stream
+static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE         = 107012; // error subscribe thread
+static const int32_t ACL_ERROR_RT_GROUP_NOT_SET            = 107013; // group not set
+static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE         = 107014; // group not create
+static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG         = 107015; // callback not register to stream
+static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE      = 107016; // invalid memory type
+static const int32_t ACL_ERROR_RT_INVALID_HANDLE           = 107017; // invalid handle
+static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE      = 107018; // invalid malloc type

-static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
-static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
-static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
-static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
-static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
-static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
-static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
-static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
-static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
-static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
-static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
+static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT      = 207000; // feature not support
+static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION        = 207001; // memory allocation error
+static const int32_t ACL_ERROR_RT_MEMORY_FREE              = 207002; // memory free error
+static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW         = 207003; // aicore overflow
+static const int32_t ACL_ERROR_RT_NO_DEVICE                = 207004; // no device
+static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL      = 207005; // resource alloc fail
+static const int32_t ACL_ERROR_RT_NO_PERMISSION            = 207006; // no permission
+static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE        = 207007; // no event resource
+static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE       = 207008; // no stream resource
+static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE       = 207009; // no notify resource
+static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE        = 207010; // no model resource

-static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
-static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
-static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
-static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
-static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
-static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
-static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
-static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
-static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
-static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
-static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
-static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
-static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
-static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
-static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
-static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
-static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
-static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
-static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
-static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
-static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
-static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
-static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
-static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
-static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
-static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
-static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
-static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
-static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
-static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
-static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
-static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR           = 507000; // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR                 = 507001; // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL         = 507002; // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY        = 507003; // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE      = 507004; // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE          = 507005; // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE       = 507006; // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR    = 507007; // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION              = 507008; // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT    = 507009; // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT           = 507010; // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE            = 507011; // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT           = 507012; // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA                  = 507013; // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT           = 507014; // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION         = 507015; // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION    = 507016; // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT            = 507017; // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION          = 507018; // aicpu exception
aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error -static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error #ifdef __cplusplus } #endif -#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/external/acl/ops/acl_cblas.h b/inc/external/acl/ops/acl_cblas.h index 3d81eb2b..a2bd8c61 100644 --- a/inc/external/acl/ops/acl_cblas.h +++ b/inc/external/acl/ops/acl_cblas.h @@ -23,9 +23,17 @@ extern "C" { #endif -typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; +typedef enum aclTransType { + ACL_TRANS_N, + ACL_TRANS_T, + ACL_TRANS_NZ, + ACL_TRANS_NZ_T +} aclTransType; -typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; +typedef enum aclComputeType { + ACL_COMPUTE_HIGH_PRECISION, + ACL_COMPUTE_LOW_PRECISION +} aclComputeType; /** * @ingroup AscendCL @@ -53,11 +61,12 @@ typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECIS * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure - */ -ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, - aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, - const void *beta, void *y, int incy, aclDataType dataTypeY, - aclComputeType type, aclrtStream stream); +*/ +ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, + const void *alpha, const void *a, int lda, aclDataType dataTypeA, + const void *x, int incx, aclDataType dataTypeX, + const void *beta, void *y, int incy, aclDataType dataTypeY, + aclComputeType type, aclrtStream stream); /** * @ingroup AscendCL @@ -74,10 +83,15 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, co * * @retval ACL_SUCCESS The function is successfully executed. 
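[Editor's illustration — not part of the patch] The renumbered runtime codes above keep ACL_RT_SUCCESS equal to 0, so call sites test against it and treat every other value as a failure. A minimal logging sketch under that assumption (the helper name is hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    /* Hypothetical helper: report a failed ACL runtime call. */
    static void LogRtError(const char *api, int32_t code) {
      if (code == ACL_RT_SUCCESS) {
        return; /* nothing to report */
      }
      if (code == ACL_ERROR_RT_AICPU_INTERNAL_ERROR) { /* constant added by this change */
        fprintf(stderr, "%s failed: aicpu internal error (%d)\n", api, code);
        return;
      }
      fprintf(stderr, "%s failed: runtime error %d\n", api, code);
    }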
diff --git a/inc/external/acl/ops/acl_cblas.h b/inc/external/acl/ops/acl_cblas.h
index 3d81eb2b..a2bd8c61 100644
--- a/inc/external/acl/ops/acl_cblas.h
+++ b/inc/external/acl/ops/acl_cblas.h
@@ -23,9 +23,17 @@
 extern "C" {
 #endif
 
-typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType;
+typedef enum aclTransType {
+  ACL_TRANS_N,
+  ACL_TRANS_T,
+  ACL_TRANS_NZ,
+  ACL_TRANS_NZ_T
+} aclTransType;
 
-typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType;
+typedef enum aclComputeType {
+  ACL_COMPUTE_HIGH_PRECISION,
+  ACL_COMPUTE_LOW_PRECISION
+} aclComputeType;
 
 /**
  * @ingroup AscendCL
@@ -53,11 +61,12 @@ typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECIS
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
- */
-ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda,
-    aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX,
-    const void *beta, void *y, int incy, aclDataType dataTypeY,
-    aclComputeType type, aclrtStream stream);
+*/
+ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n,
+    const void *alpha, const void *a, int lda, aclDataType dataTypeA,
+    const void *x, int incx, aclDataType dataTypeX,
+    const void *beta, void *y, int incy, aclDataType dataTypeY,
+    aclComputeType type, aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -74,10 +83,15 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, co
 *
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
- */
-ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA,
-    aclDataType dataTypeX, aclDataType dataTypeY,
-    aclComputeType type, aclopHandle **handle);
+*/
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA,
+    int m,
+    int n,
+    aclDataType dataTypeA,
+    aclDataType dataTypeX,
+    aclDataType dataTypeY,
+    aclComputeType type,
+    aclopHandle **handle);
 
 /**
  * @ingroup AscendCL
@@ -101,9 +115,18 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, i
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha,
-    const aclFloat16 *a, int lda, const aclFloat16 *x, int incx,
-    const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type,
+ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA,
+    int m,
+    int n,
+    const aclFloat16 *alpha,
+    const aclFloat16 *a,
+    int lda,
+    const aclFloat16 *x,
+    int incx,
+    const aclFloat16 *beta,
+    aclFloat16 *y,
+    int incy,
+    aclComputeType type,
     aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -119,7 +142,10 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, con
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type,
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA,
+    int m,
+    int n,
+    aclComputeType type,
     aclopHandle **handle);
 
 /**
  * @ingroup AscendCL
@@ -145,9 +171,19 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, in
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a,
-    int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y,
-    int incy, aclComputeType type, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA,
+    int m,
+    int n,
+    const int32_t *alpha,
+    const int8_t *a,
+    int lda,
+    const int8_t *x,
+    int incx,
+    const int32_t *beta,
+    int32_t *y,
+    int incy,
+    aclComputeType type,
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -162,7 +198,10 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, co
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type,
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA,
+    int m,
+    int n,
+    aclComputeType type,
     aclopHandle **handle);
 
 /**
  * @ingroup AscendCL
@@ -194,11 +233,26 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, i
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
-    int k, const void *alpha, const void *matrixA, int lda,
-    aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB,
-    const void *beta, void *matrixC, int ldc, aclDataType dataTypeC,
-    aclComputeType type, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA,
+    aclTransType transB,
+    aclTransType transC,
+    int m,
+    int n,
+    int k,
+    const void *alpha,
+    const void *matrixA,
+    int lda,
+    aclDataType dataTypeA,
+    const void *matrixB,
+    int ldb,
+    aclDataType dataTypeB,
+    const void *beta,
+    void *matrixC,
+    int ldc,
+    aclDataType dataTypeC,
+    aclComputeType type,
+    aclrtStream stream);
+
 
 /**
  * @ingroup AscendCL
@@ -220,10 +274,18 @@ ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType tra
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC,
-    int m, int n, int k, aclDataType dataTypeA,
-    aclDataType dataTypeB, aclDataType dataTypeC,
-    aclComputeType type, aclopHandle **handle);
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA,
+    aclTransType transB,
+    aclTransType transC,
+    int m,
+    int n,
+    int k,
+    aclDataType dataTypeA,
+    aclDataType dataTypeB,
+    aclDataType dataTypeC,
+    aclComputeType type,
+    aclopHandle **handle);
+
 
 /**
  * @ingroup AscendCL
@@ -251,10 +313,22 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, a
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
-    int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda,
-    const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta,
-    aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA,
+    aclTransType transB,
+    aclTransType transC,
+    int m,
+    int n,
+    int k,
+    const aclFloat16 *alpha,
+    const aclFloat16 *matrixA,
+    int lda,
+    const aclFloat16 *matrixB,
+    int ldb,
+    const aclFloat16 *beta,
+    aclFloat16 *matrixC,
+    int ldc,
+    aclComputeType type,
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -272,8 +346,13 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType tran
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC,
-    int m, int n, int k, aclComputeType type,
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA,
+    aclTransType transB,
+    aclTransType transC,
+    int m,
+    int n,
+    int k,
+    aclComputeType type,
     aclopHandle **handle);
 
 /**
  * @ingroup AscendCL
@@ -302,10 +381,23 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, ac
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
-    int k, const int32_t *alpha, const int8_t *matrixA, int lda,
-    const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC,
-    int ldc, aclComputeType type, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA,
+    aclTransType transB,
+    aclTransType transC,
+    int m,
+    int n,
+    int k,
+    const int32_t *alpha,
+    const int8_t *matrixA,
+    int lda,
+    const int8_t *matrixB,
+    int ldb,
+    const int32_t *beta,
+    int32_t *matrixC,
+    int ldc,
+    aclComputeType type,
+    aclrtStream stream);
+
 
 /**
  * @ingroup AscendCL
@@ -323,12 +415,17 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType tra
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC,
-    int m, int n, int k, aclComputeType type,
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA,
+    aclTransType transB,
+    aclTransType transC,
+    int m,
+    int n,
+    int k,
+    aclComputeType type,
     aclopHandle **handle);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_
+#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_
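[Editor's illustration — not part of the patch] A hedged usage sketch for the reshaped aclblasHgemv declaration above: it assumes dA, dX, dY are device buffers already prepared with aclrtMalloc, and that aclFloatToFloat16 from acl_base.h is available; error handling is abbreviated.

    /* y = alpha * A * x + beta * y, A is m x n in fp16, non-transposed. */
    aclError RunHgemv(int m, int n, const aclFloat16 *dA, const aclFloat16 *dX, aclFloat16 *dY) {
      aclrtStream stream = NULL;
      aclError ret = aclrtCreateStream(&stream);
      if (ret != ACL_SUCCESS) return ret;
      aclFloat16 alpha = aclFloatToFloat16(1.0f);
      aclFloat16 beta = aclFloatToFloat16(0.0f);
      /* lda == n assumes densely packed rows; adjust for your layout. */
      ret = aclblasHgemv(ACL_TRANS_N, m, n, &alpha, dA, n, dX, 1, &beta, dY, 1,
                         ACL_COMPUTE_HIGH_PRECISION, stream);
      if (ret == ACL_SUCCESS) ret = aclrtSynchronizeStream(stream);
      (void)aclrtDestroyStream(stream);
      return ret;
    }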
diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h
index 8f5d3904..42ec4a8d 100644
--- a/inc/external/acl/ops/acl_dvpp.h
+++ b/inc/external/acl/ops/acl_dvpp.h
@@ -53,109 +53,123 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output
 
 // Supported Pixel Format
 enum acldvppPixelFormat {
-  PIXEL_FORMAT_YUV_400 = 0, // 0
-  PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
-  PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
-  PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
-  PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
-  PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
-  PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
-  PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
-  PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
-  PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
-  PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
-  PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
-  PIXEL_FORMAT_RGB_888 = 12, // 12
-  PIXEL_FORMAT_BGR_888 = 13, // 13
-  PIXEL_FORMAT_ARGB_8888 = 14, // 14
-  PIXEL_FORMAT_ABGR_8888 = 15, // 15
-  PIXEL_FORMAT_RGBA_8888 = 16, // 16
-  PIXEL_FORMAT_BGRA_8888 = 17, // 17
-  PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
-  PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
-  PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
-  PIXEL_FORMAT_YVU_PLANAR_422,
-  PIXEL_FORMAT_YVU_PLANAR_444,
-  PIXEL_FORMAT_RGB_444 = 23,
-  PIXEL_FORMAT_BGR_444,
-  PIXEL_FORMAT_ARGB_4444,
-  PIXEL_FORMAT_ABGR_4444,
-  PIXEL_FORMAT_RGBA_4444,
-  PIXEL_FORMAT_BGRA_4444,
-  PIXEL_FORMAT_RGB_555,
-  PIXEL_FORMAT_BGR_555,
-  PIXEL_FORMAT_RGB_565,
-  PIXEL_FORMAT_BGR_565,
-  PIXEL_FORMAT_ARGB_1555,
-  PIXEL_FORMAT_ABGR_1555,
-  PIXEL_FORMAT_RGBA_1555,
-  PIXEL_FORMAT_BGRA_1555,
-  PIXEL_FORMAT_ARGB_8565,
-  PIXEL_FORMAT_ABGR_8565,
-  PIXEL_FORMAT_RGBA_8565,
-  PIXEL_FORMAT_BGRA_8565,
-  PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
-  PIXEL_FORMAT_RGB_BAYER_10BPP,
-  PIXEL_FORMAT_RGB_BAYER_12BPP,
-  PIXEL_FORMAT_RGB_BAYER_14BPP,
-  PIXEL_FORMAT_RGB_BAYER_16BPP,
-  PIXEL_FORMAT_BGR_888_PLANAR = 70,
-  PIXEL_FORMAT_HSV_888_PACKAGE,
-  PIXEL_FORMAT_HSV_888_PLANAR,
-  PIXEL_FORMAT_LAB_888_PACKAGE,
-  PIXEL_FORMAT_LAB_888_PLANAR,
-  PIXEL_FORMAT_S8C1,
-  PIXEL_FORMAT_S8C2_PACKAGE,
-  PIXEL_FORMAT_S8C2_PLANAR,
-  PIXEL_FORMAT_S16C1,
-  PIXEL_FORMAT_U8C1,
-  PIXEL_FORMAT_U16C1,
-  PIXEL_FORMAT_S32C1,
-  PIXEL_FORMAT_U32C1,
-  PIXEL_FORMAT_U64C1,
-  PIXEL_FORMAT_S64C1,
-  PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
-  PIXEL_FORMAT_YVU_SEMIPLANAR_440,
-  PIXEL_FORMAT_FLOAT32,
-  PIXEL_FORMAT_BUTT,
-  PIXEL_FORMAT_UNKNOWN = 10000
+  PIXEL_FORMAT_YUV_400 = 0, // 0
+  PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1
+  PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2
+  PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3
+  PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4
+  PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5
+  PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6
+  PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7
+  PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8
+  PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9
+  PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10
+  PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11
+  PIXEL_FORMAT_RGB_888 = 12, // 12
+  PIXEL_FORMAT_BGR_888 = 13, // 13
+  PIXEL_FORMAT_ARGB_8888 = 14, // 14
+  PIXEL_FORMAT_ABGR_8888 = 15, // 15
+  PIXEL_FORMAT_RGBA_8888 = 16, // 16
+  PIXEL_FORMAT_BGRA_8888 = 17, // 17
+  PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18
+  PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19
+  PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20
+  PIXEL_FORMAT_YVU_PLANAR_422,
+  PIXEL_FORMAT_YVU_PLANAR_444,
+  PIXEL_FORMAT_RGB_444 = 23,
+  PIXEL_FORMAT_BGR_444,
+  PIXEL_FORMAT_ARGB_4444,
+  PIXEL_FORMAT_ABGR_4444,
+  PIXEL_FORMAT_RGBA_4444,
+  PIXEL_FORMAT_BGRA_4444,
+  PIXEL_FORMAT_RGB_555,
+  PIXEL_FORMAT_BGR_555,
+  PIXEL_FORMAT_RGB_565,
+  PIXEL_FORMAT_BGR_565,
+  PIXEL_FORMAT_ARGB_1555,
+  PIXEL_FORMAT_ABGR_1555,
+  PIXEL_FORMAT_RGBA_1555,
+  PIXEL_FORMAT_BGRA_1555,
+  PIXEL_FORMAT_ARGB_8565,
+  PIXEL_FORMAT_ABGR_8565,
+  PIXEL_FORMAT_RGBA_8565,
+  PIXEL_FORMAT_BGRA_8565,
+  PIXEL_FORMAT_RGB_BAYER_8BPP = 50,
+  PIXEL_FORMAT_RGB_BAYER_10BPP,
+  PIXEL_FORMAT_RGB_BAYER_12BPP,
+  PIXEL_FORMAT_RGB_BAYER_14BPP,
+  PIXEL_FORMAT_RGB_BAYER_16BPP,
+  PIXEL_FORMAT_BGR_888_PLANAR = 70,
+  PIXEL_FORMAT_HSV_888_PACKAGE,
+  PIXEL_FORMAT_HSV_888_PLANAR,
+  PIXEL_FORMAT_LAB_888_PACKAGE,
+  PIXEL_FORMAT_LAB_888_PLANAR,
+  PIXEL_FORMAT_S8C1,
+  PIXEL_FORMAT_S8C2_PACKAGE,
+  PIXEL_FORMAT_S8C2_PLANAR,
+  PIXEL_FORMAT_S16C1,
+  PIXEL_FORMAT_U8C1,
+  PIXEL_FORMAT_U16C1,
+  PIXEL_FORMAT_S32C1,
+  PIXEL_FORMAT_U32C1,
+  PIXEL_FORMAT_U64C1,
+  PIXEL_FORMAT_S64C1,
+  PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,
+  PIXEL_FORMAT_YVU_SEMIPLANAR_440,
+  PIXEL_FORMAT_FLOAT32,
+  PIXEL_FORMAT_BUTT,
+  PIXEL_FORMAT_UNKNOWN = 10000
 };
 
 // Stream Format
-enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL };
+enum acldvppStreamFormat {
+  H265_MAIN_LEVEL = 0,
+  H264_BASELINE_LEVEL,
+  H264_MAIN_LEVEL,
+  H264_HIGH_LEVEL
+};
 
 // Supported Channel Mode
-enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 };
+enum acldvppChannelMode {
+  DVPP_CHNMODE_VPC = 1,
+  DVPP_CHNMODE_JPEGD = 2,
+  DVPP_CHNMODE_JPEGE = 4
+};
 
 // Supported Border Type
-enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 };
+enum acldvppBorderType {
+  BORDER_CONSTANT = 0,
+  BORDER_REPLICATE,
+  BORDER_REFLECT,
+  BORDER_REFLECT_101
+};
 
 // Venc parameter type
 enum aclvencChannelDescParamType {
-  ACL_VENC_THREAD_ID_UINT64 = 0,
-  ACL_VENC_CALLBACK_PTR,
-  ACL_VENC_PIXEL_FORMAT_UINT32,
-  ACL_VENC_ENCODE_TYPE_UINT32,
-  ACL_VENC_PIC_WIDTH_UINT32,
-  ACL_VENC_PIC_HEIGHT_UINT32,
-  ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
-  ACL_VENC_BUF_ADDR_PTR,
-  ACL_VENC_BUF_SIZE_UINT32,
-  ACL_VENC_RC_MODE_UINT32,
-  ACL_VENC_SRC_RATE_UINT32,
-  ACL_VENC_MAX_BITRATE_UINT32,
-  ACL_VENC_MAX_IP_PROP_UINT32
+  ACL_VENC_THREAD_ID_UINT64 = 0,
+  ACL_VENC_CALLBACK_PTR,
+  ACL_VENC_PIXEL_FORMAT_UINT32,
+  ACL_VENC_ENCODE_TYPE_UINT32,
+  ACL_VENC_PIC_WIDTH_UINT32,
+  ACL_VENC_PIC_HEIGHT_UINT32,
+  ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
+  ACL_VENC_BUF_ADDR_PTR,
+  ACL_VENC_BUF_SIZE_UINT32,
+  ACL_VENC_RC_MODE_UINT32,
+  ACL_VENC_SRC_RATE_UINT32,
+  ACL_VENC_MAX_BITRATE_UINT32,
+  ACL_VENC_MAX_IP_PROP_UINT32
 };
 
 // Jpeg picture format
 enum acldvppJpegFormat {
-  ACL_JPEG_CSS_444 = 0,
-  ACL_JPEG_CSS_422,
-  ACL_JPEG_CSS_420,
-  ACL_JPEG_CSS_GRAY,
-  ACL_JPEG_CSS_440,
-  ACL_JPEG_CSS_411,
-  ACL_JPEG_CSS_UNKNOWN = 1000
+  ACL_JPEG_CSS_444 = 0,
+  ACL_JPEG_CSS_422,
+  ACL_JPEG_CSS_420,
+  ACL_JPEG_CSS_GRAY,
+  ACL_JPEG_CSS_440,
+  ACL_JPEG_CSS_411,
+  ACL_JPEG_CSS_UNKNOWN = 1000
 };
 
 /**
@@ -509,7 +523,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD
 * @retval aclTensorDesc pointer.
 * @retval null for failed.
 * @retval other success
 */
-ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top,
+ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left,
+    uint32_t right,
+    uint32_t top,
     uint32_t bottom);
 
 /**
@@ -588,7 +604,10 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config,
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top,
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config,
+    uint32_t left,
+    uint32_t right,
+    uint32_t top,
     uint32_t bottom);
 
 /**
@@ -1077,8 +1096,7 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc
 * @retval ACL_SUCCESS for success, other for failure
 */
 ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc,
-    aclvencChannelDescParamType paramType, size_t length,
-    const void *param);
+    aclvencChannelDescParamType paramType, size_t length, const void *param);
 
 /**
  * @ingroup AscendCL
@@ -1227,8 +1245,7 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne
 * @retval ACL_SUCCESS for success, other for failure
 */
 ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc,
-    aclvencChannelDescParamType paramType, size_t length,
-    size_t *paramRetSize, void *param);
+    aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param);
 
 /**
  * @ingroup AscendCL
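[Editor's illustration — not part of the patch] The generic set/get pair above drives encoder fields through the aclvencChannelDescParamType keys from this header. A hedged sketch (aclvencCreateChannelDesc is assumed from elsewhere in this header; the RC-mode value 2 is a placeholder, not a documented constant):

    aclvencChannelDesc *desc = aclvencCreateChannelDesc();
    uint32_t rcMode = 2; /* placeholder; consult the product documentation */
    aclError ret = aclvencSetChannelDescParam(desc, ACL_VENC_RC_MODE_UINT32,
                                              sizeof(rcMode), &rcMode);
    /* read it back through the symmetric getter */
    uint32_t got = 0;
    size_t gotLen = 0;
    ret = aclvencGetChannelDescParam(desc, ACL_VENC_RC_MODE_UINT32,
                                     sizeof(got), &gotLen, &got);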
@@ -1528,7 +1545,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height,
+ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data,
+    uint32_t size,
+    uint32_t *width,
+    uint32_t *height,
     int32_t *components);
 
 /**
@@ -1545,8 +1565,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width,
-    uint32_t *height, int32_t *components,
+ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data,
+    uint32_t size,
+    uint32_t *width,
+    uint32_t *height,
+    int32_t *components,
     acldvppJpegFormat *format);
 
 /**
@@ -1561,7 +1584,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
 ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc,
-    const acldvppJpegeConfig *config, uint32_t *size);
+    const acldvppJpegeConfig *config,
+    uint32_t *size);
 
 /**
  * @ingroup AscendCL
@@ -1575,8 +1599,10 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize,
-    acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
+ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data,
+    uint32_t dataSize,
+    acldvppPixelFormat outputPixelFormat,
+    uint32_t *decSize);
 
 /**
  * @ingroup AscendCL
@@ -1591,8 +1617,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width,
-    uint32_t *height, int32_t *components);
+ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data,
+    uint32_t dataSize,
+    uint32_t *width,
+    uint32_t *height,
+    int32_t *components);
 
 /**
  * @ingroup AscendCL
@@ -1606,8 +1635,10 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t d
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize,
-    acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
+ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data,
+    uint32_t dataSize,
+    acldvppPixelFormat outputPixelFormat,
+    uint32_t *decSize);
 
 /**
  * @ingroup AscendCL
@@ -1671,8 +1702,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDe
 *
 * @see acldvppCreateChannel | acldvppCreatePicDesc
 * | acldvppCreateResizeConfig
 */
-ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
-    acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig,
+ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc,
+    acldvppPicDesc *inputDesc,
+    acldvppPicDesc *outputDesc,
+    acldvppResizeConfig *resizeConfig,
     aclrtStream stream);
 
 /**
@@ -1708,8 +1741,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDe
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
-    acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
+ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc,
+    acldvppPicDesc *inputDesc,
+    acldvppPicDesc *outputDesc,
+    acldvppRoiConfig *cropArea,
     aclrtStream stream);
 
 /**
@@ -1734,9 +1769,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc
 *
 * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
 */
 ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc,
-    acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
-    uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
-    acldvppRoiConfig *cropAreas[], aclrtStream stream);
+    acldvppBatchPicDesc *srcBatchPicDescs,
+    uint32_t *roiNums,
+    uint32_t size,
+    acldvppBatchPicDesc *dstBatchPicDescs,
+    acldvppRoiConfig *cropAreas[],
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -1759,9 +1797,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChanne
 *
 * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig
 */
-ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
-    acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
-    acldvppRoiConfig *pasteArea, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc,
+    acldvppPicDesc *inputDesc,
+    acldvppPicDesc *outputDesc,
+    acldvppRoiConfig *cropArea,
+    acldvppRoiConfig *pasteArea,
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -1786,11 +1827,14 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppCha
 *
 * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
 */
-ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
-    acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
-    uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
-    acldvppRoiConfig *cropAreas[],
-    acldvppRoiConfig *pasteAreas[], aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
+    acldvppBatchPicDesc *srcBatchPicDescs,
+    uint32_t *roiNums,
+    uint32_t size,
+    acldvppBatchPicDesc *dstBatchPicDescs,
+    acldvppRoiConfig *cropAreas[],
+    acldvppRoiConfig *pasteAreas[],
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -1818,8 +1862,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc
 *
 * @see acldvppCreateChannel | acldvppCreatePicDesc
 */
-ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
-    acldvppPicDesc *outputDesc, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc,
+    const void *data,
+    uint32_t size,
+    acldvppPicDesc *outputDesc,
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -1837,8 +1884,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelD
 *
 * @see acldvppCreateChannel | acldvppCreateJpegeConfig
 */
-ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
-    const void *data, uint32_t *size, acldvppJpegeConfig *config,
+ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc,
+    acldvppPicDesc *inputDesc,
+    const void *data,
+    uint32_t *size,
+    acldvppJpegeConfig *config,
     aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -1856,8 +1906,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelD
 *
 * @see acldvppCreateChannel | acldvppCreatePicDesc
 */
-ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
-    acldvppPicDesc *outputDesc, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc,
+    const void *data,
+    uint32_t size,
+    acldvppPicDesc *outputDesc,
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -1912,8 +1965,11 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDe
 *
 * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc
 */
-ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
-    acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData);
+ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc,
+    acldvppStreamDesc *input,
+    acldvppPicDesc *output,
+    aclvdecFrameConfig *config,
+    void *userData);
 
 /**
  * @ingroup AscendCL
@@ -1932,8 +1988,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, a
 *
 * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame
 */
-ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
-    aclvdecFrameConfig *config, void *userData);
+ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc,
+    acldvppStreamDesc *input,
+    aclvdecFrameConfig *config,
+    void *userData);
 
 /**
  * @ingroup AscendCL
@@ -1954,8 +2012,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannel
 *
 * @see acldvppCreateChannel | acldvppCreatePicDesc
 */
-ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
-    acldvppPicDesc *outputDesc, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc,
+    acldvppPicDesc *inputDesc,
+    acldvppPicDesc *outputDesc,
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -1977,8 +2037,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppCha
 *
 * @see acldvppCreateChannel | acldvppCreatePicDesc
 */
-ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
-    acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc,
+    acldvppPicDesc *inputDesc,
+    acldvppPicDesc *outputDesc,
+    void *reserve,
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -1990,7 +2053,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelD
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode);
+ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc,
+    uint32_t mode);
 
 /**
  * @ingroup AscendCL
@@ -2025,7 +2089,8 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode);
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc,
+    uint32_t outMode);
 
 /**
  * @ingroup AscendCL
@@ -2122,7 +2187,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap);
 * @retval ACL_SUCCESS The function is successfully executed.
 * @retval OtherValues Failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data,
+ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap,
+    uint32_t dim,
+    uint8_t **data,
     uint32_t *len);
 
 /**
  * @ingroup AscendCL
@@ -2140,8 +2207,10 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, u
 *
 * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap
 */
 ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc,
-    const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
-    const acldvppLutMap *lutMap, aclrtStream stream);
+    const acldvppPicDesc *inputDesc,
+    acldvppPicDesc *outputDesc,
+    const acldvppLutMap *lutMap,
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -2162,7 +2231,8 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig();
 *
 * @retval ACL_SUCCESS for success, other for failure
 */
-ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index,
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig,
+    uint32_t index,
     double value);
 
 /**
@@ -2307,8 +2377,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor
 *
 * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig
 */
 ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc,
-    const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
-    const acldvppBorderConfig *borderConfig, aclrtStream stream);
+    const acldvppPicDesc *inputDesc,
+    acldvppPicDesc *outputDesc,
+    const acldvppBorderConfig *borderConfig,
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -2325,8 +2397,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc
 *
 * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist
 */
-ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc,
-    acldvppHist *hist, void *reserve, aclrtStream stream);
+ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc,
+    acldvppPicDesc *srcPicDesc,
+    acldvppHist *hist,
+    void *reserve,
+    aclrtStream stream);
 
 /**
  * @ingroup AscendCL
@@ -2335,7 +2410,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannel
 *
 * @retval null for failed.
 * @retval OtherValues success.
 */
-ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist();
+ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist();
 
 /**
  * @ingroup AscendCL
@@ -2392,7 +2467,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim,
 *
 * @see acldvppCreateHist | acldvppVpcCalcHistAsync
 */
-ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist);
+ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist);
 
 /**
  * @ingroup AscendCL
@@ -2415,4 +2490,4 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);
 }
 #endif
 
-#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
+#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
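[Editor's illustration — not part of the patch] A hedged sketch of the JPEG decode flow the reshaped declarations above describe. It assumes a prepared channel and stream, and the DVPP allocator and picture-descriptor setters (acldvppMalloc, acldvppCreatePicDesc, acldvppSetPicDescData/Size/Format) declared elsewhere in this header; width/stride attributes and error handling are elided.

    /* Decode one JPEG buffer into YUV420SP using the size-prediction helper. */
    uint32_t decSize = 0;
    aclError ret = acldvppJpegPredictDecSize(jpegData, jpegSize,
                                             PIXEL_FORMAT_YUV_SEMIPLANAR_420, &decSize);
    void *outBuf = NULL;
    ret = acldvppMalloc(&outBuf, decSize);      /* DVPP-specific allocator */
    acldvppPicDesc *outDesc = acldvppCreatePicDesc();
    acldvppSetPicDescData(outDesc, outBuf);
    acldvppSetPicDescSize(outDesc, decSize);
    acldvppSetPicDescFormat(outDesc, PIXEL_FORMAT_YUV_SEMIPLANAR_420);
    ret = acldvppJpegDecodeAsync(channel, jpegData, jpegSize, outDesc, stream);
    if (ret == ACL_SUCCESS) ret = aclrtSynchronizeStream(stream);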
diff --git a/inc/external/acl/ops/acl_fv.h b/inc/external/acl/ops/acl_fv.h
index 27dc367a..40cd50cb 100644
--- a/inc/external/acl/ops/acl_fv.h
+++ b/inc/external/acl/ops/acl_fv.h
@@ -32,8 +32,8 @@ typedef struct aclfvSearchResult aclfvSearchResult;
 
 // search operation type
 enum aclfvSearchType {
-  SEARCH_1_N, // 1:N operation type
-  SEARCH_N_M // N:M operation type
+  SEARCH_1_N, // 1:N operation type
+  SEARCH_N_M // N:M operation type
 };
 
 /**
@@ -104,8 +104,7 @@ ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t
 * @retval OtherValues success.
 */
 ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset,
-    uint32_t featureLen, uint32_t featureCount,
-    uint8_t *featureData, uint32_t featureDataLen);
+    uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen);
 
 /**
  * @ingroup AscendCL
@@ -234,9 +233,8 @@ ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInp
 * @retval null for failed. OtherValues success
 */
 ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum,
-    uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1,
-    uint32_t *resultOffset, float *resultDistance,
-    uint32_t dataLen);
+    uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance,
+    uint32_t dataLen);
 
 /**
  * @ingroup AscendCL
@@ -350,4 +348,4 @@ ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput
 }
 #endif
 
-#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
+#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h
index 46d934e6..311e78f2 100644
--- a/inc/external/hccl/hccl.h
+++ b/inc/external/hccl/hccl.h
@@ -27,7 +27,7 @@
 
 #ifdef __cplusplus
 extern "C" {
-#endif // __cplusplus
+#endif // __cplusplus
 
 /**
 * @brief Initialize HCCL.
@@ -66,15 +66,14 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root
 * @param sendBuf A pointer identifying the input data address of the operator.
 * @param recvBuf A pointer identifying the output data address of the operator.
 * @param count An integer(u64) identifying the number of the output data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16,
- * float32.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32.
 * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
 * @param comm A pointer identifying the communication resource based on.
 * @param stream A pointer identifying the stream information.
- * @return HcclResult
+ * @return HcclResult
 */
-extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
-    HcclComm comm, aclrtStream stream);
+extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType,
+HcclReduceOp op, HcclComm comm, aclrtStream stream);
 
 /**
 * @brief Broadcast operator.
@@ -85,10 +84,10 @@ extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, Hc
 * @param root An integer(u32) identifying the root rank in the operator.
 * @param comm A pointer identifying the communication resource based on
 * @param stream A pointer identifying the stream information.
- * @return HcclResult
+ * @return HcclResult
 */
-extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
-    aclrtStream stream);
+extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
+aclrtStream stream);
 
 /**
 * @brief ReduceScatter operator.
@@ -100,10 +99,10 @@ extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType
 * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
 * @param comm A pointer identifying the communication resource based on.
 * @param stream A pointer identifying the stream information.
- * @return HcclResult
+ * @return HcclResult
 */
-extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
-    HcclReduceOp op, HcclComm comm, aclrtStream stream);
+extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
+HcclReduceOp op, HcclComm comm, aclrtStream stream);
 
 /**
 * @brief AllGather operator.
@@ -114,10 +113,10 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC
 * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
 * @param comm A pointer identifying the communication resource based on.
 * @param stream A pointer identifying the stream information.
- * @return HcclResult
+ * @return HcclResult
 */
-extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm,
-    aclrtStream stream);
+extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType,
+HcclComm comm, aclrtStream stream);
 
 /**
 * @brief Destroy HCCL comm
@@ -130,5 +129,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm);
 
 #ifdef __cplusplus
 }
-#endif // __cplusplus
-#endif // HCCL_H_
+#endif // __cplusplus
+#endif // HCCL_H_
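[Editor's illustration — not part of the patch] The collective-call pattern the declarations above imply, as a hedged sketch: HcclGetRootInfo is assumed to be declared earlier in this header, and distributing the root info to all ranks (for example via MPI) is application-specific.

    HcclRootInfo rootInfo;
    if (rank == 0) {
      HcclGetRootInfo(&rootInfo);   /* assumed API from this header */
    }
    /* ... exchange rootInfo with every rank out of band ... */
    HcclComm comm = NULL;
    HcclCommInitRootInfo(nRanks, &rootInfo, rank, &comm);
    /* sum-reduce `count` fp32 elements across all ranks */
    HcclAllReduce(sendBuf, recvBuf, count, HCCL_DATA_TYPE_FP32,
                  HCCL_REDUCE_SUM, comm, stream);
    aclrtSynchronizeStream(stream);
    HcclCommDestroy(comm);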
diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h
index 0e832396..50a64795 100644
--- a/inc/external/hccl/hccl_types.h
+++ b/inc/external/hccl/hccl_types.h
@@ -16,10 +16,10 @@
 
 /**
 * @file hccl_types.h
- * @brief HCCL data type definition
- *
+ * @brief HCCL data type definition
+ *
 */
-
+
 #ifndef HCCL_TYPES_H_
 #define HCCL_TYPES_H_
 
@@ -27,33 +27,33 @@
 
 #ifdef __cplusplus
 extern "C" {
-#endif // __cplusplus
+#endif // __cplusplus
 
 /**
 * @brief HCCL functions return value definition
 */
 typedef enum {
-  HCCL_SUCCESS = 0, /**< success */
-  HCCL_E_PARA = 1, /**< parameter error */
-  HCCL_E_PTR = 2, /**< empty pointer */
-  HCCL_E_MEMORY = 3, /**< memory error */
-  HCCL_E_INTERNAL = 4, /**< internal error */
-  HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
-  HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
-  HCCL_E_UNAVAIL = 7, /**< resource unavailable */
-  HCCL_E_SYSCALL = 8, /**< call system interface error */
-  HCCL_E_TIMEOUT = 9, /**< timeout */
-  HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
-  HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
-  HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
-  HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
-  HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
-  HCCL_E_RUNTIME = 15, /**< call runtime api fail */
-  HCCL_E_DRV = 16, /**< call driver api fail */
-  HCCL_E_PROFILING = 17, /**< call profiling api fail */
-  HCCL_E_CCE = 18, /**< call cce api fail */
-  HCCL_E_NETWORK = 19, /**< call network api fail */
-  HCCL_E_RESERVED /**< reserved */
+  HCCL_SUCCESS = 0, /**< success */
+  HCCL_E_PARA = 1, /**< parameter error */
+  HCCL_E_PTR = 2, /**< empty pointer */
+  HCCL_E_MEMORY = 3, /**< memory error */
+  HCCL_E_INTERNAL = 4, /**< internal error */
+  HCCL_E_NOT_SUPPORT = 5, /**< not support feature */
+  HCCL_E_NOT_FOUND = 6, /**< not found specific resource */
+  HCCL_E_UNAVAIL = 7, /**< resource unavailable */
+  HCCL_E_SYSCALL = 8, /**< call system interface error */
+  HCCL_E_TIMEOUT = 9, /**< timeout */
+  HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
+  HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */
+  HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */
+  HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */
+  HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */
+  HCCL_E_RUNTIME = 15, /**< call runtime api fail */
+  HCCL_E_DRV = 16, /**< call driver api fail */
+  HCCL_E_PROFILING = 17, /**< call profiling api fail */
+  HCCL_E_CCE = 18, /**< call cce api fail */
+  HCCL_E_NETWORK = 19, /**< call network api fail */
+  HCCL_E_RESERVED /**< reserved */
 } HcclResult;
 
 /**
@@ -65,37 +65,37 @@ typedef void *HcclComm;
 * @brief HCCL Reduction operation
 */
 typedef enum {
-  HCCL_REDUCE_SUM = 0, /**< sum */
-  HCCL_REDUCE_PROD = 1, /**< prod */
-  HCCL_REDUCE_MAX = 2, /**< max */
-  HCCL_REDUCE_MIN = 3, /**< min */
-  HCCL_REDUCE_RESERVED /**< reserved */
+  HCCL_REDUCE_SUM = 0, /**< sum */
+  HCCL_REDUCE_PROD = 1, /**< prod */
+  HCCL_REDUCE_MAX = 2, /**< max */
+  HCCL_REDUCE_MIN = 3, /**< min */
+  HCCL_REDUCE_RESERVED /**< reserved */
 } HcclReduceOp;
 
 /**
 * @brief HCCL data type
 */
 typedef enum {
-  HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
-  HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
-  HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
-  HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
-  HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
-  HCCL_DATA_TYPE_INT64 = 5, /**< int64 */
-  HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
-  HCCL_DATA_TYPE_RESERVED /**< reserved */
+  HCCL_DATA_TYPE_INT8 = 0, /**< int8 */
+  HCCL_DATA_TYPE_INT16 = 1, /**< int16 */
+  HCCL_DATA_TYPE_INT32 = 2, /**< int32 */
+  HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */
+  HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */
+  HCCL_DATA_TYPE_INT64 = 5, /**< int64 */
+  HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
+  HCCL_DATA_TYPE_RESERVED /**< reserved */
 } HcclDataType;
 
-const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
+const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length
 
 /**
 * @brief HCCL root info
 */
 typedef struct HcclRootInfoDef {
-  char internal[HCCL_ROOT_INFO_BYTES];
+  char internal[HCCL_ROOT_INFO_BYTES];
 } HcclRootInfo;
 
 #ifdef __cplusplus
 }
-#endif // __cplusplus
-#endif // HCCL_TYPES_H_
+#endif // __cplusplus
+#endif // HCCL_TYPES_H_
diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h
index d2373525..47f16d9f 100644
--- a/inc/external/runtime/rt_error_codes.h
+++ b/inc/external/runtime/rt_error_codes.h
@@ -23,79 +23,80 @@
 extern "C" {
 #endif
 
-static const int32_t ACL_RT_SUCCESS = 0; // success
+static const int32_t ACL_RT_SUCCESS = 0; // success
 
-static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
-static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
-static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
-static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
-static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
-static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
-static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
-static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
-static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
-static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
-static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
-static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
-static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
-static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
-static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
-static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
-static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
-static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
-static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
+static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
+static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
+static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
+static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
+static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
+static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
+static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
+static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
+static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
+static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
+static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
+static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
+static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
+static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
+static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
+static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
+static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
 
-static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
-static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
-static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
-static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
-static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
-static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
-static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
-static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
-static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
-static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
-static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
+static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
+static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
+static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
+static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
+static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
+static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
+static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
+static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
+static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
+static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
+static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
 
-static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
-static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
-static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
-static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
-static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
-static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
-static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
-static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
-static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
-static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
-static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
-static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
-static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
-static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
-static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
-static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
-static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
-static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
-static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
-static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
-static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
-static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
-static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
-static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
-static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
-static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
-static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
-static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
-static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
-static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
-static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
-static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
-static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
+static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
+static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
+static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
+static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
+static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
+static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
+static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
+static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
+static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
+static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
+static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
+static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
 
-static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
+static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
+static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
+#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
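[Editor's note — illustration only] The numbering above groups runtime failures into bands: 107xxx for parameter/context misuse, 207xxx for feature and resource problems, 507xxx for execution-time failures, with 507899+ reserved for driver and AICPU internals. A hypothetical classifier that leans on those bands (this helper is not part of the API):

    /* Hypothetical helper; band boundaries mirror the constants above. */
    static const char *RtErrorBand(int32_t code) {
      if (code == ACL_RT_SUCCESS) return "success";
      if (code >= 107000 && code <= 107018) return "invalid parameter/context";
      if (code >= 207000 && code <= 207010) return "feature/resource";
      if (code >= 507000 && code <= 507033) return "execution failure";
      if (code >= 507899) return "driver/aicpu internal";
      return "unknown";
    }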
diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h
index a4786cd3..181bf694 100644
--- a/third_party/fwkacllib/inc/ops/batch_ops.h
+++ b/third_party/fwkacllib/inc/ops/batch_ops.h
@@ -107,11 +107,13 @@ across multiple sessions . \n
 REG_OP(Unbatch)
   .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+      DT_COMPLEX64, DT_COMPLEX128}))
   .INPUT(index, TensorType({DT_INT64}))
   .INPUT(id, TensorType({DT_INT64}))
   .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+      DT_COMPLEX64, DT_COMPLEX128}))
   .REQUIRED_ATTR(timeout_micros, Int)
   .ATTR(container, String, "")
   .ATTR(shared_name, String, "")
@@ -146,13 +148,16 @@ across multiple sessions . \n
 REG_OP(UnbatchGrad)
   .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+      DT_COMPLEX64, DT_COMPLEX128}))
   .INPUT(index, TensorType({DT_INT64}))
   .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+      DT_COMPLEX64, DT_COMPLEX128}))
   .INPUT(id, TensorType({DT_INT64}))
   .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+      DT_COMPLEX64, DT_COMPLEX128}))
   .ATTR(container, String, "")
   .ATTR(shared_name, String, "")
   .OP_END_FACTORY_REG(UnbatchGrad)
+*/ +REG_OP(CacheAllIndexToLocal) + .INPUT(cache, TensorType({DT_RESOURCE})) + .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(CacheAllIndexToLocal) + +REG_OP(DynamicGetNext) + .INPUT(x, TensorType::ALL()) + .DYNAMIC_OUTPUT(y, TensorType::ALL()) + .ATTR(output_types, ListType, {}) + .ATTR(output_shapes, ListListInt, {{}, {}}) + .ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile") + .ATTR(_getnext_inputs_shape_range, String, "") + .OP_END_FACTORY_REG(DynamicGetNext) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index e65c7027..9f981d12 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -3627,6 +3627,35 @@ REG_OP(Lerp) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(Lerp) +/** +*@brief Returns the number of elements for which abs(x1 - x2) > atol + rtol * abs(x2), element-wise. \n + +* +*@par Inputs: +*@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16. +*@li x2: A tensor of the same type as "x1". +* +*@par Attributes: +* atol: Defaults to "1e-05". +* rtol: Defaults to "1e-03". +* +*@par Outputs: +* num: A tensor of type float32, holding the element count. +* diff: A tensor of type float16. +* +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* +*/ +REG_OP(DataCompare) + .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT32 })) + .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT32 })) + .OUTPUT(num, TensorType({DT_FLOAT})) + .OUTPUT(diff, TensorType({DT_FLOAT16})) + .ATTR(atol, Float, 1e-5) + .ATTR(rtol, Float, 1e-3) + .OP_END_FACTORY_REG(DataCompare) + /** *@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0 *otherwise The input does not need to explicitly be a 2D vector.The "axis" attribute indicates the dimension along @@ -3650,6 +3679,57 @@ REG_OP(HardMax) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(axis, Int, -1) .OP_END_FACTORY_REG(HardMax) + +/** +* @brief Computes the dot product (inner product) of two tensors. This function does not broadcast. + +* @par Inputs: +* Two inputs, including: +* @li input_x: A Tensor. The first tensor, which must be 1D. \n +* @li input_y: A Tensor. The second tensor, which must be 1D. \n + +* @par Outputs: +* @li output: A Tensor. Result of the two inputs; must be 1D. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch dot operator. \n +*/ +REG_OP(Dot) + .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) + .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) + .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) + .OP_END_FACTORY_REG(Dot) + +/** +*@brief Returns a new tensor with boolean elements representing \n +*whether each element of "x1" is "close" to the corresponding element of "x2" \n + +*@par Inputs: +*Two inputs, including: +* @li x1: A tensor. Must be one of the following types: +* float16, float32, int32. \n +* @li x2: A tensor with the same type and shape as "x1". \n + +*@par Attributes: +*@li rtol: An optional float. Defaults to 1e-05. \n +*@li atol: An optional float. Defaults to 1e-08. \n +*@li equal_nan: An optional bool. Defaults to false. 
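+* The element-wise test, matching torch.isclose, is: |x1 - x2| <= atol + rtol * |x2|. 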
\n + +*@par Outputs: +*y: A Tensor bool with the same shape of x1's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator isclose. \n +*/ +REG_OP(IsClose) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_BOOL})) + .ATTR(rtol, Float, 1e-05) + .ATTR(atol, Float, 1e-08) + .ATTR(equal_nan, Bool, false) + .OP_END_FACTORY_REG(IsClose) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index cb9fbe22..f4ded0cd 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -238,6 +238,15 @@ REG_OP(HcomRemoteRead) .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(HcomRemoteRead) +/** + * @brief Performs Remote Ref Read of input tensors + * @par Inputs: + * remote: A tensor. describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length + * cache_var: The local base address + * local_offset: Skip step length + * @par Outputs: + * cache_var: The local base address + */ REG_OP(HcomRemoteRefRead) .INPUT(remote, TensorType({DT_UINT64})) .INPUT(cache_var, TensorType({DT_UINT64})) @@ -258,6 +267,13 @@ REG_OP(HcomRemoteWrite) .INPUT(local, TensorType::ALL()) .OP_END_FACTORY_REG(HcomRemoteWrite) +/** + * @brief Performs Remote Write of input tensors + * @par Inputs: + * remote: A tensor. describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length + * @par Inputs: + * local: A Tensor. whose value is length / size_of(Type) + */ REG_OP(HcomRemoteScatterWrite) .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) .INPUT(local, TensorType::ALL()) diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index d7f60346..4703705b 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -652,6 +652,62 @@ REG_OP(RGBToHSV) /** *@brief Generate a single randomly distorted bounding box for an image . \n +*@par Inputs: +*Input images must be a 4-D tensor. Inputs include: +*@li image_size: 1-D, containing [height, width, channels]. +*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding +boxes associated with the image. \n + +*@par Attributes: +*@li seed: If either seed or seed2 are set to non-zero, the random number +generator is seeded by the given seed. Otherwise, it is seeded by a random seed. +*@li seed2: A second seed to avoid seed collision. +*@li min_object_covered: The cropped area of the image must contain at least +this fraction of any bounding box supplied. The value of this parameter should +be non-negative. In the case of 0, the cropped area does not need to overlap +any of the bounding boxes supplied . +*@li aspect_ratio_range: The cropped area of the image must have an aspect +ratio = width / height within this range. +*@li max_attempts: Number of attempts at generating a cropped region of the +image of the specified constraints. After max_attempts failures, return the +entire image. +*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes +supplied. If true, assume an implicit bounding box covering the whole input. +If false, raise an error . \n + +*@par Outputs: +*@li begin: 1-D, containing [offset_height, offset_width, 0]. +*@li size: 1-D, containing [target_height, target_width, -1]. 
+*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n + +*@attention Constraints: +*Input images can be of different types but output images are always float . \n + +*@par Third-party framework compatibility +*Compatible with tensorflow SampleDistortedBoundingBox operator. +*/ + +REG_OP(SampleDistortedBoundingBox) + .INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .INPUT(bounding_boxes, TensorType({ DT_FLOAT })) + .OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ + DT_INT32, DT_INT64 })) + .OUTPUT(bboxes, TensorType({ DT_FLOAT })) + .ATTR(seed, Int, 0) + .ATTR(seed2, Int, 0) + .ATTR(min_object_covered, Float, 0.1f) + .ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) + .ATTR(area_range, ListFloat, { 0.05f, 1.0f }) + .ATTR(max_attempts, Int, 100) + .ATTR(use_image_if_no_bounding_boxes, Bool, false) + .OP_END_FACTORY_REG(SampleDistortedBoundingBox) + +/** +*@brief Generate a single randomly distorted bounding box for an image . \n + *@par Inputs: *Input images must be a 4-D tensor. Inputs include: *@li image_size: 1-D, containing [height, width, channels]. @@ -1424,11 +1480,11 @@ REG_OP(Resize) *@par Attributes: *@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. -*@li ratio: An optional int. Defaults to 1. Downscaling ratio. +*@li ratio: An optional int. Defaults to 1. Downscaling ratio. *@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower but nicer upscaling of the chroma planes *@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input. *@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted. -*@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n +*@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n *@par Outputs: *image: A Tensor dtype of uint8. diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index d8f45c5d..330fef2e 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -61,8 +61,8 @@ REG_OP(CholeskyGrad) *@par Inputs: *The input x has to be symmetric and positive definite.Inputs include: -*x:A Tensor. Must be one of the following types: double, float32. Shape -is [..., M, M] . \n +*x:A Tensor. Must be one of the following types: double, float32, float16, +complex64, complex128. Shape is [..., M, M] . \n *@par Outputs: *y:A Tensor. Has the same type as x . \n @@ -76,8 +76,10 @@ form square matrices. */ REG_OP(Cholesky) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \ + DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \ + DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(Cholesky) /** @@ -87,8 +89,8 @@ of one or more square matrices . \n *@par Inputs: *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions form square matrices. Inputs include: -*x:A Tensor. Must be one of the following types: double, float32. Shape is -[..., M, M] . \n +*x:A Tensor. 
Must be one of the following types: double, float32, +complex64, complex128. Shape is [..., M, M] . \n *@par Outputs: *@li y:A Tensor. Has the same type as x. @@ -103,9 +105,9 @@ form square matrices. \n */ REG_OP(LogMatrixDeterminant) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(LogMatrixDeterminant) /** @@ -114,8 +116,8 @@ REG_OP(LogMatrixDeterminant) *@par Inputs: *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions form square matrices. Inputs include: -*x:A Tensor. Must be one of the following types: double, float32. Shape is -[..., M, M] . \n +*x:A Tensor. Must be one of the following types: double, float32, complex64, +complex128. Shape is [..., M, M] . \n *@par Outputs: *y:A Tensor. Has the same type as x . \n @@ -129,8 +131,8 @@ form square matrices. */ REG_OP(MatrixDeterminant) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(MatrixDeterminant) /** @@ -140,8 +142,7 @@ their adjoints (conjugate transposes) . \n *@par Inputs: *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions form square matrices. Inputs include: -*x:A Tensor. Must be one of the following types: double, float. Shape is -[..., M, M] . \n +*x:A Tensor of input. Shape is [..., M, M] . \n *@par Attributes: *adjoint:An optional bool. Defaults to False.Boolean indicating whether to @@ -159,8 +160,10 @@ form square matrices. \n */ REG_OP(MatrixInverse) - .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) .ATTR(adjoint, Bool, false) .OP_END_FACTORY_REG(MatrixInverse) @@ -169,8 +172,7 @@ REG_OP(MatrixInverse) *@par Inputs: *The input rhs must have the same type as matrix. Inputs include: -*@li matrix:A Tensor. Must be one of the following types: double, float. -Shape is [..., M, M]. +*@li matrix:A Tensor of input. Shape is [..., M, M]. *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n *@par Attributes: @@ -189,9 +191,9 @@ dimensions form square matrices. \n */ REG_OP(MatrixSolve) - .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .ATTR(adjoint, Bool, false) .OP_END_FACTORY_REG(MatrixSolve) @@ -221,8 +223,10 @@ dimensions form square matrices. 
\n */ REG_OP(MatrixSolveLs) - .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) .INPUT(l2, TensorType({DT_DOUBLE})) .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) .ATTR(fast, Bool, true) @@ -234,8 +238,7 @@ matrices by backsubstitution . \n *@par Inputs: *The input rhs must have the same type as matrix. Inputs include: -*@li matrix: A Tensor. Must be one of the following types: double, float. -Shape is [..., M, M]. +*@li matrix: A Tensor. Shape is [..., M, M]. *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n *@par Attributes: @@ -256,9 +259,12 @@ dimensions form square matrices. \n */ REG_OP(MatrixTriangularSolve) - .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) - .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ + DT_COMPLEX64, DT_COMPLEX128})) .ATTR(lower, Bool, true) .ATTR(adjoint, Bool, false) .OP_END_FACTORY_REG(MatrixTriangularSolve) @@ -268,8 +274,7 @@ REG_OP(MatrixTriangularSolve) *@par Inputs: *The input shape of x must be [..., M, N]. Inputs include: -*x:A Tensor whose shape is [..., M, N]. Must be one of the following types: -double, float . \n +*x:A Tensor whose shape is [..., M, N]. \n *@par Attributes: *full_matrices: An optional bool. Defaults to False. If true, compute @@ -289,9 +294,12 @@ dimensions form matrices of size [M, N]. \n */ REG_OP(Qr) - .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) - .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) - .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) + .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ + DT_COMPLEX64, DT_COMPLEX128 })) .ATTR(full_matrices, Bool, false) .OP_END_FACTORY_REG(Qr) @@ -384,8 +392,8 @@ of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n */ REG_OP(Lu) - .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OUTPUT(p, TensorType({DT_INT32, DT_INT64})) .REQUIRED_ATTR(output_idx_type, Type) .OP_END_FACTORY_REG(Lu) @@ -404,8 +412,8 @@ y: Shape is `[..., M, M]` . 
\n */ REG_OP(MatrixSquareRoot) - .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(MatrixSquareRoot) /** diff --git a/third_party/fwkacllib/inc/ops/list_ops.h b/third_party/fwkacllib/inc/ops/list_ops.h index 292b1dbe..33270ea8 100644 --- a/third_party/fwkacllib/inc/ops/list_ops.h +++ b/third_party/fwkacllib/inc/ops/list_ops.h @@ -192,7 +192,7 @@ REG_OP(TensorListGetItem) .INPUT(element_shape, TensorType({DT_INT32})) .OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, - DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) .ATTR(element_dtype, Type, DT_INT32) .OP_END_FACTORY_REG(TensorListGetItem) diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 4cbcc027..50d058ba 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -222,6 +222,24 @@ REG_OP(Bucketize) .REQUIRED_ATTR(boundaries, ListFloat) .OP_END_FACTORY_REG(Bucketize) +/** +*@brief Returns a new tensor with the truncated integer values of the elements of input. \n + +*@par Inputs: +*One input, including: +* @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n + +*@par Outputs: +*output_y: A tensor with the same type and shape as "input_x". \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Trunc. \n +*/ +REG_OP(Trunc) + .INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) + .OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) + .OP_END_FACTORY_REG(Trunc) + /** *@brief Computes the sum along sparse segments of a tensor . \n @@ -645,6 +663,7 @@ REG_OP(NLLLoss) .OUTPUT(y, TensorType({DT_FLOAT})) .OUTPUT(total_weight, TensorType({DT_FLOAT})) .ATTR(reduction, String, "mean") + .ATTR(ignore_index, Int, -100) .OP_END_FACTORY_REG(NLLLoss) /** @@ -674,6 +693,7 @@ REG_OP(NLLLossGrad) .INPUT(total_weight, TensorType({DT_FLOAT})) .OUTPUT(x_grad, TensorType({DT_FLOAT})) .ATTR(reduction, String, "mean") + .ATTR(ignore_index, Int, -100) .OP_END_FACTORY_REG(NLLLossGrad) /** @@ -884,6 +904,54 @@ REG_OP(LpNorm) .ATTR(keepdim, Bool, false) .ATTR(epsilon, Float, 1e-12) .OP_END_FACTORY_REG(LpNorm) + +/** +* @brief Builds a complex tensor from real and imaginary parts. + +* @par Inputs: +* @li real: An ND tensor of type float32 or double. +* @li imag: An ND tensor of type float32 or double \n +* +* @par Outputs: +* @li out: An ND tensor of type complex64 or complex128 \n +*/ +REG_OP(Complex) + .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE})) + .INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE})) + .OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .ATTR(Tout, Type, DT_COMPLEX64) + .OP_END_FACTORY_REG(Complex) + +/** +* @brief Returns the imaginary part of a complex tensor. + +* @par Inputs: +* @li input: An ND tensor of type complex64 or complex128 \n +* +* @par Outputs: +* @li output: An ND tensor of type float32 or double \n +*/ +REG_OP(Imag) + .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(Tout, Type, DT_FLOAT) + .OP_END_FACTORY_REG(Imag) + +/** +* @brief Returns the element-wise angle (argument) of a complex tensor. 
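+* For each complex element a + bj, the result is atan2(b, a). 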
+ +* @par Inputs: +* @li input: An ND tensor of type complex64 or complex128 \n +* +* @par Outputs: +* @li output: An ND tensor of type float32 or double \n +*/ +REG_OP(Angle) + .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) + .ATTR(Tout, Type, DT_FLOAT) + .OP_END_FACTORY_REG(Angle) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 33b596d8..6bff7f82 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -1022,6 +1022,27 @@ REG_OP(IndexAdd) .ATTR(axis, Int, 0) .OP_END_FACTORY_REG(IndexAdd) +/** +*@brief Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices \n + +*@par Inputs: +* One input, including: +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n + +*@par Attributes: +*@li diagonal: An optional int. The diagonal to consider. Defaults to 0. \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +* Compatible with the Pytorch operator Triu. +*/ +REG_OP(Triu) + .INPUT(x, TensorType::BasicType()) + .ATTR(diagonal, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(Triu) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index a35cee03..ddd70bc8 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -144,6 +144,64 @@ REG_OP(BatchNorm) /** *@brief Performs batch normalization . \n +*@par Inputs: +* Five inputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported) +*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D. +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NDC1HWC0. Specifies the scaling factor. +*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NDC1HWC0. Specifies the offset. +*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NDC1HWC0. Specifies the mean used for inference. Must be "None" if the +operation is used for training. +*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be +6D if input "x" is with format NDC1HWC0. Specifies the variance used for inference. Must be "None" +if the operation is used for training . \n + +*@par Attributes: +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li data_format: An optional string, specifying the format of "x". Defaults to "NCDHW". +*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n + +*@par Outputs: +* Five outputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported) +*@li y: A 5D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D. +*@li batch_mean: A Tensor of type float32. 
Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D +if input "x" is with format NDC1HWC0. Specifies the mean of "x". +*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. +Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x". +*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. +Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n + +*@attention Constraints: +*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, +then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n + +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator fused_batch_norm. +*@li Compatible with the TensorFlow operator fused_batch_norm_v2. +*/ +REG_OP(BatchNorm3D) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NCDHW") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(BatchNorm3D) +/** +*@brief Performs batch normalization . \n + *@par Inputs: * Five inputs, including: (NHWC or NCHW supported) *@li x: A 4D Tensor of type float16 or float32. @@ -242,6 +300,52 @@ REG_OP(BatchNormGrad) /** *@brief Performs the backpropagation of BatchNorm . \n +*@par Inputs: +* Five inputs, including: +*@li y_backprop: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient. +*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0. +*@li scale: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. +*@li reserve_space_1: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NC1HWC0. It is an output of BatchNorm. +*@li reserve_space_2: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NC1HWC0. It is an output of BatchNorm . \n + +*@par Attributes: +*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". +*@li data_format: An optional string. Defaults to "NCDHW". +*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n + +*@par Outputs: +*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x". +*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale". +*@li *offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset". 
+*@li *reserve_space_4: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output. +*@li *reserve_space_5: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n + +*@attention Constraints: +* The preceding layer of this operator must be operator BatchNorm . \n + +*@see BatchNorm +*@par Third-party framework compatibility +* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad. +*/ +REG_OP(BatchNorm3DGrad) + .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(reserve_space_1, TensorType({DT_FLOAT})) + .INPUT(reserve_space_2, TensorType({DT_FLOAT})) + .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) + .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_4, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_5, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NCDHW") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(BatchNorm3DGrad) + +/** +*@brief Performs the backpropagation of BatchNorm . \n + *@par Inputs: * Five inputs, including: *@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index c848668f..53922ee6 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -1059,7 +1059,7 @@ REG_OP(DeformableConv2D) *@par Attributes: * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li dilations: A list of 5 integers. Specifies the dilation factor for each @@ -1119,7 +1119,7 @@ REG_OP(Conv3D) *@par Attributes: * Three attributes: * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li dilations: A tuple/list of 5 integers, The dilation factor for each @@ -1167,7 +1167,7 @@ REG_OP(Conv3DBackpropInput) *@par Attributes: * Three attributes: * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li dilations: A tuple/list of 5 integers, The dilation factor for each @@ -1267,7 +1267,7 @@ REG_OP(LSTM) * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. @@ -1319,7 +1319,7 @@ REG_OP(Conv3DBackpropFilter) * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". 
* Defaults to "NDHWC". Specify the data format of the input and output data. @@ -1369,7 +1369,7 @@ REG_OP(Conv3DBackpropFilterD) *@par Attributes: * Five attributes: * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li dilations: A tuple/list of 5 integers, * The dilation factor for each dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". @@ -1422,7 +1422,7 @@ REG_OP(Conv3DTranspose) * dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". * @li groups: Number of blocked connections from input channels to output - * channels. Reserved. + * channels. * @li data_format: An optional string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li output_padding: The size will be added in the output shape. @@ -1624,7 +1624,7 @@ REG_OP(Conv2DTransposeD) * of the input. * @li ksize: A tuple/list of 2 integers.kernel size. *@par Attributes: - * Three attributes: + * Four attributes: * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension * of input. Defaults to [1, 1, 1, 1] * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 39b4b227..af59b4e2 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -968,8 +968,9 @@ REG_OP(SPP) * Three inputs, including: *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature * map. -*@li rois: A tensor of type float16 or float32, with shape +*@li rois: A tensor of type float16 or float32, with 3D shape * [batch, 5, roi_max_num], describing the RIOs. +* roi_max_num must be less than or equal to 6000 and must be divided by 16. *@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying * the number of ROIs per batch . \n @@ -1604,6 +1605,50 @@ REG_OP(NonMaxSuppressionV7) .ATTR(max_boxes_size, Int, 0) .OP_END_FACTORY_REG(NonMaxSuppressionV7) +/** +*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n + +*@par Inputs: +* Three inputs, including: +*@li features: A 5HD Tensor list of type float32 or float16. +*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, +* the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1". + +*@par Attributes: +*@li finest_scale: A optional attribute of type int, specifying the scale of calculate levels of "rois". +*@li roi_scale_factor: A optional attribute of type float32, specifying the rescaling of "rois" coordinates. +*@li spatial_scale: A optional attribute of type list float32, specifying the scaling ratio of "features" +* to the original image. +*@li pooled_height: A optional attribute of type int32, specifying the H dimension. +*@li pooled_width: A optional attribute of type int32, specifying the W dimension. +*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency +* of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", +* which is a floating point number. Defaults to "0". +*@li pool_mode: An optional attribute of type string to indicate pooling mode. 
Defaults to "avg" . \n +*@li aligned: An optional attribute of type bool, specifying the align to corner. Defaults to true . \n + +*@par Outputs: +* output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16. +* The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height", +* "pooled_width", and "features", respectively. + +*@par Third-party framework compatibility +*Compatible with mmdetection SingleRoIExtractor operator. +*/ +REG_OP(RoiExtractor) + .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(finest_scale, Int, 56) + .ATTR(roi_scale_factor, Float, 0) + .ATTR(spatial_scale, ListFloat, { 1.f/4, 1.f/8, 1.f/16, 1.f/32 }) + .ATTR(pooled_height, Int, 7) + .ATTR(pooled_width, Int, 7) + .ATTR(sample_num, Int, 0) + .ATTR(pool_mode, String, "avg") + .ATTR(aligned, Bool, true) + .OP_END_FACTORY_REG(RoiExtractor) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index af223552..00e2020f 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -1233,6 +1233,47 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2) .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(reduction, String, "mean") .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) +/** + * @brief Calculate the PoissonNllLoss function. + * target∼Poisson(input)loss(input,target)=input−target∗log(input)+log(target!) \n + + * @par Inputs: + * Two inputs, including: + * @li input_x: A tensor. Must be one of the following types: + * float16, float32. \n + * + * @par Inputs: + * @li target: A tensor. Must be one of the following types: + * float16, float32. \n + + * @par Attributes: + * four Attributes, including: + * @li log_input: An optional bool. Defaults to "True" \n + * + * @par Attributes: + * @li full: An optional bool. Defaults to "False" \n + * + * @par Attributes: + * @li eps: An optional float. Defaults to "1e-8" \n + * + * @par Attributes: + * @li reduction: An optional string. Defaults to "mean" \n + + * @par Outputs: + * loss: A Tensor has same element type as two inputs. \n + + * @par Third-party framework compatibility + * Compatible with the Pytorch operator PoissonNllLoss. \n + */ +REG_OP(PoissonNllLoss) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(log_input, Bool, true) + .ATTR(full, Bool, false) + .ATTR(eps, Float, 1e-8) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(PoissonNllLoss) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h index 16552eee..820aa00d 100644 --- a/third_party/fwkacllib/inc/ops/nn_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_ops.h @@ -20,7 +20,34 @@ */ #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ - +#include "graph/operator_reg.h" #include "nn_pooling_ops.h" +namespace ge { +/** +* @brief Says whether the targets are in the top "k" predictions . \n + +* @par Inputs: +* Three inputs, including: +* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor. +* @li targets: A 1D Tensor of type IndexNumberType. 
 + */ +REG_OP(PoissonNllLoss) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(log_input, Bool, true) + .ATTR(full, Bool, false) + .ATTR(eps, Float, 1e-8) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(PoissonNllLoss) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h index 16552eee..820aa00d 100644 --- a/third_party/fwkacllib/inc/ops/nn_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_ops.h @@ -20,7 +20,34 @@ */ #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ - +#include "graph/operator_reg.h" #include "nn_pooling_ops.h" +namespace ge { +/** +* @brief Says whether the targets are in the top "k" predictions . \n + +* @par Inputs: +* Three inputs, including: +* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor. +* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. +* @li k: A 1D Tensor of the same type as "targets". +* Specifies the number of top elements to look at for computing precision . \n + +* @par Outputs: +* precision: A Tensor of type bool . \n + +* @attention Constraints: +* @li targets must be a non-negative tensor. + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator InTopKV2. +*/ +REG_OP(InTopKV2) + .INPUT(predictions, TensorType({DT_FLOAT})) + .INPUT(targets, TensorType(IndexNumberType)) + .INPUT(k, TensorType({IndexNumberType})) + .OUTPUT(precision, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(InTopKV2) +} // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index e0897280..a225bb5f 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -223,7 +223,29 @@ REG_OP(Relu6Grad) .INPUT(features, TensorType::RealNumberType()) .OUTPUT(backprops, TensorType::RealNumberType()) .OP_END_FACTORY_REG(Relu6Grad) - +/** +*@brief Calculates the gradient of the ELU function. +*Computes the backward pass of ELU element-wise: +* the gradient is 1 where x > 0, and activations + alpha otherwise . +*@par Inputs: +*Two inputs, including: +* @li grads: A tensor. Must be one of the following types: +* float16, float32. +* @li activations: A tensor. Must be one of the following types: +* float16, float32. +* +*@par Outputs: +*y: A Tensor with the same type and shape as "grads". +* +*@par Attributes: +*@li alpha: An optional float scalar. Defaults to 1.0. +*/ +REG_OP(EluGradV2) + .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(alpha, Float, 1.0) + .OP_END_FACTORY_REG(EluGradV2) /** * @brief Compute sigmoid of "x" element-wise . \n @@ -842,6 +864,26 @@ REG_OP(SoftShrinkGrad) .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) .ATTR(lambd, Float, 0.5) .OP_END_FACTORY_REG(SoftShrinkGrad) + +/** +*@brief Calculates log_sigmoid(x) = -ln(1 + e^(-x)) element-wise. \n + +*@par Inputs: +*One input, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32. \n + +*@par Outputs: +*One output, including: +* @li y: A tensor with the same type and shape as "x". \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator LogSigmoid. \n +*/ +REG_OP(LogSigmoid) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */ + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */ + .OP_END_FACTORY_REG(LogSigmoid) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 0b114134..5b97d226 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -37,7 +37,7 @@ namespace ge { *@attention Constraints: * This operator is a BatchNorm fusion operator for updating the moving * averages for training. -* This operator is used in conjunction with BNTrainingUpdate. +* This operator is used in conjunction with BNTrainingReduce. */ REG_OP(BNTrainingReduce) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -45,6 +45,27 @@ REG_OP(BNTrainingReduce) .OUTPUT(square_sum, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(BNTrainingReduce) +/** +*@brief Performs reduced batch normalization . 
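+*It produces the statistics consumed by BN3DTrainingUpdate: "sum" holds the sum of the elements of "x" and "square_sum" the sum of their squares. 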
\n + +*@par Inputs: +*x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n + +*@par Outputs: +*@li sum: A 3D Tensor of type float32 for SUM reduced "x". +*@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n + +*@attention Constraints: +* This operator is a BatchNorm fusion operator for updating the moving +* averages for training. +* This operator is used in conjunction with BN3DTrainingUpdate. +*/ +REG_OP(BN3DTrainingReduce) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(sum, TensorType({DT_FLOAT})) + .OUTPUT(square_sum, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BN3DTrainingReduce) + /** *@brief Performs the backpropagation of BatchNorm . \n @@ -88,6 +109,49 @@ REG_OP(BNTrainingReduceGrad) .ATTR(epsilon, Float, 0.0001) .OP_END_FACTORY_REG(BNTrainingReduceGrad) +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Seven inputs, including: +*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for +* the gradient. +*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. +*@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0, +* for the mean of "x". +*@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0, +* for the variance of "x". +*@li scale: A 6D Tensor of type float32, with format NDC1HWC0. +*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, +* for the mean of "x". +*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, +* for the variance of "x" . \n + +*@par Attributes: +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x" . \n + +*@par Outputs: +*y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset +* of "x" . \n + +*@attention Constraints: +* The preceding layer of this operator must be BN3DTrainingUpdateGrad . \n + +*@see BN3DTrainingUpdateGrad +*/ +REG_OP(BN3DTrainingReduceGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(diff_scale, TensorType({DT_FLOAT})) + .INPUT(diff_offset, TensorType({DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(batch_mean, TensorType({DT_FLOAT})) + .INPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .OP_END_FACTORY_REG(BN3DTrainingReduceGrad) + /** *@brief Performs reduced batch normalization . \n @@ -120,7 +184,7 @@ REG_OP(BNTrainingReduceGrad) *@attention Constraints: *@li This operator is a BatchNorm fusion operator for updating the moving averages for training. -*This operator is used in conjunction with BNTrainingReduce. +*This operator is used in conjunction with BNTrainingUpdate. *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square * root instruction. */ @@ -141,6 +205,59 @@ REG_OP(BNTrainingUpdate) .OUTPUT(batch_variance, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(BNTrainingUpdate) +/** +*@brief Performs reduced batch normalization . \n + +*@par Inputs: +* Seven inputs, including: (NDC1HWC0 supported) +*@li x: A 6D Tensor of type float16 or float32. +*@li sum: A 6D Tensor of type float32 for the output of operator +* BN3DTrainingReduce. +*@li square_sum: A 6D Tensor of type float32 for the output of operator +* BN3DTrainingReduce. +*@li scale: A 6D Tensor of type float32, for the scaling factor. +*@li offset: A 6D Tensor of type float32, for the scaling offset. +*@li mean: A 6D Tensor of type float32, for the updated mean. 
+*@li variance: A 6D Tensor of type float32, for the updated variance . \n + +*@par Attributes: +*@li epsilon: A required float32, specifying the small value added to variance +* to avoid dividing by zero. +*@li factor: A required float32, specifying the weight for updating the mean +* and variance . \n + +*@par Outputs: +* Five outputs, including: (NDC1HWC0 supported) +*@li y: A 6D Tensor of type float16 or float32, for normalized "x". +*@li mean: A 6D Tensor of type float32, for the updated mean. +*@li variance: A 6D Tensor of type float32, for the updated variance. +*@li batch_mean: A 6D Tensor of type float32, for the mean of "x". +*@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n + +*@attention Constraints: +*@li This operator is a BatchNorm fusion operator for updating the moving +averages for training. +*This operator is used in conjunction with BN3DTrainingUpdate. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square +* root instruction. +*/ +REG_OP(BN3DTrainingUpdate) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(sum, TensorType({DT_FLOAT})) + .INPUT(square_sum, TensorType({DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT})) + .INPUT(variance, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(factor, Float) + .REQUIRED_ATTR(epsilon, Float) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(mean, TensorType({DT_FLOAT})) + .OUTPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BN3DTrainingUpdate) + /** *@brief Performs batch normalization for inference . \n @@ -284,6 +401,40 @@ REG_OP(BNTrainingUpdateGrad) .OUTPUT(diff_offset, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(BNTrainingUpdateGrad) +/** +*@brief Performs the backpropagation of BatchNorm . \n + +*@par Inputs: +* Four inputs, including: +*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, +* for the gradient. +*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. +*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, +* for the mean of "x". +*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, +* for the variance of "x" . \n + +*@par Attributes: +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x" . \n + +*@par Outputs: +*@li diff_scale: A Tensor of type float32, with format NDC1HWC0, +* for the offset of "scale". +*@li diff_offset: A Tensor of type float32, with format NDC1HWC0, +* for the offset of "offset" . \n + +*/ +REG_OP(BN3DTrainingUpdateGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(batch_mean, TensorType({DT_FLOAT})) + .INPUT(batch_variance, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .OUTPUT(diff_scale, TensorType({DT_FLOAT})) + .OUTPUT(diff_offset, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(BN3DTrainingUpdateGrad) + /** *@brief Performs the backpropagation of BatchNorm for inference . 
\n diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index dee9e0f7..33980d43 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -857,8 +857,8 @@ REG_OP(SliceDV2) * @li sorted = true * @li It's unstable sorted indices on the platform of Ascend310 -* @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator TopK. +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead. */ REG_OP(TopKD) .INPUT(x, TensorType::RealNumberType()) @@ -883,6 +883,44 @@ REG_OP(TopKD) * Number of top elements to look for along the last dimension (along each row * for matrices) . \n +* @par Attributes: +* @li sorted: An optional bool. Defaults to true. +* If true, the resulting "k" elements will be sorted by the values in descending +* order. +* @li dim: An optional int. Defaults to -1. For reserved use. +* @li largest: An optional bool. Defaults to true. For reserved use. \n + +* @par Outputs: +* @li values: A Tensor, specifying the sorted data. Has the same type as +* "input". +* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n + +* @see TopK() +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator TopKV2. +*/ +REG_OP(TopKV2) + .INPUT(x, TensorType::RealNumberType()) + .INPUT(k, TensorType({DT_INT32})) + .OUTPUT(values, TensorType::RealNumberType()) + .OUTPUT(indices, TensorType({DT_INT32})) + .ATTR(sorted, Bool, true) + .ATTR(dim, Int, -1) + .ATTR(largest, Bool, true) + .OP_END_FACTORY_REG(TopKV2) + +/** +* @brief Finds values and indices of the "k" largest elements for the last +* dimension . \n + +* @par Inputs: +* Two inputs, including: +* @li x: A 1D or higher tensor of type BasicType, with the last dimension +* at least "k". +* @li k: A 0D Tensor of type int32. +* Number of top elements to look for along the last dimension (along each row +* for matrices) . \n + * @par Attributes: * @li sorted: An optional bool. Defaults to true. * If true, the resulting "k" elements will be sorted by the values in descending @@ -2103,6 +2141,34 @@ REG_OP(StridedSliceV2) .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(StridedSliceV2) +/** +*@brief Fills the elements of the input tensor with value val by selecting the indices in the order given in index. \n + +*@par Inputs: +*Three inputs, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32, int32. \n +*@li assist1: A tensor. Must be one of the following types: +* float16, float32, int32. \n +*@li assist2: A tensor. Must be one of the following types: +* float16, float32, int32. \n + +* @par Attributes: +* @li dim: A required int. Used to select the dimension of this tensor. \n + +*@par Outputs: +*y: A Tensor with the same type and shape of input_x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator IndexFill. 
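+*For example, with dim = 0, the rows of "x" selected by the index are overwritten with the fill value, as in torch.Tensor.index_fill_. 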
\n +*/ +REG_OP(IndexFillD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(dim, Int) + .OP_END_FACTORY_REG(IndexFillD) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index 09d8ced9..a1fc9ee6 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad) REG_OP(SparseTensorDenseMatMul) .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ - DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16})) + DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64})) .INPUT(x1_shape, TensorType({DT_INT64})) - .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ + .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ DT_COMPLEX128, DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ + .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ DT_COMPLEX128, DT_FLOAT16})) .ATTR(adjoint_a, Bool, false) .ATTR(adjoint_b, Bool, false) diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index be3d7d00..82accc73 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -26,6 +26,24 @@ namespace ge { +/** +*@brief Computes the inverse 1-dimensional discrete Fourier transform over the +inner-most dimension of `x`. \n + +*@par Inputs: +*@li x: A Tensor. Must be the following types: complex64, complex128. \n + +*@par Outputs: +*@li y: A complex tensor of the same rank as `x`. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IFFT operator. +*/ +REG_OP(IFFT) + .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OP_END_FACTORY_REG(IFFT) + /** *@brief Real-valued fast Fourier transform . \n @@ -47,6 +65,84 @@ REG_OP(RFFT) .OUTPUT(y, TensorType({DT_COMPLEX64})) .OP_END_FACTORY_REG(RFFT) +/** +*@brief Inverse real-valued fast Fourier transform . \n + +*@par Inputs: +*@li x: A complex64 tensor. +*@li fft_length: An int32 tensor of shape [1]. The FFT length . \n + +*@par Outputs: +*@li y: A float32 tensor of the same rank as `input`. The inner-most + dimension of `input` is replaced with the `fft_length` samples of its inverse + 1D Fourier transform . \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IRFFT operator. +*/ +REG_OP(IRFFT) + .INPUT(x, TensorType({DT_COMPLEX64})) + .INPUT(fft_length, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(IRFFT) + + +/** +*@brief 2D fast Fourier transform. \n + +*@par Inputs: +*@li x: A complex64 tensor.. + +*@par Outputs: +*@li y: A complex64 tensor of the same shape as `input`. The inner-most 2 + dimensions of `input` are replaced with their 2D Fourier transform.\n + +*@par Third-party framework compatibility +* Compatible with TensorFlow FFT2D operator. 
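+* +*@par Example +* A hypothetical construction through the generated C++ op class (a sketch +* only; the accessor name is assumed from the REG_OP declaration that follows): +* @code +*   ge::op::FFT2D fft2d("fft2d"); +*   fft2d.set_input_x(complex_input);  // the inner-most 2 dims are transformed +* @endcode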
+*/ +REG_OP(FFT2D) + .INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128})) + .OP_END_FACTORY_REG(FFT2D) + +/** +*@brief Computes the 1-dimensional discrete Fourier transform over the +innermost dimension of the input. \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n + +*@par Outputs: +*@li y: A complex tensor with the same shape as input. The innermost dimension +of the input is replaced by its 1-dimensional Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow FFT operator. +*/ +REG_OP(FFT) + .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OP_END_FACTORY_REG(FFT) + +/** +*@brief Computes the inverse 2-dimensional discrete Fourier transform over the +inner-most 2 dimensions of the input. \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n + +*@par Outputs: +*@li y: A complex tensor with the same shape as input. The inner-most 2 dimensions +of the input are replaced by their inverse 2-dimensional Fourier transform. \n + +*@par Third-party framework compatibility +* Compatible with TensorFlow IFFT2D operator. +*/ +REG_OP(IFFT2D) + .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) + .OP_END_FACTORY_REG(IFFT2D) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index f1a93fa6..af2c37bc 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -62,8 +62,8 @@ REG_OP(Split) *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 *@par Attributes: -*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. -*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n +*@li split_dim: A required int32. Specifies the dimension along which to split. No default value. +*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n *@par Outputs: *y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n @@ -94,12 +94,12 @@ REG_OP(SplitD) *@par Inputs: * Three inputs, including: *@li x: An ND Tensor. -*Must be one of the following types: -*@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. -*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n +*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +*@li size_splits: Must be one of the following types: int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension. +*@li split_dim: Must be of type int32. Specifies the dimension along which to split . \n *@par Attributes: -*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n +*num_split: A required int32. Specifies the number of output tensors. No default value . 
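+*For example, splitting a tensor of shape [5, 30] with size_splits = [4, 15, 11] and split_dim = 1 yields outputs of shapes [5, 4], [5, 15] and [5, 11]. 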
diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h
index f1a93fa6..af2c37bc 100644
--- a/third_party/fwkacllib/inc/ops/split_combination_ops.h
+++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h
@@ -62,8 +62,8 @@ REG_OP(Split)
 *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

 *@par Attributes:
-*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value.
-*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
+*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n

 *@par Outputs:
 *y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n
@@ -94,12 +94,12 @@ REG_OP(SplitD)
 *@par Inputs:
 * Three inputs, including:
 *@li x: An ND Tensor.
-*Must be one of the following types:
-*@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension.
-*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n
+*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
+*@li size_splits: Must be one of the following types: int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension.
+*@li split_dim: Must be an int32. Specifies the dimension along which to split . \n

 *@par Attributes:
-*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*num_split: A required int32. Specifies the number of output tensors. No default value . \n

 *@par Outputs:
 *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n
@@ -129,9 +129,9 @@ REG_OP(SplitV)
 *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

 *@par Attributes:
-*@li size_splits: A required list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension.
-*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value.
-*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension.
+*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
+*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n

 *@par Outputs:
 *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n
diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h
index ec84cc83..29aec302 100644
--- a/third_party/fwkacllib/inc/ops/string_ops.h
+++ b/third_party/fwkacllib/inc/ops/string_ops.h
@@ -488,7 +488,7 @@ include:
 */
 REG_OP(AsString)
     .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \
-        DT_DOUBLE, DT_BOOL}))
+        DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128}))
     .OUTPUT(y, TensorType({DT_STRING}))
     .ATTR(precision, Int, -1)
     .ATTR(scientific, Bool, false)
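With the AsString hunk above, complex64/complex128 inputs can now be stringified as well. A short sketch under the same generated-wrapper assumption; the attribute values are illustrative:

```cpp
#include "string_ops.h"  // generated ge::op::AsString wrapper

// Sketch: format a complex tensor as strings, 3 digits of precision,
// fixed-point rather than scientific notation.
ge::op::AsString MakeAsString(ge::Operator &complexInput) {
  return ge::op::AsString("as_string")
      .set_input_x(complexInput)
      .set_attr_precision(3)
      .set_attr_scientific(false);
}
```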
diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h
index ee104693..fed7341a 100644
--- a/third_party/fwkacllib/inc/runtime/config.h
+++ b/third_party/fwkacllib/inc/runtime/config.h
@@ -46,6 +46,12 @@ typedef enum tagRtChipType {
     CHIP_END,
 } rtChipType_t;

+typedef enum tagRtAicpuScheType {
+    SCHEDULE_SOFTWARE = 0, /* Software Schedule */
+    SCHEDULE_SOFTWARE_OPT,
+    SCHEDULE_HARDWARE, /* HWTS Schedule */
+} rtAicpuScheType;
+
 typedef enum tagRtVersion {
     VER_BEGIN = 0,
     VER_NA = VER_BEGIN,
@@ -184,6 +190,19 @@ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);
  */
 RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);

+
+/**
+ * @ingroup
+ * @brief get a device feature capability by device id, such as the task schedule mode
+ * @param [in] deviceId device id
+ * @param [in] moduleType module type
+ * @param [in] featureType feature type
+ * @param [out] value capability value of the queried feature
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value);
+
 #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h
index 49f6a3f6..018f4e6c 100644
--- a/third_party/fwkacllib/inc/runtime/dev.h
+++ b/third_party/fwkacllib/inc/runtime/dev.h
@@ -62,6 +62,11 @@ typedef enum tagRtFeatureType {
     FEATURE_TYPE_RSV
 } rtFeatureType_t;

+typedef enum tagRtDeviceFeatureType {
+    FEATURE_TYPE_SCHE,
+    FEATURE_TYPE_END,
+} rtDeviceFeatureType_t;
+
 typedef enum tagMemcpyInfo {
     MEMCPY_INFO_SUPPORT_ZEROCOPY = 0,
     MEMCPY_INFO_RSV
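The runtime additions above are designed to be used together: rtGetDeviceCapability (config.h) is queried with FEATURE_TYPE_SCHE (dev.h) and reports one of the rtAicpuScheType values. A minimal sketch, assuming the caller supplies the module type, whose constants this patch does not define:

```cpp
#include "runtime/config.h"
#include "runtime/dev.h"

// Sketch: does this device schedule AICPU tasks in hardware (HWTS)?
bool UsesHardwareSchedule(int32_t deviceId, int32_t moduleType) {
  int32_t value = static_cast<int32_t>(SCHEDULE_SOFTWARE);
  rtError_t ret = rtGetDeviceCapability(deviceId, moduleType,
                                        static_cast<int32_t>(FEATURE_TYPE_SCHE), &value);
  return (ret == RT_ERROR_NONE) && (value == static_cast<int32_t>(SCHEDULE_HARDWARE));
}
```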
diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h
index dc16ca58..0ec1a163 100644
--- a/third_party/fwkacllib/inc/runtime/kernel.h
+++ b/third_party/fwkacllib/inc/runtime/kernel.h
@@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData);
 #define RT_FUSION_KERNEL_DUMPFLAG (0x04)
 #define RT_KERNEL_CUSTOM_AICPU (0x08)

+/**
+ * @ingroup rt_kernel
+ * @brief kernel mode
+ */
+#define RT_DEFAULT_KERNEL_MODE (0x00)
+#define RT_NORMAL_KERNEL_MODE (0x01)
+#define RT_ALL_KERNEL_MODE (0x02)
+
 /**
  * @ingroup rt_kernel
  * @brief kernel L1 Fusion Dump bit flags
@@ -207,6 +215,16 @@
  */
 RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle);

+/**
+ * @ingroup rt_kernel
+ * @brief register a device binary containing all of its kernels
+ * @param [in] bin device binary description
+ * @param [out] handle device binary handle
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle);
+
 /**
  * @ingroup rt_kernel
  * @brief register fast memeory device binary
@@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u
 RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize,
                                  rtSmDesc_t *smDesc, rtStream_t stream);

+/**
+ * @ingroup rt_kernel
+ * @brief launch a kernel to the device through a registered binary handle
+ * @param [in] handle handle of the registered device binary
+ * @param [in] devFunc device function description
+ * @param [in] blockDim block dimensions
+ * @param [in] args arguments address for the kernel function
+ * @param [in] argsSize arguments size
+ * @param [in] smDesc shared memory description
+ * @param [in] stream associated stream
+ * @param [in] kernelInfo kernel info
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args,
+                                           uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo);
+
 /**
  * @ingroup rt_kernel
  * @brief launch kernel to device
diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h
index 482486a8..e6d849c8 100644
--- a/third_party/fwkacllib/inc/runtime/rt_model.h
+++ b/third_party/fwkacllib/inc/runtime/rt_model.h
@@ -50,6 +50,7 @@ typedef enum tagModelTaskType {
     RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX,
     RT_MODEL_TASK_STREAM_LABEL_GOTO,
     RT_MODEL_TASK_MODEL_EXIT,
+    RT_MODEL_TASK_ALL_KERNEL,
 } rtModelTaskType_t;

 typedef enum tagModelStreamType {
@@ -127,6 +128,18 @@ typedef struct tagKernelTaskInfo {
     uint16_t *argsOffset;
 } rtKernelTaskInfo_t;

+typedef struct tagAllKernelTaskInfo {
+    uint16_t blockDim;
+    uint16_t argsCount;
+    uint16_t argsSize;
+    uint16_t reserved;
+    void *devfunc;
+    void *handle;
+    uint8_t *smDesc;
+    uint8_t *args;
+    uint16_t *argsOffset;
+} rtAllKernelTaskInfo_t;
+
 typedef struct tagKernelTaskInfoEx {
     uint32_t flags;
     uint32_t argsSize;
@@ -251,6 +264,7 @@ typedef struct tagTaskInfo {
     union {
         rtKernelTaskInfoEx_t kernelTaskEx;
         rtKernelTaskInfo_t kernelTask;
+        rtAllKernelTaskInfo_t allKernelTask;
         rtEventTaskInfo_t eventTask;
         rtStreamSwitchTaskInfo_t streamSwitchTask;
         rtStreamActiveTaskInfo_t streamActiveTask;
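The kernel.h additions pair up: rtRegisterAllKernel registers a device binary once, and rtKernelLaunchWithHandle launches an individual device function through the returned handle; the RT_MODEL_TASK_ALL_KERNEL task type and rtAllKernelTaskInfo_t in rt_model.h carry the same handle/devfunc pair inside a model task. A minimal launch sketch with placeholder arguments:

```cpp
#include "runtime/kernel.h"

// Sketch: register a multi-kernel binary, then launch one function from it.
rtError_t LaunchThroughHandle(const rtDevBinary_t *bin, const void *devFunc,
                              void *args, uint32_t argsSize, rtStream_t stream) {
  void *handle = nullptr;
  rtError_t ret = rtRegisterAllKernel(bin, &handle);
  if (ret != RT_ERROR_NONE) {
    return ret;
  }
  // blockDim = 1; no shared-memory descriptor; no extra kernel info.
  return rtKernelLaunchWithHandle(handle, devFunc, 1U, args, argsSize,
                                  nullptr, stream, nullptr);
}
```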
diff --git a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
index 6208f462..e436dafd 100644
--- a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
+++ b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
@@ -1,72 +1,138 @@
-/**
- * @file tune_api.h
- *
- * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n
- * 描述:mstune调优接口头文件
- */
-/** @defgroup mstune mstune调优接口 */
-#ifndef TUNE_API_H
-#define TUNE_API_H
-#include <map>
-#include <string>
-#include <vector>
-#include "graph/graph.h"
-#include "ge/ge_api.h"
-
-/**
- * @ingroup mstune
- *
- * mstune status
- */
-enum MsTuneStatus {
-  MSTUNE_SUCCESS,  /** tune success */
-  MSTUNE_FAILED,   /** tune failed */
-};
-
-// Option key: for train options sets
-const std::string MSTUNE_SELF_KEY = "mstune";
-const std::string MSTUNE_GEINIT_KEY = "initialize";
-const std::string MSTUNE_GESESS_KEY = "session";
-
-/**
- * @ingroup mstune
- * @par 描述: 命令行调优
- *
- * @attention 无
- * @param option [IN] 调优参数
- * @param msg [OUT] 调优异常下返回信息
- * @retval #MSTUNE_SUCCESS 执行成功
- * @retval #MSTUNE_FAILED 执行失败
- * @par 依赖:
- * @li tune_api.cpp:该接口所属的开发包。
- * @li tune_api.h:该接口声明所在的头文件。
- * @see 无
- * @since
- */
-MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg);
-
-/**
- * @ingroup mstune
- * @par 描述: 梯度调优
- *
- * @attention 无
- * @param tuningGraph [IN] 调优图
- * @param dependGraph [IN] 调优依赖图
- * @param session [IN] ge连接会话
- * @param option [IN] 参数集. 包含调优参数及ge参数
- * @retval #MSTUNE_SUCCESS 执行成功
- * @retval #MSTUNE_FAILED 执行失败
- * @par 依赖:
- * @li tune_api.cpp:该接口所属的开发包。
- * @li tune_api.h:该接口声明所在的头文件。
- * @see 无
- * @since
- */
-extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
-    ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
-
-#endif
+/**
+ * @file tune_api.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n
+ * Description: mstune tuning API header file
+ */
+/** @defgroup mstune mstune tuning APIs */
+#ifndef TUNE_API_H
+#define TUNE_API_H
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include "graph/graph.h"
+#include "ge/ge_api.h"
+
+/**
+ * @ingroup mstune
+ *
+ * mstune status
+ */
+enum MsTuneStatus {
+  MSTUNE_SUCCESS,  /** tune success */
+  MSTUNE_FAILED,   /** tune failed */
+};
+
+// Option key: for train options sets
+const std::string MSTUNE_SELF_KEY = "mstune";
+const std::string MSTUNE_GEINIT_KEY = "initialize";
+const std::string MSTUNE_GESESS_KEY = "session";
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct RunnerInitConfig {
+  // online only
+  std::string profPath;
+  std::string parserPath;
+  // ncs only
+  std::vector<uint32_t> devList;
+};
+
+struct RunnerOpInfo {
+  std::string opName;
+  uint64_t opCostTime;
+  uint64_t aicoreCostTime;
+  // gradient_split only
+  std::string modelName;
+  std::string opType;
+  std::vector<uint64_t> start;
+  std::vector<uint64_t> end;
+};
+
+struct RunnerModelInfo {
+  uint64_t totalCostTime;
+};
+
+struct RunnerRunResult {
+  std::vector<RunnerModelInfo> modelInfo;
+  std::vector<RunnerOpInfo> opInfo;
+};
+
+struct RunnerResult {
+  uint64_t totalCostTime;
+  std::map<std::string, uint64_t> opCostTime;
+  std::map<std::string, uint64_t> aicoreCostTime;
+};
+
+struct RunnerDataBuf {
+  void *ptr = nullptr;
+  size_t size = 0;
+};
+
+struct AOEBufferData {
+  std::shared_ptr<uint8_t> data = nullptr;
+  uint64_t length;
+};
+
+struct RunnerConfig {
+  bool isProf;
+  uint32_t loop;
+  // offline only
+  std::vector<RunnerDataBuf> input;
+  std::vector<RunnerDataBuf> output;
+  std::string modelPath;
+  RunnerDataBuf modelData;
+  // online only
+  uint32_t devId;
+  std::vector<std::vector<ge::Tensor>> inputs;
+  std::vector<ge::Graph> dependGraph;  // run graph (for training)
+};
+#ifdef __cplusplus
+}
+#endif
+
+/**
+ * @ingroup mstune
+ * @par Description: command line tuning
+ *
+ * @attention None
+ * @param option [IN] tuning options
+ * @param msg [OUT] message returned when tuning fails
+ * @retval #MSTUNE_SUCCESS execution succeeded
+ * @retval #MSTUNE_FAILED execution failed
+ * @par Dependency:
+ * @li tune_api.cpp: the development package this API belongs to.
+ * @li tune_api.h: the header file in which this API is declared.
+ * @see None
+ * @since
+ */
+MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg);
+
+/**
+ * @ingroup mstune
+ * @par Description: gradient tuning
+ *
+ * @attention None
+ * @param tuningGraph [IN] graph to tune
+ * @param dependGraph [IN] graphs the tuning graph depends on
+ * @param session [IN] GE session
+ * @param option [IN] option set, containing tuning options and GE options
+ * @retval #MSTUNE_SUCCESS execution succeeded
+ * @retval #MSTUNE_FAILED execution failed
+ * @par Dependency:
+ * @li tune_api.cpp: the development package this API belongs to.
+ * @li tune_api.h: the header file in which this API is declared.
+ * @see None
+ * @since
+ */
+extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
+    ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
+
+#endif
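Finally, a calling sketch for the gradient-tuning entry point, based on the signatures reconstructed above; the inner option key and value are placeholders rather than anything this header defines:

```cpp
#include <map>
#include <string>
#include <vector>
#include "tune_api.h"

// Sketch: run gradient tuning for a graph together with the graphs it depends on.
MsTuneStatus RunGradientTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraphs,
                               ge::Session *session) {
  std::map<std::string, std::map<std::string, std::string>> option;
  option[MSTUNE_SELF_KEY] = {{"work_path", "./tune"}};  // placeholder key/value
  return MsTrainTuning(tuningGraph, dependGraphs, session, option);
}
```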