| @@ -26,9 +26,9 @@ extern "C" { | |||||
| #endif | #endif | ||||
| // Current version is 1.0.0 | // Current version is 1.0.0 | ||||
| #define ACL_MAJOR_VERSION 1 | |||||
| #define ACL_MINOR_VERSION 0 | |||||
| #define ACL_PATCH_VERSION 0 | |||||
| #define ACL_MAJOR_VERSION 1 | |||||
| #define ACL_MINOR_VERSION 0 | |||||
| #define ACL_PATCH_VERSION 0 | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -70,4 +70,4 @@ ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *min | |||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_ACL_H_ | |||||
| #endif // INC_EXTERNAL_ACL_ACL_H_ | |||||
| @@ -111,6 +111,7 @@ static const int ACL_ERROR_DUMP_NOT_RUN = 100045; | |||||
| static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046; | static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046; | ||||
| static const int ACL_ERROR_PROF_API_CONFLICT = 148047; | static const int ACL_ERROR_PROF_API_CONFLICT = 148047; | ||||
| static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048; | static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048; | ||||
| static const int ACL_ERROR_INVALID_OPP_PATH = 148049; | |||||
| static const int ACL_ERROR_BAD_ALLOC = 200000; | static const int ACL_ERROR_BAD_ALLOC = 200000; | ||||
| static const int ACL_ERROR_API_NOT_SUPPORT = 200001; | static const int ACL_ERROR_API_NOT_SUPPORT = 200001; | ||||
| @@ -134,42 +135,42 @@ static const int ACL_ERROR_PROFILING_FAILURE = 500005; | |||||
| #define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE | #define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE | ||||
| typedef enum { | typedef enum { | ||||
| ACL_DT_UNDEFINED = -1, | |||||
| ACL_FLOAT = 0, | |||||
| ACL_FLOAT16 = 1, | |||||
| ACL_INT8 = 2, | |||||
| ACL_INT32 = 3, | |||||
| ACL_UINT8 = 4, | |||||
| ACL_INT16 = 6, | |||||
| ACL_UINT16 = 7, | |||||
| ACL_UINT32 = 8, | |||||
| ACL_INT64 = 9, | |||||
| ACL_UINT64 = 10, | |||||
| ACL_DOUBLE = 11, | |||||
| ACL_BOOL = 12, | |||||
| ACL_STRING = 13, | |||||
| ACL_DT_UNDEFINED = -1, | |||||
| ACL_FLOAT = 0, | |||||
| ACL_FLOAT16 = 1, | |||||
| ACL_INT8 = 2, | |||||
| ACL_INT32 = 3, | |||||
| ACL_UINT8 = 4, | |||||
| ACL_INT16 = 6, | |||||
| ACL_UINT16 = 7, | |||||
| ACL_UINT32 = 8, | |||||
| ACL_INT64 = 9, | |||||
| ACL_UINT64 = 10, | |||||
| ACL_DOUBLE = 11, | |||||
| ACL_BOOL = 12, | |||||
| ACL_STRING = 13, | |||||
| } aclDataType; | } aclDataType; | ||||
| typedef enum { | typedef enum { | ||||
| ACL_FORMAT_UNDEFINED = -1, | |||||
| ACL_FORMAT_NCHW = 0, | |||||
| ACL_FORMAT_NHWC = 1, | |||||
| ACL_FORMAT_ND = 2, | |||||
| ACL_FORMAT_NC1HWC0 = 3, | |||||
| ACL_FORMAT_FRACTAL_Z = 4, | |||||
| ACL_FORMAT_NC1HWC0_C04 = 12, | |||||
| ACL_FORMAT_NDHWC = 27, | |||||
| ACL_FORMAT_FRACTAL_NZ = 29, | |||||
| ACL_FORMAT_NCDHW = 30, | |||||
| ACL_FORMAT_NDC1HWC0 = 32, | |||||
| ACL_FRACTAL_Z_3D = 33 | |||||
| ACL_FORMAT_UNDEFINED = -1, | |||||
| ACL_FORMAT_NCHW = 0, | |||||
| ACL_FORMAT_NHWC = 1, | |||||
| ACL_FORMAT_ND = 2, | |||||
| ACL_FORMAT_NC1HWC0 = 3, | |||||
| ACL_FORMAT_FRACTAL_Z = 4, | |||||
| ACL_FORMAT_NC1HWC0_C04 = 12, | |||||
| ACL_FORMAT_NDHWC = 27, | |||||
| ACL_FORMAT_FRACTAL_NZ = 29, | |||||
| ACL_FORMAT_NCDHW = 30, | |||||
| ACL_FORMAT_NDC1HWC0 = 32, | |||||
| ACL_FRACTAL_Z_3D = 33 | |||||
| } aclFormat; | } aclFormat; | ||||
| typedef enum { | typedef enum { | ||||
| ACL_DEBUG = 0, | |||||
| ACL_INFO = 1, | |||||
| ACL_WARNING = 2, | |||||
| ACL_ERROR = 3, | |||||
| ACL_DEBUG = 0, | |||||
| ACL_INFO = 1, | |||||
| ACL_WARNING = 2, | |||||
| ACL_ERROR = 3, | |||||
| } aclLogLevel; | } aclLogLevel; | ||||
| /** | /** | ||||
| @@ -304,7 +305,9 @@ ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType); | |||||
| * @retval aclTensorDesc pointer. | * @retval aclTensorDesc pointer. | ||||
| * @retval nullptr if param is invalid or run out of memory | * @retval nullptr if param is invalid or run out of memory | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, | |||||
| ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, | |||||
| int numDims, | |||||
| const int64_t *dims, | |||||
| aclFormat format); | aclFormat format); | ||||
| /** | /** | ||||
| @@ -326,7 +329,8 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc); | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount, | |||||
| ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc, | |||||
| size_t dimsCount, | |||||
| int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); | int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); | ||||
| /** | /** | ||||
| @@ -423,7 +427,9 @@ ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, si | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum, | |||||
| ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, | |||||
| size_t index, | |||||
| size_t dimRangeNum, | |||||
| int64_t *dimRange); | int64_t *dimRange); | ||||
| /** | /** | ||||
| @@ -460,7 +466,7 @@ ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc); | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat, | ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat, | ||||
| aclTensorDesc **dstDesc); | |||||
| aclTensorDesc **dstDesc); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -548,7 +554,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int nu | |||||
| * | * | ||||
| * @retval null on failure. | * @retval null on failure. | ||||
| * @retval OtherValues success. | * @retval OtherValues success. | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index); | ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index); | ||||
| /** | /** | ||||
| @@ -559,7 +565,7 @@ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, | |||||
| * | * | ||||
| * @retval null on failure | * @retval null on failure | ||||
| * @retval OtherValues success | * @retval OtherValues success | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); | ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); | ||||
| /** | /** | ||||
| @@ -599,12 +605,13 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBu | |||||
| * @param ... [IN] the value of current log | * @param ... [IN] the value of current log | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, | ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, | ||||
| const char *fmt, ...); | |||||
| const char *fmt, ...); | |||||
| #define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) | |||||
| #define ACL_APP_LOG(level, fmt, ...) \ | |||||
| aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_ACL_BASE_H_ | |||||
| #endif // INC_EXTERNAL_ACL_ACL_BASE_H_ | |||||
| @@ -27,19 +27,19 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| #define ACL_MAX_DIM_CNT 128 | |||||
| #define ACL_MAX_TENSOR_NAME_LEN 128 | |||||
| #define ACL_MAX_BATCH_NUM 128 | |||||
| #define ACL_MAX_HW_NUM 128 | |||||
| #define ACL_MAX_SHAPE_COUNT 128 | |||||
| #define ACL_INVALID_NODE_INDEX 0xFFFFFFFF | |||||
| #define ACL_MDL_LOAD_FROM_FILE 1 | |||||
| #define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 | |||||
| #define ACL_MDL_LOAD_FROM_MEM 3 | |||||
| #define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 | |||||
| #define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 | |||||
| #define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 | |||||
| #define ACL_MAX_DIM_CNT 128 | |||||
| #define ACL_MAX_TENSOR_NAME_LEN 128 | |||||
| #define ACL_MAX_BATCH_NUM 128 | |||||
| #define ACL_MAX_HW_NUM 128 | |||||
| #define ACL_MAX_SHAPE_COUNT 128 | |||||
| #define ACL_INVALID_NODE_INDEX 0xFFFFFFFF | |||||
| #define ACL_MDL_LOAD_FROM_FILE 1 | |||||
| #define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 | |||||
| #define ACL_MDL_LOAD_FROM_MEM 3 | |||||
| #define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 | |||||
| #define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 | |||||
| #define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 | |||||
| #define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" | #define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" | ||||
| #define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" | #define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" | ||||
| @@ -51,123 +51,123 @@ typedef struct aclAippExtendInfo aclAippExtendInfo; | |||||
| typedef struct aclmdlConfigHandle aclmdlConfigHandle; | typedef struct aclmdlConfigHandle aclmdlConfigHandle; | ||||
| typedef enum { | typedef enum { | ||||
| ACL_YUV420SP_U8 = 1, | |||||
| ACL_XRGB8888_U8, | |||||
| ACL_RGB888_U8, | |||||
| ACL_YUV400_U8, | |||||
| ACL_NC1HWC0DI_FP16, | |||||
| ACL_NC1HWC0DI_S8, | |||||
| ACL_ARGB8888_U8, | |||||
| ACL_YUYV_U8, | |||||
| ACL_YUV422SP_U8, | |||||
| ACL_AYUV444_U8, | |||||
| ACL_RAW10, | |||||
| ACL_RAW12, | |||||
| ACL_RAW16, | |||||
| ACL_RAW24, | |||||
| ACL_AIPP_RESERVED = 0xffff, | |||||
| ACL_YUV420SP_U8 = 1, | |||||
| ACL_XRGB8888_U8, | |||||
| ACL_RGB888_U8, | |||||
| ACL_YUV400_U8, | |||||
| ACL_NC1HWC0DI_FP16, | |||||
| ACL_NC1HWC0DI_S8, | |||||
| ACL_ARGB8888_U8, | |||||
| ACL_YUYV_U8, | |||||
| ACL_YUV422SP_U8, | |||||
| ACL_AYUV444_U8, | |||||
| ACL_RAW10, | |||||
| ACL_RAW12, | |||||
| ACL_RAW16, | |||||
| ACL_RAW24, | |||||
| ACL_AIPP_RESERVED = 0xffff, | |||||
| } aclAippInputFormat; | } aclAippInputFormat; | ||||
| typedef enum { | typedef enum { | ||||
| ACL_MDL_PRIORITY_INT32 = 0, | |||||
| ACL_MDL_LOAD_TYPE_SIZET, | |||||
| ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ | |||||
| ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ | |||||
| ACL_MDL_MEM_SIZET, | |||||
| ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ | |||||
| ACL_MDL_WEIGHT_SIZET, | |||||
| ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to workspace memory of model with shallow copy */ | |||||
| ACL_MDL_WORKSPACE_SIZET, | |||||
| ACL_MDL_INPUTQ_NUM_SIZET, | |||||
| ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ | |||||
| ACL_MDL_OUTPUTQ_NUM_SIZET, | |||||
| ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ | |||||
| ACL_MDL_PRIORITY_INT32 = 0, | |||||
| ACL_MDL_LOAD_TYPE_SIZET, | |||||
| ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ | |||||
| ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ | |||||
| ACL_MDL_MEM_SIZET, | |||||
| ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ | |||||
| ACL_MDL_WEIGHT_SIZET, | |||||
| ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to workspace memory of model with shallow copy */ | |||||
| ACL_MDL_WORKSPACE_SIZET, | |||||
| ACL_MDL_INPUTQ_NUM_SIZET, | |||||
| ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ | |||||
| ACL_MDL_OUTPUTQ_NUM_SIZET, | |||||
| ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ | |||||
| } aclmdlConfigAttr; | } aclmdlConfigAttr; | ||||
| typedef enum { | typedef enum { | ||||
| ACL_DATA_WITHOUT_AIPP = 0, | |||||
| ACL_DATA_WITH_STATIC_AIPP, | |||||
| ACL_DATA_WITH_DYNAMIC_AIPP, | |||||
| ACL_DYNAMIC_AIPP_NODE | |||||
| ACL_DATA_WITHOUT_AIPP = 0, | |||||
| ACL_DATA_WITH_STATIC_AIPP, | |||||
| ACL_DATA_WITH_DYNAMIC_AIPP, | |||||
| ACL_DYNAMIC_AIPP_NODE | |||||
| } aclmdlInputAippType; | } aclmdlInputAippType; | ||||
| typedef struct aclmdlIODims { | typedef struct aclmdlIODims { | ||||
| char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ | |||||
| size_t dimCount; /**< dim array count */ | |||||
| int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ | |||||
| char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ | |||||
| size_t dimCount; /**< dim array count */ | |||||
| int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ | |||||
| } aclmdlIODims; | } aclmdlIODims; | ||||
| typedef struct aclAippDims { | typedef struct aclAippDims { | ||||
| aclmdlIODims srcDims; /**< input dims before model transform */ | |||||
| size_t srcSize; /**< input size before model transform */ | |||||
| aclmdlIODims aippOutdims; /**< aipp output dims */ | |||||
| size_t aippOutSize; /**< aipp output size */ | |||||
| aclmdlIODims srcDims; /**< input dims before model transform */ | |||||
| size_t srcSize; /**< input size before model transform */ | |||||
| aclmdlIODims aippOutdims; /**< aipp output dims */ | |||||
| size_t aippOutSize; /**< aipp output size */ | |||||
| } aclAippDims; | } aclAippDims; | ||||
| typedef struct aclmdlBatch { | typedef struct aclmdlBatch { | ||||
| size_t batchCount; /**< batch array count */ | |||||
| uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ | |||||
| size_t batchCount; /**< batch array count */ | |||||
| uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ | |||||
| } aclmdlBatch; | } aclmdlBatch; | ||||
| typedef struct aclmdlHW { | typedef struct aclmdlHW { | ||||
| size_t hwCount; /**< height&width array count */ | |||||
| uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ | |||||
| size_t hwCount; /**< height&width array count */ | |||||
| uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ | |||||
| } aclmdlHW; | } aclmdlHW; | ||||
| typedef struct aclAippInfo { | typedef struct aclAippInfo { | ||||
| aclAippInputFormat inputFormat; | |||||
| int32_t srcImageSizeW; | |||||
| int32_t srcImageSizeH; | |||||
| int8_t cropSwitch; | |||||
| int32_t loadStartPosW; | |||||
| int32_t loadStartPosH; | |||||
| int32_t cropSizeW; | |||||
| int32_t cropSizeH; | |||||
| int8_t resizeSwitch; | |||||
| int32_t resizeOutputW; | |||||
| int32_t resizeOutputH; | |||||
| int8_t paddingSwitch; | |||||
| int32_t leftPaddingSize; | |||||
| int32_t rightPaddingSize; | |||||
| int32_t topPaddingSize; | |||||
| int32_t bottomPaddingSize; | |||||
| int8_t cscSwitch; | |||||
| int8_t rbuvSwapSwitch; | |||||
| int8_t axSwapSwitch; | |||||
| int8_t singleLineMode; | |||||
| int32_t matrixR0C0; | |||||
| int32_t matrixR0C1; | |||||
| int32_t matrixR0C2; | |||||
| int32_t matrixR1C0; | |||||
| int32_t matrixR1C1; | |||||
| int32_t matrixR1C2; | |||||
| int32_t matrixR2C0; | |||||
| int32_t matrixR2C1; | |||||
| int32_t matrixR2C2; | |||||
| int32_t outputBias0; | |||||
| int32_t outputBias1; | |||||
| int32_t outputBias2; | |||||
| int32_t inputBias0; | |||||
| int32_t inputBias1; | |||||
| int32_t inputBias2; | |||||
| int32_t meanChn0; | |||||
| int32_t meanChn1; | |||||
| int32_t meanChn2; | |||||
| int32_t meanChn3; | |||||
| float minChn0; | |||||
| float minChn1; | |||||
| float minChn2; | |||||
| float minChn3; | |||||
| float varReciChn0; | |||||
| float varReciChn1; | |||||
| float varReciChn2; | |||||
| float varReciChn3; | |||||
| aclFormat srcFormat; | |||||
| aclDataType srcDatatype; | |||||
| size_t srcDimNum; | |||||
| size_t shapeCount; | |||||
| aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; | |||||
| aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ | |||||
| aclAippInputFormat inputFormat; | |||||
| int32_t srcImageSizeW; | |||||
| int32_t srcImageSizeH; | |||||
| int8_t cropSwitch; | |||||
| int32_t loadStartPosW; | |||||
| int32_t loadStartPosH; | |||||
| int32_t cropSizeW; | |||||
| int32_t cropSizeH; | |||||
| int8_t resizeSwitch; | |||||
| int32_t resizeOutputW; | |||||
| int32_t resizeOutputH; | |||||
| int8_t paddingSwitch; | |||||
| int32_t leftPaddingSize; | |||||
| int32_t rightPaddingSize; | |||||
| int32_t topPaddingSize; | |||||
| int32_t bottomPaddingSize; | |||||
| int8_t cscSwitch; | |||||
| int8_t rbuvSwapSwitch; | |||||
| int8_t axSwapSwitch; | |||||
| int8_t singleLineMode; | |||||
| int32_t matrixR0C0; | |||||
| int32_t matrixR0C1; | |||||
| int32_t matrixR0C2; | |||||
| int32_t matrixR1C0; | |||||
| int32_t matrixR1C1; | |||||
| int32_t matrixR1C2; | |||||
| int32_t matrixR2C0; | |||||
| int32_t matrixR2C1; | |||||
| int32_t matrixR2C2; | |||||
| int32_t outputBias0; | |||||
| int32_t outputBias1; | |||||
| int32_t outputBias2; | |||||
| int32_t inputBias0; | |||||
| int32_t inputBias1; | |||||
| int32_t inputBias2; | |||||
| int32_t meanChn0; | |||||
| int32_t meanChn1; | |||||
| int32_t meanChn2; | |||||
| int32_t meanChn3; | |||||
| float minChn0; | |||||
| float minChn1; | |||||
| float minChn2; | |||||
| float minChn3; | |||||
| float varReciChn0; | |||||
| float varReciChn1; | |||||
| float varReciChn2; | |||||
| float varReciChn3; | |||||
| aclFormat srcFormat; | |||||
| aclDataType srcDatatype; | |||||
| size_t srcDimNum; | |||||
| size_t shapeCount; | |||||
| aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; | |||||
| aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ | |||||
| } aclAippInfo; | } aclAippInfo; | ||||
| /** | /** | ||||
| @@ -339,7 +339,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId); | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, | |||||
| uint32_t *modelId); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -361,8 +362,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSi | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr, | |||||
| size_t workSize, void *weightPtr, size_t weightSize); | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, | |||||
| uint32_t *modelId, void *workPtr, size_t workSize, | |||||
| void *weightPtr, size_t weightSize); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -385,9 +387,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, ui | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId, | |||||
| void *workPtr, size_t workSize, void *weightPtr, | |||||
| size_t weightSize); | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, | |||||
| uint32_t *modelId, void *workPtr, size_t workSize, | |||||
| void *weightPtr, size_t weightSize); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -422,8 +424,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, | ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, | ||||
| const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ, | |||||
| size_t outputQNum); | |||||
| const uint32_t *inputQ, size_t inputQNum, | |||||
| const uint32_t *outputQ, size_t outputQNum); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -453,8 +455,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset | |||||
| * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | ||||
| * aclmdlLoadFromMemWithMem | * aclmdlLoadFromMemWithMem | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output, | |||||
| aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, | |||||
| aclmdlDataset *output, aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -829,11 +831,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| * | * | ||||
| * @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, | |||||
| int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, | |||||
| int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, | |||||
| int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, | |||||
| int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, | |||||
| int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, | |||||
| int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, | |||||
| uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, | uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, | ||||
| uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, | uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, | ||||
| uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); | uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); | ||||
| @@ -849,7 +851,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, in | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| * | * | ||||
| * @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); | ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); | ||||
| /** | /** | ||||
| @@ -863,7 +865,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSe | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| * | * | ||||
| * @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); | ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); | ||||
| /** | /** | ||||
| @@ -878,7 +880,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| * | * | ||||
| * @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, | ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, | ||||
| int32_t srcImageSizeH); | int32_t srcImageSizeH); | ||||
| @@ -898,10 +900,14 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| * | * | ||||
| * @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW, | |||||
| int32_t scfInputSizeH, int32_t scfOutputSizeW, | |||||
| int32_t scfOutputSizeH, uint64_t batchIndex); | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, | |||||
| int8_t scfSwitch, | |||||
| int32_t scfInputSizeW, | |||||
| int32_t scfInputSizeH, | |||||
| int32_t scfOutputSizeW, | |||||
| int32_t scfOutputSizeH, | |||||
| uint64_t batchIndex); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -919,9 +925,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, in | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| * | * | ||||
| * @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW, | |||||
| int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH, | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, | |||||
| int8_t cropSwitch, | |||||
| int32_t cropStartPosW, | |||||
| int32_t cropStartPosH, | |||||
| int32_t cropSizeW, | |||||
| int32_t cropSizeH, | |||||
| uint64_t batchIndex); | uint64_t batchIndex); | ||||
| /** | /** | ||||
| @@ -940,7 +950,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, i | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| * | * | ||||
| * @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, | ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, | ||||
| int32_t paddingSizeTop, int32_t paddingSizeBottom, | int32_t paddingSizeTop, int32_t paddingSizeBottom, | ||||
| int32_t paddingSizeLeft, int32_t paddingSizeRight, | int32_t paddingSizeLeft, int32_t paddingSizeRight, | ||||
| @@ -961,10 +971,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| * | * | ||||
| * @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0, | |||||
| int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2, | |||||
| int16_t dtcPixelMeanChn3, uint64_t batchIndex); | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, | |||||
| int16_t dtcPixelMeanChn0, | |||||
| int16_t dtcPixelMeanChn1, | |||||
| int16_t dtcPixelMeanChn2, | |||||
| int16_t dtcPixelMeanChn3, | |||||
| uint64_t batchIndex); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -981,10 +994,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| * | * | ||||
| * @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0, | |||||
| float dtcPixelMinChn1, float dtcPixelMinChn2, | |||||
| float dtcPixelMinChn3, uint64_t batchIndex); | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, | |||||
| float dtcPixelMinChn0, | |||||
| float dtcPixelMinChn1, | |||||
| float dtcPixelMinChn2, | |||||
| float dtcPixelMinChn3, | |||||
| uint64_t batchIndex); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1001,10 +1017,13 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| * | * | ||||
| * @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0, | |||||
| float dtcPixelVarReciChn1, float dtcPixelVarReciChn2, | |||||
| float dtcPixelVarReciChn3, uint64_t batchIndex); | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, | |||||
| float dtcPixelVarReciChn0, | |||||
| float dtcPixelVarReciChn1, | |||||
| float dtcPixelVarReciChn2, | |||||
| float dtcPixelVarReciChn3, | |||||
| uint64_t batchIndex); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1020,8 +1039,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, | |||||
| * | * | ||||
| * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | ||||
| * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index, | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, | |||||
| aclmdlDataset *dataset, | |||||
| size_t index, | |||||
| const aclmdlAIPP *aippParmsSet); | const aclmdlAIPP *aippParmsSet); | ||||
| /** | /** | ||||
| @@ -1038,8 +1059,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset | |||||
| * | * | ||||
| * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | ||||
| * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index, | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, | |||||
| aclmdlDataset *dataset, | |||||
| size_t index, | |||||
| const aclmdlAIPP *aippParmsSet); | const aclmdlAIPP *aippParmsSet); | ||||
| /** | /** | ||||
| @@ -1057,8 +1080,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlD | |||||
| * | * | ||||
| * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | ||||
| * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type, | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, | |||||
| size_t index, | |||||
| aclmdlInputAippType *type, | |||||
| size_t *dynamicAttachedDataIndex); | size_t *dynamicAttachedDataIndex); | ||||
| /** | /** | ||||
| @@ -1075,7 +1100,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a | |||||
| * | * | ||||
| * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | ||||
| * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); | ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); | ||||
| /** | /** | ||||
| @@ -1094,11 +1119,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t ind | |||||
| * | * | ||||
| * @retval ACL_SUCCESS The function is successfully executed | * @retval ACL_SUCCESS The function is successfully executed | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId, | |||||
| char *opName, size_t opNameLen, aclTensorDesc **inputDesc, | |||||
| size_t *numInputs, aclTensorDesc **outputDesc, | |||||
| size_t *numOutputs); | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, | |||||
| uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs, | |||||
| aclTensorDesc **outputDesc, size_t *numOutputs); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1106,7 +1130,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_ | |||||
| * | * | ||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); | ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); | ||||
| /** | /** | ||||
| @@ -1117,7 +1141,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); | |||||
| * | * | ||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); | ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); | ||||
| /** | /** | ||||
| @@ -1126,7 +1150,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); | |||||
| * | * | ||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); | ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); | ||||
| /** | /** | ||||
| @@ -1138,7 +1162,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); | |||||
| * | * | ||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId); | ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId); | ||||
| /** | /** | ||||
| @@ -1148,7 +1172,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *hand | |||||
| * @retval the aclmdlConfigHandle pointer | * @retval the aclmdlConfigHandle pointer | ||||
| * | * | ||||
| * @see aclmdlDestroyConfigHandle | * @see aclmdlDestroyConfigHandle | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle(); | ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle(); | ||||
| /** | /** | ||||
| @@ -1177,10 +1201,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr, | ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr, | ||||
| const void *attrValue, size_t valueSize); | |||||
| const void *attrValue, size_t valueSize); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ | |||||
| #endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ | |||||
| @@ -33,9 +33,9 @@ typedef void (*aclDataDeallocator)(void *data, size_t length); | |||||
| static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1; | static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1; | ||||
| typedef enum aclEngineType { | typedef enum aclEngineType { | ||||
| ACL_ENGINE_SYS, | |||||
| ACL_ENGINE_AICORE, | |||||
| ACL_ENGINE_VECTOR, | |||||
| ACL_ENGINE_SYS, | |||||
| ACL_ENGINE_AICORE, | |||||
| ACL_ENGINE_VECTOR, | |||||
| } aclopEngineType; | } aclopEngineType; | ||||
| /** | /** | ||||
| @@ -148,7 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues, | ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues, | ||||
| const uint8_t *values); | |||||
| const uint8_t *values); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -163,7 +163,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *a | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues, | ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues, | ||||
| const int64_t *values); | |||||
| const int64_t *values); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -178,7 +178,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *at | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues, | ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues, | ||||
| const float *values); | |||||
| const float *values); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -193,7 +193,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char * | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues, | ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues, | ||||
| const char **values); | |||||
| const char **values); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -208,8 +208,11 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists, | |||||
| const int *numValues, const int64_t *const values[]); | |||||
| ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, | |||||
| const char *attrName, | |||||
| int numLists, | |||||
| const int *numValues, | |||||
| const int64_t *const values[]); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -239,10 +242,15 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") | ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") | ||||
| ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], | |||||
| const aclDataBuffer *const inputs[], int numOutputs, | |||||
| const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], | |||||
| const aclopAttr *attr, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, | |||||
| int numInputs, | |||||
| const aclTensorDesc *const inputDesc[], | |||||
| const aclDataBuffer *const inputs[], | |||||
| int numOutputs, | |||||
| const aclTensorDesc *const outputDesc[], | |||||
| aclDataBuffer *const outputs[], | |||||
| const aclopAttr *attr, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -272,9 +280,15 @@ ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, con | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[], | |||||
| aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], | |||||
| aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, | |||||
| int numInputs, | |||||
| aclTensorDesc *inputDesc[], | |||||
| aclDataBuffer *inputs[], | |||||
| int numOutputs, | |||||
| aclTensorDesc *outputDesc[], | |||||
| aclDataBuffer *outputs[], | |||||
| aclopAttr *attr, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -292,9 +306,12 @@ ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, a | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs, | |||||
| const aclTensorDesc *const inputDesc[], int numOutputs, | |||||
| const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, | |||||
| ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, | |||||
| int numInputs, | |||||
| const aclTensorDesc *const inputDesc[], | |||||
| int numOutputs, | |||||
| const aclTensorDesc *const outputDesc[], | |||||
| const aclopAttr *opAttr, | |||||
| aclopHandle **handle); | aclopHandle **handle); | ||||
| /** | /** | ||||
| @@ -326,9 +343,12 @@ ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle); | |||||
| * | * | ||||
| * @see aclopCreateHandle | aclCreateDataBuffer | * @see aclopCreateHandle | aclCreateDataBuffer | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs, | |||||
| const aclDataBuffer *const inputs[], int numOutputs, | |||||
| aclDataBuffer *const outputs[], aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, | |||||
| int numInputs, | |||||
| const aclDataBuffer *const inputs[], | |||||
| int numOutputs, | |||||
| aclDataBuffer *const outputs[], | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -344,8 +364,11 @@ ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInp | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer, | |||||
| const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate, | |||||
| ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, | |||||
| const aclDataBuffer *srcBuffer, | |||||
| const aclTensorDesc *dstDesc, | |||||
| aclDataBuffer *dstBuffer, | |||||
| uint8_t truncate, | |||||
| aclrtStream stream); | aclrtStream stream); | ||||
| /** | /** | ||||
| @@ -360,9 +383,12 @@ ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDa | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate, | |||||
| ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, | |||||
| aclTensorDesc *dstDesc, | |||||
| uint8_t truncate, | |||||
| aclopHandle **handle); | aclopHandle **handle); | ||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| * @brief create kernel | * @brief create kernel | ||||
| @@ -381,10 +407,15 @@ ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, ac | |||||
| * | * | ||||
| * @see aclopCompile | * @see aclopCompile | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName, | |||||
| void *binData, int binSize, aclopEngineType enginetype, | |||||
| ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, | |||||
| const char *kernelId, | |||||
| const char *kernelName, | |||||
| void *binData, | |||||
| int binSize, | |||||
| aclopEngineType enginetype, | |||||
| aclDataDeallocator deallocator); | aclDataDeallocator deallocator); | ||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| * @brief create kernel | * @brief create kernel | ||||
| @@ -399,8 +430,11 @@ ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *k | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs, | |||||
| const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, | |||||
| typedef aclError (*aclopCompileFunc)(int numInputs, | |||||
| const aclTensorDesc *const inputDesc[], | |||||
| int numOutputs, | |||||
| const aclTensorDesc *const outputDesc[], | |||||
| const aclopAttr *opAttr, | |||||
| aclopKernelDesc *aclopKernelDesc); | aclopKernelDesc *aclopKernelDesc); | ||||
| /** | /** | ||||
| @@ -441,8 +475,11 @@ ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType); | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim, | |||||
| const void *args, uint32_t argSize); | |||||
| ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, | |||||
| const char *kernelId, | |||||
| uint32_t blockDim, | |||||
| const void *args, | |||||
| uint32_t argSize); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -473,9 +510,12 @@ ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kerne | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs, | |||||
| const aclTensorDesc *const inputDesc[], int numOutputs, | |||||
| const aclTensorDesc *const outputDesc[], const aclopAttr *attr); | |||||
| ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, | |||||
| int numInputs, | |||||
| const aclTensorDesc *const inputDesc[], | |||||
| int numOutputs, | |||||
| const aclTensorDesc *const outputDesc[], | |||||
| const aclopAttr *attr); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -493,12 +533,17 @@ ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[], | |||||
| aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], | |||||
| ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, | |||||
| int numInputs, | |||||
| aclTensorDesc *inputDesc[], | |||||
| aclDataBuffer *inputs[], | |||||
| int numOutputs, | |||||
| aclTensorDesc *outputDesc[], | |||||
| aclopAttr *attr); | aclopAttr *attr); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_ACL_OP_H_ | |||||
| #endif // INC_EXTERNAL_ACL_ACL_OP_H_ | |||||
| @@ -24,18 +24,21 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType; | |||||
| typedef enum aclCompileType { | |||||
| ACL_COMPILE_SYS, | |||||
| ACL_COMPILE_UNREGISTERED | |||||
| } aclopCompileType; | |||||
| typedef enum { | typedef enum { | ||||
| ACL_PRECISION_MODE, | |||||
| ACL_AICORE_NUM, | |||||
| ACL_AUTO_TUNE_MODE, | |||||
| ACL_OP_SELECT_IMPL_MODE, | |||||
| ACL_OPTYPELIST_FOR_IMPLMODE, | |||||
| ACL_OP_DEBUG_LEVEL, | |||||
| ACL_DEBUG_DIR, | |||||
| ACL_OP_COMPILER_CACHE_MODE, | |||||
| ACL_OP_COMPILER_CACHE_DIR | |||||
| ACL_PRECISION_MODE, | |||||
| ACL_AICORE_NUM, | |||||
| ACL_AUTO_TUNE_MODE, | |||||
| ACL_OP_SELECT_IMPL_MODE, | |||||
| ACL_OPTYPELIST_FOR_IMPLMODE, | |||||
| ACL_OP_DEBUG_LEVEL, | |||||
| ACL_DEBUG_DIR, | |||||
| ACL_OP_COMPILER_CACHE_MODE, | |||||
| ACL_OP_COMPILER_CACHE_DIR | |||||
| } aclCompileOpt; | } aclCompileOpt; | ||||
| /** | /** | ||||
| @@ -56,10 +59,15 @@ typedef enum { | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], | |||||
| int numOutputs, const aclTensorDesc *const outputDesc[], | |||||
| const aclopAttr *attr, aclopEngineType engineType, | |||||
| aclopCompileType compileFlag, const char *opPath); | |||||
| ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, | |||||
| int numInputs, | |||||
| const aclTensorDesc *const inputDesc[], | |||||
| int numOutputs, | |||||
| const aclTensorDesc *const outputDesc[], | |||||
| const aclopAttr *attr, | |||||
| aclopEngineType engineType, | |||||
| aclopCompileType compileFlag, | |||||
| const char *opPath); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -82,10 +90,11 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, con | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( | |||||
| const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||||
| int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, | |||||
| aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType, | |||||
| int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||||
| int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], | |||||
| const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag, | |||||
| const char *opPath, aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -103,4 +112,4 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val | |||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ | |||||
| #endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ | |||||
| @@ -23,21 +23,21 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| #define ACL_PROF_ACL_API 0x0001 | |||||
| #define ACL_PROF_TASK_TIME 0x0002 | |||||
| #define ACL_PROF_AICORE_METRICS 0x0004 | |||||
| #define ACL_PROF_AICPU 0x0008 | |||||
| #define ACL_PROF_ACL_API 0x0001 | |||||
| #define ACL_PROF_TASK_TIME 0x0002 | |||||
| #define ACL_PROF_AICORE_METRICS 0x0004 | |||||
| #define ACL_PROF_AICPU 0x0008 | |||||
| #define ACL_PROF_MAX_OP_NAME_LEN 257 | |||||
| #define ACL_PROF_MAX_OP_TYPE_LEN 65 | |||||
| #define ACL_PROF_MAX_OP_NAME_LEN 257 | |||||
| #define ACL_PROF_MAX_OP_TYPE_LEN 65 | |||||
| typedef enum { | typedef enum { | ||||
| ACL_AICORE_ARITHMETIC_UTILIZATION = 0, | |||||
| ACL_AICORE_PIPE_UTILIZATION = 1, | |||||
| ACL_AICORE_MEMORY_BANDWIDTH = 2, | |||||
| ACL_AICORE_L0B_AND_WIDTH = 3, | |||||
| ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, | |||||
| ACL_AICORE_NONE = 0xFF | |||||
| ACL_AICORE_ARITHMETIC_UTILIZATION = 0, | |||||
| ACL_AICORE_PIPE_UTILIZATION = 1, | |||||
| ACL_AICORE_MEMORY_BANDWIDTH = 2, | |||||
| ACL_AICORE_L0B_AND_WIDTH = 3, | |||||
| ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, | |||||
| ACL_AICORE_NONE = 0xFF | |||||
| } aclprofAicoreMetrics; | } aclprofAicoreMetrics; | ||||
| typedef struct aclprofConfig aclprofConfig; | typedef struct aclprofConfig aclprofConfig; | ||||
| @@ -98,8 +98,7 @@ ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); | |||||
| * @see aclprofDestroyConfig | * @see aclprofDestroyConfig | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, | ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, | ||||
| aclprofAicoreMetrics aicoreMetrics, | |||||
| aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); | |||||
| aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -139,7 +138,8 @@ ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); | |||||
| * | * | ||||
| * @see aclprofModelUnSubscribe | * @see aclprofModelUnSubscribe | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig); | |||||
| ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, | |||||
| const aclprofSubscribeConfig *profSubscribeConfig); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -167,7 +167,7 @@ ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); | |||||
| * @see aclprofDestroySubscribeConfig | * @see aclprofDestroySubscribeConfig | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, | ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, | ||||
| aclprofAicoreMetrics aicoreMetrics, void *fd); | |||||
| aclprofAicoreMetrics aicoreMetrics, void *fd); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -219,8 +219,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLe | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType, | |||||
| size_t opTypeLen); | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, | |||||
| char *opType, size_t opTypeLen); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -235,8 +235,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoL | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName, | |||||
| size_t opNameLen); | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, | |||||
| char *opName, size_t opNameLen); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -293,4 +293,4 @@ ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLe | |||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_PROF_H_ | |||||
| #endif // INC_EXTERNAL_ACL_PROF_H_ | |||||
| @@ -26,62 +26,62 @@ extern "C" { | |||||
| #endif | #endif | ||||
| typedef enum aclrtRunMode { | typedef enum aclrtRunMode { | ||||
| ACL_DEVICE, | |||||
| ACL_HOST, | |||||
| ACL_DEVICE, | |||||
| ACL_HOST, | |||||
| } aclrtRunMode; | } aclrtRunMode; | ||||
| typedef enum aclrtTsId { | typedef enum aclrtTsId { | ||||
| ACL_TS_ID_AICORE = 0, | |||||
| ACL_TS_ID_AIVECTOR = 1, | |||||
| ACL_TS_ID_RESERVED = 2, | |||||
| ACL_TS_ID_AICORE = 0, | |||||
| ACL_TS_ID_AIVECTOR = 1, | |||||
| ACL_TS_ID_RESERVED = 2, | |||||
| } aclrtTsId; | } aclrtTsId; | ||||
| typedef enum aclrtEventStatus { | typedef enum aclrtEventStatus { | ||||
| ACL_EVENT_STATUS_COMPLETE = 0, | |||||
| ACL_EVENT_STATUS_NOT_READY = 1, | |||||
| ACL_EVENT_STATUS_RESERVED = 2, | |||||
| ACL_EVENT_STATUS_COMPLETE = 0, | |||||
| ACL_EVENT_STATUS_NOT_READY = 1, | |||||
| ACL_EVENT_STATUS_RESERVED = 2, | |||||
| } aclrtEventStatus; | } aclrtEventStatus; | ||||
| typedef enum aclrtCallbackBlockType { | typedef enum aclrtCallbackBlockType { | ||||
| ACL_CALLBACK_NO_BLOCK, | |||||
| ACL_CALLBACK_BLOCK, | |||||
| ACL_CALLBACK_NO_BLOCK, | |||||
| ACL_CALLBACK_BLOCK, | |||||
| } aclrtCallbackBlockType; | } aclrtCallbackBlockType; | ||||
| typedef enum aclrtMemcpyKind { | typedef enum aclrtMemcpyKind { | ||||
| ACL_MEMCPY_HOST_TO_HOST, | |||||
| ACL_MEMCPY_HOST_TO_DEVICE, | |||||
| ACL_MEMCPY_DEVICE_TO_HOST, | |||||
| ACL_MEMCPY_DEVICE_TO_DEVICE, | |||||
| ACL_MEMCPY_HOST_TO_HOST, | |||||
| ACL_MEMCPY_HOST_TO_DEVICE, | |||||
| ACL_MEMCPY_DEVICE_TO_HOST, | |||||
| ACL_MEMCPY_DEVICE_TO_DEVICE, | |||||
| } aclrtMemcpyKind; | } aclrtMemcpyKind; | ||||
| typedef enum aclrtMemMallocPolicy { | typedef enum aclrtMemMallocPolicy { | ||||
| ACL_MEM_MALLOC_HUGE_FIRST, | |||||
| ACL_MEM_MALLOC_HUGE_ONLY, | |||||
| ACL_MEM_MALLOC_NORMAL_ONLY, | |||||
| ACL_MEM_MALLOC_HUGE_FIRST_P2P, | |||||
| ACL_MEM_MALLOC_HUGE_ONLY_P2P, | |||||
| ACL_MEM_MALLOC_NORMAL_ONLY_P2P, | |||||
| ACL_MEM_MALLOC_HUGE_FIRST, | |||||
| ACL_MEM_MALLOC_HUGE_ONLY, | |||||
| ACL_MEM_MALLOC_NORMAL_ONLY, | |||||
| ACL_MEM_MALLOC_HUGE_FIRST_P2P, | |||||
| ACL_MEM_MALLOC_HUGE_ONLY_P2P, | |||||
| ACL_MEM_MALLOC_NORMAL_ONLY_P2P, | |||||
| } aclrtMemMallocPolicy; | } aclrtMemMallocPolicy; | ||||
| typedef enum aclrtMemAttr { | typedef enum aclrtMemAttr { | ||||
| ACL_DDR_MEM, | |||||
| ACL_HBM_MEM, | |||||
| ACL_DDR_MEM_HUGE, | |||||
| ACL_DDR_MEM_NORMAL, | |||||
| ACL_HBM_MEM_HUGE, | |||||
| ACL_HBM_MEM_NORMAL, | |||||
| ACL_DDR_MEM_P2P_HUGE, | |||||
| ACL_DDR_MEM_P2P_NORMAL, | |||||
| ACL_HBM_MEM_P2P_HUGE, | |||||
| ACL_HBM_MEM_P2P_NORMAL, | |||||
| ACL_DDR_MEM, | |||||
| ACL_HBM_MEM, | |||||
| ACL_DDR_MEM_HUGE, | |||||
| ACL_DDR_MEM_NORMAL, | |||||
| ACL_HBM_MEM_HUGE, | |||||
| ACL_HBM_MEM_NORMAL, | |||||
| ACL_DDR_MEM_P2P_HUGE, | |||||
| ACL_DDR_MEM_P2P_NORMAL, | |||||
| ACL_HBM_MEM_P2P_HUGE, | |||||
| ACL_HBM_MEM_P2P_NORMAL, | |||||
| } aclrtMemAttr; | } aclrtMemAttr; | ||||
| typedef enum aclrtGroupAttr { | typedef enum aclrtGroupAttr { | ||||
| ACL_GROUP_AICORE_INT, | |||||
| ACL_GROUP_AIV_INT, | |||||
| ACL_GROUP_AIC_INT, | |||||
| ACL_GROUP_SDMANUM_INT, | |||||
| ACL_GROUP_ASQNUM_INT | |||||
| ACL_GROUP_AICORE_INT, | |||||
| ACL_GROUP_AIV_INT, | |||||
| ACL_GROUP_AIC_INT, | |||||
| ACL_GROUP_SDMANUM_INT, | |||||
| ACL_GROUP_ASQNUM_INT | |||||
| } aclrtGroupAttr; | } aclrtGroupAttr; | ||||
| typedef struct tagRtGroupInfo aclrtGroupInfo; | typedef struct tagRtGroupInfo aclrtGroupInfo; | ||||
| @@ -472,7 +472,7 @@ ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stre | |||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); | ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); | ||||
| /** | |||||
| /** | |||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| * @brief Queries an event's status | * @brief Queries an event's status | ||||
| * | * | ||||
| @@ -534,7 +534,9 @@ ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, | |||||
| * | * | ||||
| * @see aclrtFree | acldvppMalloc | aclrtMallocCached | * @see aclrtFree | acldvppMalloc | aclrtMallocCached | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy); | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, | |||||
| size_t size, | |||||
| aclrtMemMallocPolicy policy); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -557,7 +559,9 @@ ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMal | |||||
| * | * | ||||
| * @see aclrtFree | aclrtMalloc | * @see aclrtFree | aclrtMalloc | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, | |||||
| size_t size, | |||||
| aclrtMemMallocPolicy policy); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -648,7 +652,10 @@ ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, | |||||
| size_t destMax, | |||||
| const void *src, | |||||
| size_t count, | |||||
| aclrtMemcpyKind kind); | aclrtMemcpyKind kind); | ||||
| /** | /** | ||||
| @@ -695,31 +702,38 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t | |||||
| * | * | ||||
| * @see aclrtSynchronizeStream | * @see aclrtSynchronizeStream | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, | |||||
| aclrtMemcpyKind kind, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, | |||||
| size_t destMax, | |||||
| const void *src, | |||||
| size_t count, | |||||
| aclrtMemcpyKind kind, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | |||||
| * @brief Asynchronous initialize memory | |||||
| * and set contents of memory to specified value async | |||||
| * | |||||
| * @par Function | |||||
| * @ingroup AscendCL | |||||
| * @brief Asynchronous initialize memory | |||||
| * and set contents of memory to specified value async | |||||
| * | |||||
| * @par Function | |||||
| * The memory to be initialized is on the Host or device side, | * The memory to be initialized is on the Host or device side, | ||||
| * and the system determines whether | * and the system determines whether | ||||
| * it is host or device according to the address | * it is host or device according to the address | ||||
| * | * | ||||
| * @param devPtr [IN] destination address pointer | |||||
| * @param maxCount [IN] Max length of destination address memory | |||||
| * @param value [IN] set value | |||||
| * @param count [IN] the number of byte to set | |||||
| * @param stream [IN] asynchronized task stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSynchronizeStream | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, | |||||
| * @param devPtr [IN] destination address pointer | |||||
| * @param maxCount [IN] Max length of destination address memory | |||||
| * @param value [IN] set value | |||||
| * @param count [IN] the number of byte to set | |||||
| * @param stream [IN] asynchronized task stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSynchronizeStream | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, | |||||
| size_t maxCount, | |||||
| int32_t value, | |||||
| size_t count, | |||||
| aclrtStream stream); | aclrtStream stream); | ||||
| /** | /** | ||||
| @@ -865,8 +879,11 @@ ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); | |||||
| * | * | ||||
| * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupId, | |||||
| aclrtGroupAttr attr, void *attrValue, size_t valueLen, | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, | |||||
| int32_t groupId, | |||||
| aclrtGroupAttr attr, | |||||
| void *attrValue, | |||||
| size_t valueLen, | |||||
| size_t *paramRetSize); | size_t *paramRetSize); | ||||
| /** | /** | ||||
| @@ -929,4 +946,5 @@ ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, si | |||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
| #endif // INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
| @@ -24,10 +24,10 @@ extern "C" { | |||||
| #endif | #endif | ||||
| enum acltdtTensorType { | enum acltdtTensorType { | ||||
| ACL_TENSOR_DATA_UNDEFINED = -1, | |||||
| ACL_TENSOR_DATA_TENSOR, | |||||
| ACL_TENSOR_DATA_END_OF_SEQUENCE, | |||||
| ACL_TENSOR_DATA_ABNORMAL | |||||
| ACL_TENSOR_DATA_UNDEFINED = -1, | |||||
| ACL_TENSOR_DATA_TENSOR, | |||||
| ACL_TENSOR_DATA_END_OF_SEQUENCE, | |||||
| ACL_TENSOR_DATA_ABNORMAL | |||||
| }; | }; | ||||
| typedef struct acltdtDataItem acltdtDataItem; | typedef struct acltdtDataItem acltdtDataItem; | ||||
| @@ -64,7 +64,7 @@ ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem * | |||||
| * | * | ||||
| * @retval null for failed | * @retval null for failed | ||||
| * @retval OtherValues success | * @retval OtherValues success | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); | ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); | ||||
| /** | /** | ||||
| @@ -75,7 +75,7 @@ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataIt | |||||
| * | * | ||||
| * @retval 0 for failed | * @retval 0 for failed | ||||
| * @retval OtherValues success | * @retval OtherValues success | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); | ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); | ||||
| /** | /** | ||||
| @@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataI | |||||
| * | * | ||||
| * @retval 0 for failed | * @retval 0 for failed | ||||
| * @retval OtherValues success | * @retval OtherValues success | ||||
| */ | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); | ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); | ||||
| /** | /** | ||||
| @@ -118,8 +118,12 @@ ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataIte | |||||
| * | * | ||||
| * @see acltdtDestroyDataItem | * @see acltdtDestroyDataItem | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum, | |||||
| aclDataType dataType, void *data, size_t size); | |||||
| ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, | |||||
| const int64_t *dims, | |||||
| size_t dimNum, | |||||
| aclDataType dataType, | |||||
| void *data, | |||||
| size_t size); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -250,7 +254,8 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); | |||||
| * | * | ||||
| * @see acltdtReceiveTensor | * @see acltdtReceiveTensor | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, | |||||
| ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, | |||||
| const acltdtDataset *dataset, | |||||
| int32_t timeout); | int32_t timeout); | ||||
| /** | /** | ||||
| @@ -266,11 +271,13 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, | |||||
| * | * | ||||
| * @see acltdtSendTensor | * @see acltdtSendTensor | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, | |||||
| ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, | |||||
| acltdtDataset *dataset, | |||||
| int32_t timeout); | int32_t timeout); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
| #endif //INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
| @@ -17,6 +17,20 @@ | |||||
| #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | ||||
| #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | ||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <stddef.h> | #include <stddef.h> | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| @@ -23,79 +23,80 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
| static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
| static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
| static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
| static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
| static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
| static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
| static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
| static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
| static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
| static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
| static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
| static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
| static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
| static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
| static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
| static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
| static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
| static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
| static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
| static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
| static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
| static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
| static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
| static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||||
| static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
| static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
| static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
| static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
| static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
| static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
| static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
| static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||||
| static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
| static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
| static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
| static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
| static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
| static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
| static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
| static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
| static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
| static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
| static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
| static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
| static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
| static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
| static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
| static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
| static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
| static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
| static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
| static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
| static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
| static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
| static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
| static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
| static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
| static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
| static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
| static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
| static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
| static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
| static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
| static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
| static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
| static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
| static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
| static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
| static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
| static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
| static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
| static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
| static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| @@ -23,9 +23,17 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; | |||||
| typedef enum aclTransType { | |||||
| ACL_TRANS_N, | |||||
| ACL_TRANS_T, | |||||
| ACL_TRANS_NZ, | |||||
| ACL_TRANS_NZ_T | |||||
| } aclTransType; | |||||
| typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; | |||||
| typedef enum aclComputeType { | |||||
| ACL_COMPUTE_HIGH_PRECISION, | |||||
| ACL_COMPUTE_LOW_PRECISION | |||||
| } aclComputeType; | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -53,11 +61,12 @@ typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECIS | |||||
| * | * | ||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, | |||||
| aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, | |||||
| const void *beta, void *y, int incy, aclDataType dataTypeY, | |||||
| aclComputeType type, aclrtStream stream); | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, | |||||
| const void *alpha, const void *a, int lda, aclDataType dataTypeA, | |||||
| const void *x, int incx, aclDataType dataTypeX, | |||||
| const void *beta, void *y, int incy, aclDataType dataTypeY, | |||||
| aclComputeType type, aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -74,10 +83,15 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, co | |||||
| * | * | ||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA, | |||||
| aclDataType dataTypeX, aclDataType dataTypeY, | |||||
| aclComputeType type, aclopHandle **handle); | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, | |||||
| int m, | |||||
| int n, | |||||
| aclDataType dataTypeA, | |||||
| aclDataType dataTypeX, | |||||
| aclDataType dataTypeY, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -101,9 +115,18 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, i | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha, | |||||
| const aclFloat16 *a, int lda, const aclFloat16 *x, int incx, | |||||
| const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type, | |||||
| ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, | |||||
| int m, | |||||
| int n, | |||||
| const aclFloat16 *alpha, | |||||
| const aclFloat16 *a, | |||||
| int lda, | |||||
| const aclFloat16 *x, | |||||
| int incx, | |||||
| const aclFloat16 *beta, | |||||
| aclFloat16 *y, | |||||
| int incy, | |||||
| aclComputeType type, | |||||
| aclrtStream stream); | aclrtStream stream); | ||||
| /** | /** | ||||
| @@ -119,7 +142,10 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, con | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type, | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, | |||||
| int m, | |||||
| int n, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | aclopHandle **handle); | ||||
| /** | /** | ||||
| @@ -145,9 +171,19 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, in | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a, | |||||
| int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y, | |||||
| int incy, aclComputeType type, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, | |||||
| int m, | |||||
| int n, | |||||
| const int32_t *alpha, | |||||
| const int8_t *a, | |||||
| int lda, | |||||
| const int8_t *x, | |||||
| int incx, | |||||
| const int32_t *beta, | |||||
| int32_t *y, | |||||
| int incy, | |||||
| aclComputeType type, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -162,7 +198,10 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, co | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type, | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, | |||||
| int m, | |||||
| int n, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | aclopHandle **handle); | ||||
| /** | /** | ||||
| @@ -194,11 +233,26 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, i | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
| int k, const void *alpha, const void *matrixA, int lda, | |||||
| aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB, | |||||
| const void *beta, void *matrixC, int ldc, aclDataType dataTypeC, | |||||
| aclComputeType type, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| const void *alpha, | |||||
| const void *matrixA, | |||||
| int lda, | |||||
| aclDataType dataTypeA, | |||||
| const void *matrixB, | |||||
| int ldb, | |||||
| aclDataType dataTypeB, | |||||
| const void *beta, | |||||
| void *matrixC, | |||||
| int ldc, | |||||
| aclDataType dataTypeC, | |||||
| aclComputeType type, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -220,10 +274,18 @@ ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType tra | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
| int m, int n, int k, aclDataType dataTypeA, | |||||
| aclDataType dataTypeB, aclDataType dataTypeC, | |||||
| aclComputeType type, aclopHandle **handle); | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| aclDataType dataTypeA, | |||||
| aclDataType dataTypeB, | |||||
| aclDataType dataTypeC, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -251,10 +313,22 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, a | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
| int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda, | |||||
| const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta, | |||||
| aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| const aclFloat16 *alpha, | |||||
| const aclFloat16 *matrixA, | |||||
| int lda, | |||||
| const aclFloat16 *matrixB, | |||||
| int ldb, | |||||
| const aclFloat16 *beta, | |||||
| aclFloat16 *matrixC, | |||||
| int ldc, | |||||
| aclComputeType type, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -272,8 +346,13 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType tran | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
| int m, int n, int k, aclComputeType type, | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | aclopHandle **handle); | ||||
| /** | /** | ||||
| @@ -302,10 +381,23 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, ac | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
| int k, const int32_t *alpha, const int8_t *matrixA, int lda, | |||||
| const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC, | |||||
| int ldc, aclComputeType type, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| const int32_t *alpha, | |||||
| const int8_t *matrixA, | |||||
| int lda, | |||||
| const int8_t *matrixB, | |||||
| int ldb, | |||||
| const int32_t *beta, | |||||
| int32_t *matrixC, | |||||
| int ldc, | |||||
| aclComputeType type, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -323,12 +415,17 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType tra | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
| int m, int n, int k, aclComputeType type, | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | aclopHandle **handle); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
| #endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
| @@ -53,109 +53,123 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output | |||||
| // Supported Pixel Format | // Supported Pixel Format | ||||
| enum acldvppPixelFormat { | enum acldvppPixelFormat { | ||||
| PIXEL_FORMAT_YUV_400 = 0, // 0 | |||||
| PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 | |||||
| PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 | |||||
| PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 | |||||
| PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 | |||||
| PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 | |||||
| PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 | |||||
| PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 | |||||
| PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 | |||||
| PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 | |||||
| PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 | |||||
| PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 | |||||
| PIXEL_FORMAT_RGB_888 = 12, // 12 | |||||
| PIXEL_FORMAT_BGR_888 = 13, // 13 | |||||
| PIXEL_FORMAT_ARGB_8888 = 14, // 14 | |||||
| PIXEL_FORMAT_ABGR_8888 = 15, // 15 | |||||
| PIXEL_FORMAT_RGBA_8888 = 16, // 16 | |||||
| PIXEL_FORMAT_BGRA_8888 = 17, // 17 | |||||
| PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 | |||||
| PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 | |||||
| PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 | |||||
| PIXEL_FORMAT_YVU_PLANAR_422, | |||||
| PIXEL_FORMAT_YVU_PLANAR_444, | |||||
| PIXEL_FORMAT_RGB_444 = 23, | |||||
| PIXEL_FORMAT_BGR_444, | |||||
| PIXEL_FORMAT_ARGB_4444, | |||||
| PIXEL_FORMAT_ABGR_4444, | |||||
| PIXEL_FORMAT_RGBA_4444, | |||||
| PIXEL_FORMAT_BGRA_4444, | |||||
| PIXEL_FORMAT_RGB_555, | |||||
| PIXEL_FORMAT_BGR_555, | |||||
| PIXEL_FORMAT_RGB_565, | |||||
| PIXEL_FORMAT_BGR_565, | |||||
| PIXEL_FORMAT_ARGB_1555, | |||||
| PIXEL_FORMAT_ABGR_1555, | |||||
| PIXEL_FORMAT_RGBA_1555, | |||||
| PIXEL_FORMAT_BGRA_1555, | |||||
| PIXEL_FORMAT_ARGB_8565, | |||||
| PIXEL_FORMAT_ABGR_8565, | |||||
| PIXEL_FORMAT_RGBA_8565, | |||||
| PIXEL_FORMAT_BGRA_8565, | |||||
| PIXEL_FORMAT_RGB_BAYER_8BPP = 50, | |||||
| PIXEL_FORMAT_RGB_BAYER_10BPP, | |||||
| PIXEL_FORMAT_RGB_BAYER_12BPP, | |||||
| PIXEL_FORMAT_RGB_BAYER_14BPP, | |||||
| PIXEL_FORMAT_RGB_BAYER_16BPP, | |||||
| PIXEL_FORMAT_BGR_888_PLANAR = 70, | |||||
| PIXEL_FORMAT_HSV_888_PACKAGE, | |||||
| PIXEL_FORMAT_HSV_888_PLANAR, | |||||
| PIXEL_FORMAT_LAB_888_PACKAGE, | |||||
| PIXEL_FORMAT_LAB_888_PLANAR, | |||||
| PIXEL_FORMAT_S8C1, | |||||
| PIXEL_FORMAT_S8C2_PACKAGE, | |||||
| PIXEL_FORMAT_S8C2_PLANAR, | |||||
| PIXEL_FORMAT_S16C1, | |||||
| PIXEL_FORMAT_U8C1, | |||||
| PIXEL_FORMAT_U16C1, | |||||
| PIXEL_FORMAT_S32C1, | |||||
| PIXEL_FORMAT_U32C1, | |||||
| PIXEL_FORMAT_U64C1, | |||||
| PIXEL_FORMAT_S64C1, | |||||
| PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, | |||||
| PIXEL_FORMAT_YVU_SEMIPLANAR_440, | |||||
| PIXEL_FORMAT_FLOAT32, | |||||
| PIXEL_FORMAT_BUTT, | |||||
| PIXEL_FORMAT_UNKNOWN = 10000 | |||||
| PIXEL_FORMAT_YUV_400 = 0, // 0 | |||||
| PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 | |||||
| PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 | |||||
| PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 | |||||
| PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 | |||||
| PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 | |||||
| PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 | |||||
| PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 | |||||
| PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 | |||||
| PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 | |||||
| PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 | |||||
| PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 | |||||
| PIXEL_FORMAT_RGB_888 = 12, // 12 | |||||
| PIXEL_FORMAT_BGR_888 = 13, // 13 | |||||
| PIXEL_FORMAT_ARGB_8888 = 14, // 14 | |||||
| PIXEL_FORMAT_ABGR_8888 = 15, // 15 | |||||
| PIXEL_FORMAT_RGBA_8888 = 16, // 16 | |||||
| PIXEL_FORMAT_BGRA_8888 = 17, // 17 | |||||
| PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 | |||||
| PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 | |||||
| PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 | |||||
| PIXEL_FORMAT_YVU_PLANAR_422, | |||||
| PIXEL_FORMAT_YVU_PLANAR_444, | |||||
| PIXEL_FORMAT_RGB_444 = 23, | |||||
| PIXEL_FORMAT_BGR_444, | |||||
| PIXEL_FORMAT_ARGB_4444, | |||||
| PIXEL_FORMAT_ABGR_4444, | |||||
| PIXEL_FORMAT_RGBA_4444, | |||||
| PIXEL_FORMAT_BGRA_4444, | |||||
| PIXEL_FORMAT_RGB_555, | |||||
| PIXEL_FORMAT_BGR_555, | |||||
| PIXEL_FORMAT_RGB_565, | |||||
| PIXEL_FORMAT_BGR_565, | |||||
| PIXEL_FORMAT_ARGB_1555, | |||||
| PIXEL_FORMAT_ABGR_1555, | |||||
| PIXEL_FORMAT_RGBA_1555, | |||||
| PIXEL_FORMAT_BGRA_1555, | |||||
| PIXEL_FORMAT_ARGB_8565, | |||||
| PIXEL_FORMAT_ABGR_8565, | |||||
| PIXEL_FORMAT_RGBA_8565, | |||||
| PIXEL_FORMAT_BGRA_8565, | |||||
| PIXEL_FORMAT_RGB_BAYER_8BPP = 50, | |||||
| PIXEL_FORMAT_RGB_BAYER_10BPP, | |||||
| PIXEL_FORMAT_RGB_BAYER_12BPP, | |||||
| PIXEL_FORMAT_RGB_BAYER_14BPP, | |||||
| PIXEL_FORMAT_RGB_BAYER_16BPP, | |||||
| PIXEL_FORMAT_BGR_888_PLANAR = 70, | |||||
| PIXEL_FORMAT_HSV_888_PACKAGE, | |||||
| PIXEL_FORMAT_HSV_888_PLANAR, | |||||
| PIXEL_FORMAT_LAB_888_PACKAGE, | |||||
| PIXEL_FORMAT_LAB_888_PLANAR, | |||||
| PIXEL_FORMAT_S8C1, | |||||
| PIXEL_FORMAT_S8C2_PACKAGE, | |||||
| PIXEL_FORMAT_S8C2_PLANAR, | |||||
| PIXEL_FORMAT_S16C1, | |||||
| PIXEL_FORMAT_U8C1, | |||||
| PIXEL_FORMAT_U16C1, | |||||
| PIXEL_FORMAT_S32C1, | |||||
| PIXEL_FORMAT_U32C1, | |||||
| PIXEL_FORMAT_U64C1, | |||||
| PIXEL_FORMAT_S64C1, | |||||
| PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, | |||||
| PIXEL_FORMAT_YVU_SEMIPLANAR_440, | |||||
| PIXEL_FORMAT_FLOAT32, | |||||
| PIXEL_FORMAT_BUTT, | |||||
| PIXEL_FORMAT_UNKNOWN = 10000 | |||||
| }; | }; | ||||
| // Stream Format | // Stream Format | ||||
| enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; | |||||
| enum acldvppStreamFormat { | |||||
| H265_MAIN_LEVEL = 0, | |||||
| H264_BASELINE_LEVEL, | |||||
| H264_MAIN_LEVEL, | |||||
| H264_HIGH_LEVEL | |||||
| }; | |||||
| // Supported Channel Mode | // Supported Channel Mode | ||||
| enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; | |||||
| enum acldvppChannelMode { | |||||
| DVPP_CHNMODE_VPC = 1, | |||||
| DVPP_CHNMODE_JPEGD = 2, | |||||
| DVPP_CHNMODE_JPEGE = 4 | |||||
| }; | |||||
| // Supported Border Type | // Supported Border Type | ||||
| enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; | |||||
| enum acldvppBorderType { | |||||
| BORDER_CONSTANT = 0, | |||||
| BORDER_REPLICATE, | |||||
| BORDER_REFLECT, | |||||
| BORDER_REFLECT_101 | |||||
| }; | |||||
| // Venc parameter type | // Venc parameter type | ||||
| enum aclvencChannelDescParamType { | enum aclvencChannelDescParamType { | ||||
| ACL_VENC_THREAD_ID_UINT64 = 0, | |||||
| ACL_VENC_CALLBACK_PTR, | |||||
| ACL_VENC_PIXEL_FORMAT_UINT32, | |||||
| ACL_VENC_ENCODE_TYPE_UINT32, | |||||
| ACL_VENC_PIC_WIDTH_UINT32, | |||||
| ACL_VENC_PIC_HEIGHT_UINT32, | |||||
| ACL_VENC_KEY_FRAME_INTERVAL_UINT32, | |||||
| ACL_VENC_BUF_ADDR_PTR, | |||||
| ACL_VENC_BUF_SIZE_UINT32, | |||||
| ACL_VENC_RC_MODE_UINT32, | |||||
| ACL_VENC_SRC_RATE_UINT32, | |||||
| ACL_VENC_MAX_BITRATE_UINT32, | |||||
| ACL_VENC_MAX_IP_PROP_UINT32 | |||||
| ACL_VENC_THREAD_ID_UINT64 = 0, | |||||
| ACL_VENC_CALLBACK_PTR, | |||||
| ACL_VENC_PIXEL_FORMAT_UINT32, | |||||
| ACL_VENC_ENCODE_TYPE_UINT32, | |||||
| ACL_VENC_PIC_WIDTH_UINT32, | |||||
| ACL_VENC_PIC_HEIGHT_UINT32, | |||||
| ACL_VENC_KEY_FRAME_INTERVAL_UINT32, | |||||
| ACL_VENC_BUF_ADDR_PTR, | |||||
| ACL_VENC_BUF_SIZE_UINT32, | |||||
| ACL_VENC_RC_MODE_UINT32, | |||||
| ACL_VENC_SRC_RATE_UINT32, | |||||
| ACL_VENC_MAX_BITRATE_UINT32, | |||||
| ACL_VENC_MAX_IP_PROP_UINT32 | |||||
| }; | }; | ||||
| // Jpeg picture format | // Jpeg picture format | ||||
| enum acldvppJpegFormat { | enum acldvppJpegFormat { | ||||
| ACL_JPEG_CSS_444 = 0, | |||||
| ACL_JPEG_CSS_422, | |||||
| ACL_JPEG_CSS_420, | |||||
| ACL_JPEG_CSS_GRAY, | |||||
| ACL_JPEG_CSS_440, | |||||
| ACL_JPEG_CSS_411, | |||||
| ACL_JPEG_CSS_UNKNOWN = 1000 | |||||
| ACL_JPEG_CSS_444 = 0, | |||||
| ACL_JPEG_CSS_422, | |||||
| ACL_JPEG_CSS_420, | |||||
| ACL_JPEG_CSS_GRAY, | |||||
| ACL_JPEG_CSS_440, | |||||
| ACL_JPEG_CSS_411, | |||||
| ACL_JPEG_CSS_UNKNOWN = 1000 | |||||
| }; | }; | ||||
| /** | /** | ||||
| @@ -509,7 +523,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD | |||||
| * @retval null for failed. | * @retval null for failed. | ||||
| * @retval other success | * @retval other success | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top, | |||||
| ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, | |||||
| uint32_t right, | |||||
| uint32_t top, | |||||
| uint32_t bottom); | uint32_t bottom); | ||||
| /** | /** | ||||
| @@ -588,7 +604,10 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top, | |||||
| ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, | |||||
| uint32_t left, | |||||
| uint32_t right, | |||||
| uint32_t top, | |||||
| uint32_t bottom); | uint32_t bottom); | ||||
| /** | /** | ||||
| @@ -1077,8 +1096,7 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc | |||||
| * @retval ACL_SUCCESS for success, other for failure | * @retval ACL_SUCCESS for success, other for failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, | ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, | ||||
| aclvencChannelDescParamType paramType, size_t length, | |||||
| const void *param); | |||||
| aclvencChannelDescParamType paramType, size_t length, const void *param); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1227,8 +1245,7 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne | |||||
| * @retval ACL_SUCCESS for success, other for failure | * @retval ACL_SUCCESS for success, other for failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, | ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, | ||||
| aclvencChannelDescParamType paramType, size_t length, | |||||
| size_t *paramRetSize, void *param); | |||||
| aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1528,7 +1545,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height, | |||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, | |||||
| uint32_t size, | |||||
| uint32_t *width, | |||||
| uint32_t *height, | |||||
| int32_t *components); | int32_t *components); | ||||
| /** | /** | ||||
| @@ -1545,8 +1565,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width, | |||||
| uint32_t *height, int32_t *components, | |||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, | |||||
| uint32_t size, | |||||
| uint32_t *width, | |||||
| uint32_t *height, | |||||
| int32_t *components, | |||||
| acldvppJpegFormat *format); | acldvppJpegFormat *format); | ||||
| /** | /** | ||||
| @@ -1561,7 +1584,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_ | |||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc, | ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc, | ||||
| const acldvppJpegeConfig *config, uint32_t *size); | |||||
| const acldvppJpegeConfig *config, | |||||
| uint32_t *size); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1575,8 +1599,10 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize, | |||||
| acldvppPixelFormat outputPixelFormat, uint32_t *decSize); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, | |||||
| uint32_t dataSize, | |||||
| acldvppPixelFormat outputPixelFormat, | |||||
| uint32_t *decSize); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1591,8 +1617,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_ | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width, | |||||
| uint32_t *height, int32_t *components); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, | |||||
| uint32_t dataSize, | |||||
| uint32_t *width, | |||||
| uint32_t *height, | |||||
| int32_t *components); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1606,8 +1635,10 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t d | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize, | |||||
| acldvppPixelFormat outputPixelFormat, uint32_t *decSize); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, | |||||
| uint32_t dataSize, | |||||
| acldvppPixelFormat outputPixelFormat, | |||||
| uint32_t *decSize); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1671,8 +1702,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDe | |||||
| * @see acldvppCreateChannel | acldvppCreatePicDesc | * @see acldvppCreateChannel | acldvppCreatePicDesc | ||||
| * | acldvppCreateResizeConfig | * | acldvppCreateResizeConfig | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig, | |||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, | |||||
| acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, | |||||
| acldvppResizeConfig *resizeConfig, | |||||
| aclrtStream stream); | aclrtStream stream); | ||||
| /** | /** | ||||
| @@ -1708,8 +1741,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, | |||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, | |||||
| acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, | |||||
| acldvppRoiConfig *cropArea, | |||||
| aclrtStream stream); | aclrtStream stream); | ||||
| /** | /** | ||||
| @@ -1734,9 +1769,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc | |||||
| * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, | ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, | ||||
| acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, | |||||
| uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, | |||||
| acldvppRoiConfig *cropAreas[], aclrtStream stream); | |||||
| acldvppBatchPicDesc *srcBatchPicDescs, | |||||
| uint32_t *roiNums, | |||||
| uint32_t size, | |||||
| acldvppBatchPicDesc *dstBatchPicDescs, | |||||
| acldvppRoiConfig *cropAreas[], | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1759,9 +1797,12 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe | |||||
| * | * | ||||
| * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, | |||||
| acldvppRoiConfig *pasteArea, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, | |||||
| acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, | |||||
| acldvppRoiConfig *cropArea, | |||||
| acldvppRoiConfig *pasteArea, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1786,11 +1827,14 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha | |||||
| * | * | ||||
| * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, | |||||
| acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, | |||||
| uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, | |||||
| acldvppRoiConfig *cropAreas[], | |||||
| acldvppRoiConfig *pasteAreas[], aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, | |||||
| acldvppBatchPicDesc *srcBatchPicDescs, | |||||
| uint32_t *roiNums, | |||||
| uint32_t size, | |||||
| acldvppBatchPicDesc *dstBatchPicDescs, | |||||
| acldvppRoiConfig *cropAreas[], | |||||
| acldvppRoiConfig *pasteAreas[], | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1818,8 +1862,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc | |||||
| * | * | ||||
| * @see acldvppCreateChannel | acldvppCreatePicDesc | * @see acldvppCreateChannel | acldvppCreatePicDesc | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, | |||||
| acldvppPicDesc *outputDesc, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, | |||||
| const void *data, | |||||
| uint32_t size, | |||||
| acldvppPicDesc *outputDesc, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1837,8 +1884,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelD | |||||
| * | * | ||||
| * @see acldvppCreateChannel | acldvppCreateJpegeConfig | * @see acldvppCreateChannel | acldvppCreateJpegeConfig | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||||
| const void *data, uint32_t *size, acldvppJpegeConfig *config, | |||||
| ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, | |||||
| acldvppPicDesc *inputDesc, | |||||
| const void *data, | |||||
| uint32_t *size, | |||||
| acldvppJpegeConfig *config, | |||||
| aclrtStream stream); | aclrtStream stream); | ||||
| /** | /** | ||||
| @@ -1856,8 +1906,11 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelD | |||||
| * | * | ||||
| * @see acldvppCreateChannel | acldvppCreatePicDesc | * @see acldvppCreateChannel | acldvppCreatePicDesc | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, | |||||
| acldvppPicDesc *outputDesc, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, | |||||
| const void *data, | |||||
| uint32_t size, | |||||
| acldvppPicDesc *outputDesc, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1912,8 +1965,11 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe | |||||
| * | * | ||||
| * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, | |||||
| acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData); | |||||
| ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, | |||||
| acldvppStreamDesc *input, | |||||
| acldvppPicDesc *output, | |||||
| aclvdecFrameConfig *config, | |||||
| void *userData); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1932,8 +1988,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, a | |||||
| * | * | ||||
| * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame | * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, | |||||
| aclvdecFrameConfig *config, void *userData); | |||||
| ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, | |||||
| acldvppStreamDesc *input, | |||||
| aclvdecFrameConfig *config, | |||||
| void *userData); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1954,8 +2012,10 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel | |||||
| * | * | ||||
| * @see acldvppCreateChannel | acldvppCreatePicDesc | * @see acldvppCreateChannel | acldvppCreatePicDesc | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, | |||||
| acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1977,8 +2037,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha | |||||
| * | * | ||||
| * @see acldvppCreateChannel | acldvppCreatePicDesc | * @see acldvppCreateChannel | acldvppCreatePicDesc | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, | |||||
| acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, | |||||
| void *reserve, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -1990,7 +2053,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, | |||||
| uint32_t mode); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -2025,7 +2089,8 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode); | |||||
| ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, | |||||
| uint32_t outMode); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -2122,7 +2187,9 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap); | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
| * @retval OtherValues Failure | * @retval OtherValues Failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data, | |||||
| ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, | |||||
| uint32_t dim, | |||||
| uint8_t **data, | |||||
| uint32_t *len); | uint32_t *len); | ||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -2140,8 +2207,10 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, u | |||||
| * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap | * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, | ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, | ||||
| const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, | |||||
| const acldvppLutMap *lutMap, aclrtStream stream); | |||||
| const acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, | |||||
| const acldvppLutMap *lutMap, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -2162,7 +2231,8 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig(); | |||||
| * | * | ||||
| * @retval ACL_SUCCESS for success, other for failure | * @retval ACL_SUCCESS for success, other for failure | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index, | |||||
| ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, | |||||
| uint32_t index, | |||||
| double value); | double value); | ||||
| /** | /** | ||||
| @@ -2307,8 +2377,10 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor | |||||
| * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig | * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, | ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, | ||||
| const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, | |||||
| const acldvppBorderConfig *borderConfig, aclrtStream stream); | |||||
| const acldvppPicDesc *inputDesc, | |||||
| acldvppPicDesc *outputDesc, | |||||
| const acldvppBorderConfig *borderConfig, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -2325,8 +2397,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc | |||||
| * | * | ||||
| * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist | * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc, | |||||
| acldvppHist *hist, void *reserve, aclrtStream stream); | |||||
| ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, | |||||
| acldvppPicDesc *srcPicDesc, | |||||
| acldvppHist *hist, | |||||
| void *reserve, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -2335,7 +2410,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel | |||||
| * @retval null for failed. | * @retval null for failed. | ||||
| * @retval OtherValues success. | * @retval OtherValues success. | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist(); | |||||
| ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist(); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -2392,7 +2467,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, | |||||
| * | * | ||||
| * @see acldvppCreateHist | acldvppVpcCalcHistAsync | * @see acldvppCreateHist | acldvppVpcCalcHistAsync | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); | |||||
| ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -2415,4 +2490,4 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); | |||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ | |||||
| #endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ | |||||
| @@ -32,8 +32,8 @@ typedef struct aclfvSearchResult aclfvSearchResult; | |||||
| // search operation type | // search operation type | ||||
| enum aclfvSearchType { | enum aclfvSearchType { | ||||
| SEARCH_1_N, // 1:N operation type | |||||
| SEARCH_N_M // N:M operation type | |||||
| SEARCH_1_N, // 1:N operation type | |||||
| SEARCH_N_M // N:M operation type | |||||
| }; | }; | ||||
| /** | /** | ||||
| @@ -104,8 +104,7 @@ ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t | |||||
| * @retval OtherValues success. | * @retval OtherValues success. | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, | ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, | ||||
| uint32_t featureLen, uint32_t featureCount, | |||||
| uint8_t *featureData, uint32_t featureDataLen); | |||||
| uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -234,9 +233,8 @@ ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInp | |||||
| * @retval null for failed. OtherValues success | * @retval null for failed. OtherValues success | ||||
| */ | */ | ||||
| ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, | ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, | ||||
| uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, | |||||
| uint32_t *resultOffset, float *resultDistance, | |||||
| uint32_t dataLen); | |||||
| uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance, | |||||
| uint32_t dataLen); | |||||
| /** | /** | ||||
| * @ingroup AscendCL | * @ingroup AscendCL | ||||
| @@ -350,4 +348,4 @@ ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput | |||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
| #endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
| @@ -27,7 +27,7 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif // __cplusplus | |||||
| #endif // __cplusplus | |||||
| /** | /** | ||||
| * @brief Initialize HCCL. | * @brief Initialize HCCL. | ||||
| @@ -66,15 +66,14 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root | |||||
| * @param sendBuf A pointer identifying the input data address of the operator. | * @param sendBuf A pointer identifying the input data address of the operator. | ||||
| * @param recvBuf A pointer identifying the output data address of the operator. | * @param recvBuf A pointer identifying the output data address of the operator. | ||||
| * @param count An integer(u64) identifying the number of the output data. | * @param count An integer(u64) identifying the number of the output data. | ||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, | |||||
| * float32. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32. | |||||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | ||||
| * @param comm A pointer identifying the communication resource based on. | * @param comm A pointer identifying the communication resource based on. | ||||
| * @param stream A pointer identifying the stream information. | * @param stream A pointer identifying the stream information. | ||||
| * @return HcclResult | |||||
| * @return HcclResult | |||||
| */ | */ | ||||
| extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, | |||||
| HcclComm comm, aclrtStream stream); | |||||
| extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, | |||||
| HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
| /** | /** | ||||
| * @brief Broadcast operator. | * @brief Broadcast operator. | ||||
| @@ -85,10 +84,10 @@ extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, Hc | |||||
| * @param root An integer(u32) identifying the root rank in the operator. | * @param root An integer(u32) identifying the root rank in the operator. | ||||
| * @param comm A pointer identifying the communication resource based on | * @param comm A pointer identifying the communication resource based on | ||||
| * @param stream A pointer identifying the stream information. | * @param stream A pointer identifying the stream information. | ||||
| * @return HcclResult | |||||
| * @return HcclResult | |||||
| */ | */ | ||||
| extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||||
| aclrtStream stream); | |||||
| extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||||
| aclrtStream stream); | |||||
| /** | /** | ||||
| * @brief ReduceScatter operator. | * @brief ReduceScatter operator. | ||||
| @@ -100,10 +99,10 @@ extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType | |||||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | ||||
| * @param comm A pointer identifying the communication resource based on. | * @param comm A pointer identifying the communication resource based on. | ||||
| * @param stream A pointer identifying the stream information. | * @param stream A pointer identifying the stream information. | ||||
| * @return HcclResult | |||||
| * @return HcclResult | |||||
| */ | */ | ||||
| extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||||
| HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
| extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||||
| HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
| /** | /** | ||||
| * @brief AllGather operator. | * @brief AllGather operator. | ||||
| @@ -114,10 +113,10 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | ||||
| * @param comm A pointer identifying the communication resource based on. | * @param comm A pointer identifying the communication resource based on. | ||||
| * @param stream A pointer identifying the stream information. | * @param stream A pointer identifying the stream information. | ||||
| * @return HcclResult | |||||
| * @return HcclResult | |||||
| */ | */ | ||||
| extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||||
| aclrtStream stream); | |||||
| extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, | |||||
| HcclComm comm, aclrtStream stream); | |||||
| /** | /** | ||||
| * @brief Destroy HCCL comm | * @brief Destroy HCCL comm | ||||
| @@ -130,5 +129,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif // __cplusplus | |||||
| #endif // HCCL_H_ | |||||
| #endif // __cplusplus | |||||
| #endif // HCCL_H_ | |||||
| @@ -16,10 +16,10 @@ | |||||
| /** | /** | ||||
| * @file hccl_types.h | * @file hccl_types.h | ||||
| * @brief HCCL data type definition | |||||
| * | |||||
| * @brief HCCL data type definition | |||||
| * | |||||
| */ | */ | ||||
| #ifndef HCCL_TYPES_H_ | #ifndef HCCL_TYPES_H_ | ||||
| #define HCCL_TYPES_H_ | #define HCCL_TYPES_H_ | ||||
| @@ -27,33 +27,33 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif // __cplusplus | |||||
| #endif // __cplusplus | |||||
| /** | /** | ||||
| * @brief HCCL functions return value definition | * @brief HCCL functions return value definition | ||||
| */ | */ | ||||
| typedef enum { | typedef enum { | ||||
| HCCL_SUCCESS = 0, /**< success */ | |||||
| HCCL_E_PARA = 1, /**< parameter error */ | |||||
| HCCL_E_PTR = 2, /**< empty pointer */ | |||||
| HCCL_E_MEMORY = 3, /**< memory error */ | |||||
| HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
| HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
| HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
| HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
| HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
| HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
| HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
| HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
| HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
| HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
| HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
| HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
| HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
| HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
| HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
| HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
| HCCL_E_RESERVED /**< reserved */ | |||||
| HCCL_SUCCESS = 0, /**< success */ | |||||
| HCCL_E_PARA = 1, /**< parameter error */ | |||||
| HCCL_E_PTR = 2, /**< empty pointer */ | |||||
| HCCL_E_MEMORY = 3, /**< memory error */ | |||||
| HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
| HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
| HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
| HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
| HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
| HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
| HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
| HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
| HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
| HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
| HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
| HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
| HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
| HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
| HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
| HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
| HCCL_E_RESERVED /**< reserved */ | |||||
| } HcclResult; | } HcclResult; | ||||
| /** | /** | ||||
| @@ -65,37 +65,37 @@ typedef void *HcclComm; | |||||
| * @brief HCCL Reduction operation | * @brief HCCL Reduction operation | ||||
| */ | */ | ||||
| typedef enum { | typedef enum { | ||||
| HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
| HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
| HCCL_REDUCE_MAX = 2, /**< max */ | |||||
| HCCL_REDUCE_MIN = 3, /**< min */ | |||||
| HCCL_REDUCE_RESERVED /**< reserved */ | |||||
| HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
| HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
| HCCL_REDUCE_MAX = 2, /**< max */ | |||||
| HCCL_REDUCE_MIN = 3, /**< min */ | |||||
| HCCL_REDUCE_RESERVED /**< reserved */ | |||||
| } HcclReduceOp; | } HcclReduceOp; | ||||
| /** | /** | ||||
| * @brief HCCL data type | * @brief HCCL data type | ||||
| */ | */ | ||||
| typedef enum { | typedef enum { | ||||
| HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
| HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
| HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
| HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
| HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
| HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||||
| HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||||
| HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
| HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
| HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
| HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
| HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
| HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
| HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||||
| HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||||
| HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
| } HcclDataType; | } HcclDataType; | ||||
| const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
| const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
| /** | /** | ||||
| * @brief HCCL root info | * @brief HCCL root info | ||||
| */ | */ | ||||
| typedef struct HcclRootInfoDef { | typedef struct HcclRootInfoDef { | ||||
| char internal[HCCL_ROOT_INFO_BYTES]; | |||||
| char internal[HCCL_ROOT_INFO_BYTES]; | |||||
| } HcclRootInfo; | } HcclRootInfo; | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif // __cplusplus | |||||
| #endif // HCCL_TYPES_H_ | |||||
| #endif // __cplusplus | |||||
| #endif // HCCL_TYPES_H_ | |||||
| @@ -23,79 +23,80 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
| static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
| static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
| static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
| static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
| static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
| static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
| static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
| static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
| static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
| static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
| static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
| static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
| static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
| static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
| static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
| static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
| static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
| static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
| static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
| static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
| static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
| static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
| static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
| static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||||
| static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
| static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
| static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
| static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
| static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
| static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
| static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
| static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||||
| static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
| static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
| static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
| static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
| static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
| static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
| static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
| static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
| static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
| static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
| static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
| static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
| static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
| static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
| static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
| static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
| static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
| static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
| static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
| static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
| static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
| static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
| static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
| static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
| static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
| static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
| static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
| static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
| static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
| static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
| static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
| static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
| static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
| static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
| static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
| static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
| static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
| static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
| static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
| static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
| static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| @@ -107,11 +107,13 @@ across multiple sessions . \n | |||||
| REG_OP(Unbatch) | REG_OP(Unbatch) | ||||
| .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(index, TensorType({DT_INT64})) | .INPUT(index, TensorType({DT_INT64})) | ||||
| .INPUT(id, TensorType({DT_INT64})) | .INPUT(id, TensorType({DT_INT64})) | ||||
| .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .REQUIRED_ATTR(timeout_micros, Int) | .REQUIRED_ATTR(timeout_micros, Int) | ||||
| .ATTR(container, String, "") | .ATTR(container, String, "") | ||||
| .ATTR(shared_name, String, "") | .ATTR(shared_name, String, "") | ||||
| @@ -146,13 +148,16 @@ across multiple sessions . \n | |||||
| REG_OP(UnbatchGrad) | REG_OP(UnbatchGrad) | ||||
| .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(index, TensorType({DT_INT64})) | .INPUT(index, TensorType({DT_INT64})) | ||||
| .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(id, TensorType({DT_INT64})) | .INPUT(id, TensorType({DT_INT64})) | ||||
| .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(container, String, "") | .ATTR(container, String, "") | ||||
| .ATTR(shared_name, String, "") | .ATTR(shared_name, String, "") | ||||
| .OP_END_FACTORY_REG(UnbatchGrad) | .OP_END_FACTORY_REG(UnbatchGrad) | ||||
| @@ -1430,6 +1430,24 @@ REG_OP(OrderedMapClear) | |||||
| .ATTR(shared_name, String, "") | .ATTR(shared_name, String, "") | ||||
| .OP_END_FACTORY_REG(OrderedMapClear) | .OP_END_FACTORY_REG(OrderedMapClear) | ||||
| /** | |||||
| *@brief FakeQueue, support tf api FixedLengthRecordReader. \n | |||||
| *@par Inputs: | |||||
| *Including: | |||||
| * @li resource: A Tensor of type DT_RESOURCE. | |||||
| *@par Outputs: | |||||
| *handle: A Tensor of type DT_STRING ref. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the TensorFlow operator FakeQueue. | |||||
| */ | |||||
| REG_OP(FakeQueue) | |||||
| .INPUT(resource, TensorType({DT_RESOURCE})) | |||||
| .OUTPUT(handle, TensorType({DT_STRING})) | |||||
| .OP_END_FACTORY_REG(FakeQueue) | |||||
| /** | /** | ||||
| *@brief Returns the number of incomplete elements in the underlying container. \n | *@brief Returns the number of incomplete elements in the underlying container. \n | ||||
| @@ -2258,6 +2276,7 @@ REG_OP(LruCache) | |||||
| .ATTR(shared_name, String, "LruCache") | .ATTR(shared_name, String, "LruCache") | ||||
| .ATTR(cache_size, Int, 100000) | .ATTR(cache_size, Int, 100000) | ||||
| .ATTR(load_factor, Float, 1) | .ATTR(load_factor, Float, 1) | ||||
| .REQUIRED_ATTR(dtype, Type) | |||||
| .OP_END_FACTORY_REG(LruCache) | .OP_END_FACTORY_REG(LruCache) | ||||
| /** | /** | ||||
| @@ -2277,9 +2296,9 @@ REG_OP(CacheAdd) | |||||
| .INPUT(cache, TensorType({DT_RESOURCE})) | .INPUT(cache, TensorType({DT_RESOURCE})) | ||||
| .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
| .OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
| .OUTPUT(swap_in_idx, TensorType({DT_INT64})) | |||||
| .OUTPUT(swap_in_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
| .OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
| .OUTPUT(swap_out_idx, TensorType({DT_INT64})) | |||||
| .OUTPUT(swap_out_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
| .OP_END_FACTORY_REG(CacheAdd) | .OP_END_FACTORY_REG(CacheAdd) | ||||
| /** | /** | ||||
| @@ -2295,9 +2314,31 @@ REG_OP(CacheAdd) | |||||
| REG_OP(CacheRemoteIndexToLocal) | REG_OP(CacheRemoteIndexToLocal) | ||||
| .INPUT(cache, TensorType({DT_RESOURCE})) | .INPUT(cache, TensorType({DT_RESOURCE})) | ||||
| .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
| .OUTPUT(local_idx, TensorType({DT_INT64})) | |||||
| .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
| .OP_END_FACTORY_REG(CacheRemoteIndexToLocal) | .OP_END_FACTORY_REG(CacheRemoteIndexToLocal) | ||||
| /** | |||||
| *@brief CacheAllIndexToLocal, get id in cache. \n | |||||
| *@par Inputs: | |||||
| *cache: resource data. A Tensor of type DT_RESOURCE. \n | |||||
| *@par Attributes: | |||||
| *dtype: A required Type. NOTE(review): presumably selects the data type of "local_idx" - confirm. \n | |||||
| *@par Outputs: | |||||
| *local_idx: id in cache. A Tensor of type int64, int32, uint64 or uint32. \n | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | |||||
| REG_OP(CacheAllIndexToLocal) | |||||
| .INPUT(cache, TensorType({DT_RESOURCE})) | |||||
| .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
| .REQUIRED_ATTR(dtype, Type) | |||||
| .OP_END_FACTORY_REG(CacheAllIndexToLocal) | |||||
| REG_OP(DynamicGetNext) /* Registers the DynamicGetNext operator; no upstream description is provided. */ | |||||
| .INPUT(x, TensorType::ALL()) /* x: the single input; any tensor type is accepted. */ | |||||
| .DYNAMIC_OUTPUT(y, TensorType::ALL()) /* y: registered as a dynamic output; any tensor type is accepted. */ | |||||
| .ATTR(output_types, ListType, {}) /* Optional; defaults to an empty list. */ | |||||
| .ATTR(output_shapes, ListListInt, {{}, {}}) /* Optional; defaults to two empty shapes. */ | |||||
| .ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile") /* Optional; defaults to "lazy_recompile". */ | |||||
| .ATTR(_getnext_inputs_shape_range, String, "") /* Optional; defaults to the empty string. */ | |||||
| .OP_END_FACTORY_REG(DynamicGetNext) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ | ||||
| @@ -3627,6 +3627,35 @@ REG_OP(Lerp) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(Lerp) | .OP_END_FACTORY_REG(Lerp) | ||||
| /** | |||||
| *@brief Returns the number ("num") of elements for which abs(x1-x2) > atol+rtol*abs(x2), evaluated element-wise. \n | |||||
| * | |||||
| *@par Inputs: | |||||
| *@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16. | |||||
| *@li x2: A tensor of the same type as "x1". | |||||
| * | |||||
| *@par Attributes: | |||||
| * atol: An optional float. Defaults to "1e-05". | |||||
| * rtol: An optional float. Defaults to "1e-03". | |||||
| * | |||||
| *@par Outputs: | |||||
| * num: A tensor of type float32, as registered below. NOTE(review): this comment previously said int32 - confirm which one the kernel produces. | |||||
| * diff: A tensor of type float16. | |||||
| * | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| * | |||||
| */ | |||||
| REG_OP(DataCompare) | |||||
| .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 })) | |||||
| .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 })) | |||||
| .OUTPUT(num, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(diff, TensorType({DT_FLOAT16})) | |||||
| .ATTR(atol, Float, 1e-5) | |||||
| .ATTR(rtol, Float, 1e-3) | |||||
| .OP_END_FACTORY_REG(DataCompare) | |||||
| /** | /** | ||||
| *@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0 | *@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0 | ||||
| *otherwise The input does not need to explicitly be a 2D vector.The "axis" attribute indicates the dimension along | *otherwise The input does not need to explicitly be a 2D vector.The "axis" attribute indicates the dimension along | ||||
| @@ -3650,6 +3679,57 @@ REG_OP(HardMax) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .ATTR(axis, Int, -1) | .ATTR(axis, Int, -1) | ||||
| .OP_END_FACTORY_REG(HardMax) | .OP_END_FACTORY_REG(HardMax) | ||||
| /** | |||||
| * @brief Computes the dot product (inner product) of two tensors. This function does not broadcast. | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li input_x: A Tensor. The first tensor; must be 1-D. Must be one of the following types: float32, float16, uint8, int8, int32. \n | |||||
| * @li input_y: A Tensor. The second tensor; must be 1-D. Accepts the same types as "input_x". \n | |||||
| * @par Outputs: | |||||
| * @li output: A Tensor. Result of the two inputs, must be 1-D. Accepts the same types as "input_x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the PyTorch dot operator. \n | |||||
| */ | |||||
| REG_OP(Dot) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||||
| .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(Dot) | |||||
| /** | |||||
| *@brief Returns a new tensor with boolean elements representing | |||||
| *whether each element of "x1" is "close" to the corresponding element of "x2". \n | |||||
| *@par Inputs: | |||||
| *Two inputs, including: | |||||
| * @li x1: A tensor. Must be one of the following types: | |||||
| * float16, float32, int32. \n | |||||
| * @li x2: A tensor with the same type and shape as "x1". \n | |||||
| *@par Attributes: | |||||
| *@li rtol: An optional float. Defaults to 1e-05. \n | |||||
| *@li atol: An optional float. Defaults to 1e-08. \n | |||||
| *@li equal_nan: An optional bool. Defaults to false. \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor of type bool with the same shape as "x1". \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the PyTorch operator isclose. \n | |||||
| */ | |||||
| REG_OP(IsClose) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_BOOL})) | |||||
| .ATTR(rtol, Float, 1e-05) | |||||
| .ATTR(atol, Float, 1e-08) | |||||
| .ATTR(equal_nan, Bool, false) | |||||
| .OP_END_FACTORY_REG(IsClose) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ | ||||
| @@ -238,6 +238,15 @@ REG_OP(HcomRemoteRead) | |||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .OP_END_FACTORY_REG(HcomRemoteRead) | .OP_END_FACTORY_REG(HcomRemoteRead) | ||||
| /** | |||||
| * @brief Performs Remote Ref Read of input tensors | |||||
| * @par Inputs: | |||||
| * remote: A tensor. describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length | |||||
| * cache_var: The local base address | |||||
| * local_offset: Skip step length | |||||
| * @par Outputs: | |||||
| * cache_var: The local base address | |||||
| */ | |||||
| REG_OP(HcomRemoteRefRead) | REG_OP(HcomRemoteRefRead) | ||||
| .INPUT(remote, TensorType({DT_UINT64})) | .INPUT(remote, TensorType({DT_UINT64})) | ||||
| .INPUT(cache_var, TensorType({DT_UINT64})) | .INPUT(cache_var, TensorType({DT_UINT64})) | ||||
| @@ -258,6 +267,13 @@ REG_OP(HcomRemoteWrite) | |||||
| .INPUT(local, TensorType::ALL()) | .INPUT(local, TensorType::ALL()) | ||||
| .OP_END_FACTORY_REG(HcomRemoteWrite) | .OP_END_FACTORY_REG(HcomRemoteWrite) | ||||
| /** | |||||
| * @brief Performs Remote Scatter Write of input tensors | |||||
| * @par Inputs: | |||||
| * @li remote: A Tensor describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length | |||||
| * @li local: A Tensor whose value is length / size_of(Type) | |||||
| */ | |||||
| REG_OP(HcomRemoteScatterWrite) | REG_OP(HcomRemoteScatterWrite) | ||||
| .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) | .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) | ||||
| .INPUT(local, TensorType::ALL()) | .INPUT(local, TensorType::ALL()) | ||||
| @@ -652,6 +652,62 @@ REG_OP(RGBToHSV) | |||||
| /** | /** | ||||
| *@brief Generate a single randomly distorted bounding box for an image . \n | *@brief Generate a single randomly distorted bounding box for an image . \n | ||||
| *@par Inputs: | |||||
| *Input images must be a 4-D tensor. Inputs include: | |||||
| *@li image_size: 1-D, containing [height, width, channels]. | |||||
| *@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding | |||||
| boxes associated with the image. \n | |||||
| *@par Attributes: | |||||
| *@li seed: If either seed or seed2 are set to non-zero, the random number | |||||
| generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||||
| *@li seed2: A second seed to avoid seed collision. | |||||
| *@li min_object_covered: The cropped area of the image must contain at least | |||||
| this fraction of any bounding box supplied. The value of this parameter should | |||||
| be non-negative. In the case of 0, the cropped area does not need to overlap | |||||
| any of the bounding boxes supplied . | |||||
| *@li aspect_ratio_range: The cropped area of the image must have an aspect | |||||
| ratio = width / height within this range. | |||||
| *@li area_range: The cropped area of the image must contain a fraction of the | |||||
| supplied image within this range. Defaults to [0.05, 1.0]. | |||||
| *@li max_attempts: Number of attempts at generating a cropped region of the | |||||
| image of the specified constraints. After max_attempts failures, return the | |||||
| entire image. | |||||
| *@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes | |||||
| supplied. If true, assume an implicit bounding box covering the whole input. | |||||
| If false, raise an error . \n | |||||
| *@par Outputs: | |||||
| *@li begin: 1-D, containing [offset_height, offset_width, 0]. | |||||
| *@li size: 1-D, containing [target_height, target_width, -1]. | |||||
| *@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n | |||||
| *@attention Constraints: | |||||
| *Input images can be of different types but output images are always float . \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the TensorFlow SampleDistortedBoundingBox operator. | |||||
| */ | |||||
| REG_OP(SampleDistortedBoundingBox) | |||||
| .INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
| DT_INT32, DT_INT64 })) | |||||
| .INPUT(bounding_boxes, TensorType({ DT_FLOAT })) | |||||
| .OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
| DT_INT32, DT_INT64 })) | |||||
| .OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
| DT_INT32, DT_INT64 })) | |||||
| .OUTPUT(bboxes, TensorType({ DT_FLOAT })) | |||||
| .ATTR(seed, Int, 0) | |||||
| .ATTR(seed2, Int, 0) | |||||
| .ATTR(min_object_covered, Float, 0.1f) | |||||
| .ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) | |||||
| .ATTR(area_range, ListFloat, { 0.05f, 1.0f }) | |||||
| .ATTR(max_attempts, Int, 100) | |||||
| .ATTR(use_image_if_no_bounding_boxes, Bool, false) | |||||
| .OP_END_FACTORY_REG(SampleDistortedBoundingBox) | |||||
| /** | |||||
| *@brief Generate a single randomly distorted bounding box for an image . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li image_size: 1-D, containing [height, width, channels]. | *@li image_size: 1-D, containing [height, width, channels]. | ||||
| @@ -1424,11 +1480,11 @@ REG_OP(Resize) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. | *@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. | ||||
| *@li ratio: An optional int. Defaults to 1. Downscaling ratio. | |||||
| *@li ratio: An optional int. Defaults to 1. Downscaling ratio. | |||||
| *@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower but nicer upscaling of the chroma planes | *@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower but nicer upscaling of the chroma planes | ||||
| *@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input. | *@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input. | ||||
| *@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted. | *@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted. | ||||
| *@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n | |||||
| *@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *image: A Tensor dtype of uint8. | *image: A Tensor dtype of uint8. | ||||
| @@ -61,8 +61,8 @@ REG_OP(CholeskyGrad) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input x has to be symmetric and positive definite.Inputs include: | *The input x has to be symmetric and positive definite.Inputs include: | ||||
| *x:A Tensor. Must be one of the following types: double, float32. Shape | |||||
| is [..., M, M] . \n | |||||
| *x:A Tensor. Must be one of the following types: double, float32, float16, | |||||
| complex64, complex128. Shape is [..., M, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor. Has the same type as x . \n | *y:A Tensor. Has the same type as x . \n | ||||
| @@ -76,8 +76,10 @@ form square matrices. | |||||
| */ | */ | ||||
| REG_OP(Cholesky) | REG_OP(Cholesky) | ||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||||
| DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||||
| DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(Cholesky) | .OP_END_FACTORY_REG(Cholesky) | ||||
| /** | /** | ||||
| @@ -87,8 +89,8 @@ of one or more square matrices . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | ||||
| form square matrices. Inputs include: | form square matrices. Inputs include: | ||||
| *x:A Tensor. Must be one of the following types: double, float32. Shape is | |||||
| [..., M, M] . \n | |||||
| *x:A Tensor. Must be one of the following types: double, float32, | |||||
| complex64, complex128. Shape is [..., M, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *@li y:A Tensor. Has the same type as x. | *@li y:A Tensor. Has the same type as x. | ||||
| @@ -103,9 +105,9 @@ form square matrices. \n | |||||
| */ | */ | ||||
| REG_OP(LogMatrixDeterminant) | REG_OP(LogMatrixDeterminant) | ||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(LogMatrixDeterminant) | .OP_END_FACTORY_REG(LogMatrixDeterminant) | ||||
| /** | /** | ||||
| @@ -114,8 +116,8 @@ REG_OP(LogMatrixDeterminant) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | ||||
| form square matrices. Inputs include: | form square matrices. Inputs include: | ||||
| *x:A Tensor. Must be one of the following types: double, float32. Shape is | |||||
| [..., M, M] . \n | |||||
| *x:A Tensor. Must be one of the following types: double, float32, complex64, | |||||
| complex128. Shape is [..., M, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor. Has the same type as x . \n | *y:A Tensor. Has the same type as x . \n | ||||
| @@ -129,8 +131,8 @@ form square matrices. | |||||
| */ | */ | ||||
| REG_OP(MatrixDeterminant) | REG_OP(MatrixDeterminant) | ||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(MatrixDeterminant) | .OP_END_FACTORY_REG(MatrixDeterminant) | ||||
| /** | /** | ||||
| @@ -140,8 +142,7 @@ their adjoints (conjugate transposes) . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions | *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions | ||||
| form square matrices. Inputs include: | form square matrices. Inputs include: | ||||
| *x:A Tensor. Must be one of the following types: double, float. Shape is | |||||
| [..., M, M] . \n | |||||
| *x:A Tensor of input. Shape is [..., M, M] . \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *adjoint:An optional bool. Defaults to False.Boolean indicating whether to | *adjoint:An optional bool. Defaults to False.Boolean indicating whether to | ||||
| @@ -159,8 +160,10 @@ form square matrices. \n | |||||
| */ | */ | ||||
| REG_OP(MatrixInverse) | REG_OP(MatrixInverse) | ||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(adjoint, Bool, false) | .ATTR(adjoint, Bool, false) | ||||
| .OP_END_FACTORY_REG(MatrixInverse) | .OP_END_FACTORY_REG(MatrixInverse) | ||||
| @@ -169,8 +172,7 @@ REG_OP(MatrixInverse) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input rhs must have the same type as matrix. Inputs include: | *The input rhs must have the same type as matrix. Inputs include: | ||||
| *@li matrix:A Tensor. Must be one of the following types: double, float. | |||||
| Shape is [..., M, M]. | |||||
| *@li matrix:A Tensor of input. Shape is [..., M, M]. | |||||
| *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -189,9 +191,9 @@ dimensions form square matrices. \n | |||||
| */ | */ | ||||
| REG_OP(MatrixSolve) | REG_OP(MatrixSolve) | ||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(adjoint, Bool, false) | .ATTR(adjoint, Bool, false) | ||||
| .OP_END_FACTORY_REG(MatrixSolve) | .OP_END_FACTORY_REG(MatrixSolve) | ||||
| @@ -221,8 +223,10 @@ dimensions form square matrices. \n | |||||
| */ | */ | ||||
| REG_OP(MatrixSolveLs) | REG_OP(MatrixSolveLs) | ||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(l2, TensorType({DT_DOUBLE})) | .INPUT(l2, TensorType({DT_DOUBLE})) | ||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | ||||
| .ATTR(fast, Bool, true) | .ATTR(fast, Bool, true) | ||||
| @@ -234,8 +238,7 @@ matrices by backsubstitution . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input rhs must have the same type as matrix. Inputs include: | *The input rhs must have the same type as matrix. Inputs include: | ||||
| *@li matrix: A Tensor. Must be one of the following types: double, float. | |||||
| Shape is [..., M, M]. | |||||
| *@li matrix: A Tensor. Shape is [..., M, M]. | |||||
| *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -256,9 +259,12 @@ dimensions form square matrices. \n | |||||
| */ | */ | ||||
| REG_OP(MatrixTriangularSolve) | REG_OP(MatrixTriangularSolve) | ||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(lower, Bool, true) | .ATTR(lower, Bool, true) | ||||
| .ATTR(adjoint, Bool, false) | .ATTR(adjoint, Bool, false) | ||||
| .OP_END_FACTORY_REG(MatrixTriangularSolve) | .OP_END_FACTORY_REG(MatrixTriangularSolve) | ||||
| @@ -268,8 +274,7 @@ REG_OP(MatrixTriangularSolve) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input shape of x must be [..., M, N]. Inputs include: | *The input shape of x must be [..., M, N]. Inputs include: | ||||
| *x:A Tensor whose shape is [..., M, N]. Must be one of the following types: | |||||
| double, float . \n | |||||
| *x:A Tensor whose shape is [..., M, N]. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *full_matrices: An optional bool. Defaults to False. If true, compute | *full_matrices: An optional bool. Defaults to False. If true, compute | ||||
| @@ -289,9 +294,12 @@ dimensions form matrices of size [M, N]. \n | |||||
| */ | */ | ||||
| REG_OP(Qr) | REG_OP(Qr) | ||||
| .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||||
| .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||||
| .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||||
| .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128 })) | |||||
| .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128 })) | |||||
| .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128 })) | |||||
| .ATTR(full_matrices, Bool, false) | .ATTR(full_matrices, Bool, false) | ||||
| .OP_END_FACTORY_REG(Qr) | .OP_END_FACTORY_REG(Qr) | ||||
| @@ -384,8 +392,8 @@ of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n | |||||
| */ | */ | ||||
| REG_OP(Lu) | REG_OP(Lu) | ||||
| .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(p, TensorType({DT_INT32, DT_INT64})) | .OUTPUT(p, TensorType({DT_INT32, DT_INT64})) | ||||
| .REQUIRED_ATTR(output_idx_type, Type) | .REQUIRED_ATTR(output_idx_type, Type) | ||||
| .OP_END_FACTORY_REG(Lu) | .OP_END_FACTORY_REG(Lu) | ||||
| @@ -404,8 +412,8 @@ y: Shape is `[..., M, M]` . \n | |||||
| */ | */ | ||||
| REG_OP(MatrixSquareRoot) | REG_OP(MatrixSquareRoot) | ||||
| .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(MatrixSquareRoot) | .OP_END_FACTORY_REG(MatrixSquareRoot) | ||||
| /** | /** | ||||
| @@ -192,7 +192,7 @@ REG_OP(TensorListGetItem) | |||||
| .INPUT(element_shape, TensorType({DT_INT32})) | .INPUT(element_shape, TensorType({DT_INT32})) | ||||
| .OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | .OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | ||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | ||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | ||||
| .ATTR(element_dtype, Type, DT_INT32) | .ATTR(element_dtype, Type, DT_INT32) | ||||
| .OP_END_FACTORY_REG(TensorListGetItem) | .OP_END_FACTORY_REG(TensorListGetItem) | ||||
| @@ -222,6 +222,24 @@ REG_OP(Bucketize) | |||||
| .REQUIRED_ATTR(boundaries, ListFloat) | .REQUIRED_ATTR(boundaries, ListFloat) | ||||
| .OP_END_FACTORY_REG(Bucketize) | .OP_END_FACTORY_REG(Bucketize) | ||||
| /** | |||||
| *@brief Returns a new tensor with the truncated integer values of the elements of input. \n | |||||
| *@par Inputs: | |||||
| *One input, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n | |||||
| *@par Outputs: | |||||
| *output_y: A tensor with the same type and shape as "input_x". \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the PyTorch operator Trunc. \n | |||||
| */ | |||||
| REG_OP(Trunc) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) | |||||
| .OP_END_FACTORY_REG(Trunc) | |||||
| /** | /** | ||||
| *@brief Computes the sum along sparse segments of a tensor . \n | *@brief Computes the sum along sparse segments of a tensor . \n | ||||
| @@ -645,6 +663,7 @@ REG_OP(NLLLoss) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT})) | ||||
| .OUTPUT(total_weight, TensorType({DT_FLOAT})) | .OUTPUT(total_weight, TensorType({DT_FLOAT})) | ||||
| .ATTR(reduction, String, "mean") | .ATTR(reduction, String, "mean") | ||||
| .ATTR(ignore_index, Int, -100) | |||||
| .OP_END_FACTORY_REG(NLLLoss) | .OP_END_FACTORY_REG(NLLLoss) | ||||
| /** | /** | ||||
| @@ -674,6 +693,7 @@ REG_OP(NLLLossGrad) | |||||
| .INPUT(total_weight, TensorType({DT_FLOAT})) | .INPUT(total_weight, TensorType({DT_FLOAT})) | ||||
| .OUTPUT(x_grad, TensorType({DT_FLOAT})) | .OUTPUT(x_grad, TensorType({DT_FLOAT})) | ||||
| .ATTR(reduction, String, "mean") | .ATTR(reduction, String, "mean") | ||||
| .ATTR(ignore_index, Int, -100) | |||||
| .OP_END_FACTORY_REG(NLLLossGrad) | .OP_END_FACTORY_REG(NLLLossGrad) | ||||
| /** | /** | ||||
| @@ -884,6 +904,54 @@ REG_OP(LpNorm) | |||||
| .ATTR(keepdim, Bool, false) | .ATTR(keepdim, Bool, false) | ||||
| .ATTR(epsilon, Float, 1e-12) | .ATTR(epsilon, Float, 1e-12) | ||||
| .OP_END_FACTORY_REG(LpNorm) | .OP_END_FACTORY_REG(LpNorm) | ||||
| /** | |||||
| * @brief Complex operator: takes a "real" and an "imag" tensor and produces a complex tensor. NOTE(review): presumably out = real + imag * i - confirm. | |||||
| * @par Inputs: | |||||
| * @li real: An ND tensor of type float32 or double. | |||||
| * @li imag: An ND tensor of type float32 or double. \n | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li Tout: An optional Type. Defaults to DT_COMPLEX64. \n | |||||
| * | |||||
| * @par Outputs: | |||||
| * @li out: An ND tensor of type complex64 or complex128. \n | |||||
| */ | |||||
| REG_OP(Complex) | |||||
| .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(Tout, Type, DT_COMPLEX64) | |||||
| .OP_END_FACTORY_REG(Complex) | |||||
| /** | |||||
| * @brief Imag operator: maps a complex tensor to a real tensor. NOTE(review): presumably returns the imaginary part of "input", as the name suggests - confirm. | |||||
| * @par Inputs: | |||||
| * @li input: An ND tensor of type complex64 or complex128. \n | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li Tout: An optional Type. Defaults to DT_FLOAT. \n | |||||
| * | |||||
| * @par Outputs: | |||||
| * @li output: An ND tensor of type float32 or double. \n | |||||
| */ | |||||
| REG_OP(Imag) | |||||
| .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .ATTR(Tout, Type, DT_FLOAT) | |||||
| .OP_END_FACTORY_REG(Imag) | |||||
| /** | |||||
| * @brief Angle operator: maps a complex tensor to a real tensor. NOTE(review): presumably returns the argument (phase angle) of "input", as the name suggests - confirm. | |||||
| * @par Inputs: | |||||
| * @li input: An ND tensor of type complex64 or complex128. \n | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li Tout: An optional Type. Defaults to DT_FLOAT. \n | |||||
| * | |||||
| * @par Outputs: | |||||
| * @li output: An ND tensor of type float32 or double. \n | |||||
| */ | |||||
| REG_OP(Angle) | |||||
| .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .ATTR(Tout, Type, DT_FLOAT) | |||||
| .OP_END_FACTORY_REG(Angle) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ | ||||
| @@ -1022,6 +1022,27 @@ REG_OP(IndexAdd) | |||||
| .ATTR(axis, Int, 0) | .ATTR(axis, Int, 0) | ||||
| .OP_END_FACTORY_REG(IndexAdd) | .OP_END_FACTORY_REG(IndexAdd) | ||||
| /** | |||||
| *@brief Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input. \n | |||||
| *@par Inputs: | |||||
| * One input, including: | |||||
| *@li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||||
| * qint8, quint8, qint32, uint16, complex128, uint32, uint64 | |||||
| * (registered below as TensorType::BasicType()). \n | |||||
| *@par Attributes: | |||||
| *diagonal: An optional int. Defaults to 0. The diagonal to consider. \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor. Has the same type as "x" . \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with the PyTorch operator Triu. | |||||
| */ | |||||
| REG_OP(Triu) | |||||
| .INPUT(x, TensorType::BasicType()) | |||||
| .ATTR(diagonal, Int, 0) | |||||
| .OUTPUT(y, TensorType::BasicType()) | |||||
| .OP_END_FACTORY_REG(Triu) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ | ||||
| @@ -144,6 +144,64 @@ REG_OP(BatchNorm) | |||||
| /** | /** | ||||
| *@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
| *@par Inputs: | |||||
| * Five inputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported) | |||||
| *@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D. | |||||
| *@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
| if input "x" is with format NDC1HWC0. Specifies the scaling factor. | |||||
| *@li offset: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
| if input "x" is with format NDC1HWC0. Specifies the offset. | |||||
| *@li mean: An optional Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
| if input "x" is with format NDC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||||
| operation is used for training. | |||||
| *@li variance: An optional Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be | |||||
| 6D if input "x" is with format NDC1HWC0. Specifies the variance used for inference. Must be "None" | |||||
| if the operation is used for training . \n | |||||
| *@par Attributes: | |||||
| *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". | |||||
| *@li data_format: An optional string, specifying the format of "x". Defaults to "NCDHW". | |||||
| *@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | |||||
| *@par Outputs: | |||||
| * Five outputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported) | |||||
| *@li y: A 5D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D. | |||||
| *@li batch_mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
| if input "x" is with format NDC1HWC0. Specifies the mean of "x". | |||||
| *@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||||
| Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x". | |||||
| *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||||
| Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
| *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||||
| Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
| *@attention Constraints: | |||||
| *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | |||||
| then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n | |||||
| *@par Third-party framework compatibility | |||||
| *@li Compatible with the TensorFlow operator fused_batch_norm. | |||||
| *@li Compatible with the TensorFlow operator fused_batch_norm_v2. | |||||
| */ | |||||
| REG_OP(BatchNorm3D) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(offset, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||||
| .ATTR(epsilon, Float, 0.0001) | |||||
| .ATTR(data_format, String, "NCDHW") | |||||
| .ATTR(is_training, Bool, true) | |||||
| .OP_END_FACTORY_REG(BatchNorm3D) | |||||
| /** | |||||
| *@brief Performs batch normalization . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Five inputs, including: (NHWC or NCHW supported) | * Five inputs, including: (NHWC or NCHW supported) | ||||
| *@li x: A 4D Tensor of type float16 or float32. | *@li x: A 4D Tensor of type float16 or float32. | ||||
| @@ -242,6 +300,52 @@ REG_OP(BatchNormGrad) | |||||
| /** | /** | ||||
| *@brief Performs the backpropagation of BatchNorm . \n | *@brief Performs the backpropagation of BatchNorm . \n | ||||
| *@par Inputs: | |||||
| * Five inputs, including: | |||||
| *@li y_backprop: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient. | |||||
| *@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0. | |||||
| *@li scale: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. | |||||
| *@li reserve_space_1: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm3D. | |||||
| *@li reserve_space_2: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm3D . \n | |||||
| *@par Attributes: | |||||
| *@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | |||||
| *@li data_format: An optional string. Defaults to "NCDHW". | |||||
| *@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n | |||||
| *@par Outputs: | |||||
| *@li x_backprop: A Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "x". | |||||
| *@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale". | |||||
| *@li offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset". | |||||
| *@li reserve_space_4: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output. | |||||
| *@li reserve_space_5: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n | |||||
| *@attention Constraints: | |||||
| * The preceding layer of this operator must be operator BatchNorm3D . \n | |||||
| *@see BatchNorm3D | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad. | |||||
| */ | |||||
| REG_OP(BatchNorm3DGrad) | |||||
| .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||||
| .INPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(reserve_space_4, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(reserve_space_5, TensorType({DT_FLOAT})) | |||||
| .ATTR(epsilon, Float, 0.0001) | |||||
| .ATTR(data_format, String, "NCDHW") | |||||
| .ATTR(is_training, Bool, true) | |||||
| .OP_END_FACTORY_REG(BatchNorm3DGrad) | |||||
| /** | |||||
| *@brief Performs the backpropagation of BatchNorm . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Five inputs, including: | * Five inputs, including: | ||||
| *@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. | *@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. | ||||
| @@ -1059,7 +1059,7 @@ REG_OP(DeformableConv2D) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li dilations: A list of 5 integers. Specifies the dilation factor for each | * @li dilations: A list of 5 integers. Specifies the dilation factor for each | ||||
| @@ -1119,7 +1119,7 @@ REG_OP(Conv3D) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Three attributes: | * Three attributes: | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| @@ -1167,7 +1167,7 @@ REG_OP(Conv3DBackpropInput) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Three attributes: | * Three attributes: | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| @@ -1267,7 +1267,7 @@ REG_OP(LSTM) | |||||
| * dimension of input. | * dimension of input. | ||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | * The N, C and D dimensions must be 1. Has the same format as "x". | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| @@ -1319,7 +1319,7 @@ REG_OP(Conv3DBackpropFilter) | |||||
| * dimension of input. | * dimension of input. | ||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | * The N, C and D dimensions must be 1. Has the same format as "x". | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| @@ -1369,7 +1369,7 @@ REG_OP(Conv3DBackpropFilterD) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Five attributes: | * Five attributes: | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li dilations: A tuple/list of 5 integers, | * @li dilations: A tuple/list of 5 integers, | ||||
| * The dilation factor for each dimension of input. | * The dilation factor for each dimension of input. | ||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | * The N, C and D dimensions must be 1. Has the same format as "x". | ||||
| @@ -1422,7 +1422,7 @@ REG_OP(Conv3DTranspose) | |||||
| * dimension of input. | * dimension of input. | ||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | * The N, C and D dimensions must be 1. Has the same format as "x". | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li output_padding: The size will be added in the output shape. | * @li output_padding: The size will be added in the output shape. | ||||
| @@ -1624,7 +1624,7 @@ REG_OP(Conv2DTransposeD) | |||||
| * of the input. | * of the input. | ||||
| * @li ksize: A tuple/list of 2 integers.kernel size. | * @li ksize: A tuple/list of 2 integers.kernel size. | ||||
| *@par Attributes: | *@par Attributes: | ||||
| * Three attributes: | |||||
| * Four attributes: | |||||
| * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | ||||
| * of input. Defaults to [1, 1, 1, 1] | * of input. Defaults to [1, 1, 1, 1] | ||||
| * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | ||||
| @@ -968,8 +968,9 @@ REG_OP(SPP) | |||||
| * Three inputs, including: | * Three inputs, including: | ||||
| *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | ||||
| * map. | * map. | ||||
| *@li rois: A tensor of type float16 or float32, with shape | |||||
| *@li rois: A tensor of type float16 or float32, with 3D shape | |||||
| * [batch, 5, roi_max_num], describing the ROIs. | * [batch, 5, roi_max_num], describing the ROIs. | ||||
| * roi_max_num must be less than or equal to 6000 and must be divisible by 16. | |||||
| *@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying | *@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying | ||||
| * the number of ROIs per batch . \n | * the number of ROIs per batch . \n | ||||
| @@ -1604,6 +1605,50 @@ REG_OP(NonMaxSuppressionV7) | |||||
| .ATTR(max_boxes_size, Int, 0) | .ATTR(max_boxes_size, Int, 0) | ||||
| .OP_END_FACTORY_REG(NonMaxSuppressionV7) | .OP_END_FACTORY_REG(NonMaxSuppressionV7) | ||||
| /** | |||||
| *@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n | |||||
| *@par Inputs: | |||||
| * Two inputs, including: | |||||
| *@li features: A 5HD Tensor list of type float32 or float16. | |||||
| *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, | |||||
| * the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1". \n | |||||
| *@par Attributes: | |||||
| *@li finest_scale: An optional attribute of type int, specifying the scale of calculate levels of "rois". Defaults to "56". | |||||
| *@li roi_scale_factor: An optional attribute of type float32, specifying the rescaling of "rois" coordinates. Defaults to "0". | |||||
| *@li spatial_scale: An optional attribute of type list float32, specifying the scaling ratio of "features" | |||||
| * to the original image. Defaults to [1/4, 1/8, 1/16, 1/32]. | |||||
| *@li pooled_height: An optional attribute of type int32, specifying the H dimension. Defaults to "7". | |||||
| *@li pooled_width: An optional attribute of type int32, specifying the W dimension. Defaults to "7". | |||||
| *@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency | |||||
| * of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", | |||||
| * which is a floating point number. Defaults to "0". | |||||
| *@li pool_mode: An optional attribute of type string to indicate pooling mode. Defaults to "avg". | |||||
| *@li aligned: An optional attribute of type bool, specifying the align to corner. Defaults to true . \n | |||||
| *@par Outputs: | |||||
| * y: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16. | |||||
| * The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height", | |||||
| * "pooled_width", and "features", respectively. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with mmdetection SingleRoIExtractor operator. | |||||
| */ | |||||
| REG_OP(RoiExtractor) | |||||
| .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(finest_scale, Int, 56) | |||||
| .ATTR(roi_scale_factor, Float, 0) | |||||
| .ATTR(spatial_scale, ListFloat, { 1.f/4, 1.f/8, 1.f/16, 1.f/32 }) | |||||
| .ATTR(pooled_height, Int, 7) | |||||
| .ATTR(pooled_width, Int, 7) | |||||
| .ATTR(sample_num, Int, 0) | |||||
| .ATTR(pool_mode, String, "avg") | |||||
| .ATTR(aligned, Bool, true) | |||||
| .OP_END_FACTORY_REG(RoiExtractor) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | ||||
| @@ -1233,6 +1233,47 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2) | |||||
| .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .ATTR(reduction, String, "mean") | .ATTR(reduction, String, "mean") | ||||
| .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) | .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) | ||||
| /** | |||||
| * @brief Calculate the PoissonNllLoss function. | |||||
| * target ~ Poisson(input); loss(input, target) = input - target * log(input) + log(target!) \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * @li target: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * Four attributes, including: | |||||
| * @li log_input: An optional bool. Defaults to "True". | |||||
| * @li full: An optional bool. Defaults to "False". | |||||
| * @li eps: An optional float. Defaults to "1e-8". | |||||
| * @li reduction: An optional string. Defaults to "mean". \n | |||||
| * @par Outputs: | |||||
| * loss: A Tensor with the same element type as the two inputs. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator PoissonNllLoss. \n | |||||
| */ | |||||
| REG_OP(PoissonNllLoss) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(log_input, Bool, true) | |||||
| .ATTR(full, Bool, false) | |||||
| .ATTR(eps, Float, 1e-8) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(PoissonNllLoss) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | ||||
| @@ -20,7 +20,34 @@ | |||||
| */ | */ | ||||
| #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | ||||
| #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | ||||
| #include "graph/operator_reg.h" | |||||
| #include "nn_pooling_ops.h" | #include "nn_pooling_ops.h" | ||||
| namespace ge { | |||||
| /** | |||||
| * @brief Determines whether the targets are in the top "k" predictions . \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor. | |||||
| * @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. | |||||
| * @li k: A 1D Tensor of the same type as "targets". | |||||
| * Specifies the number of top elements to look at for computing precision . \n | |||||
| * @par Outputs: | |||||
| * precision: A Tensor of type bool . \n | |||||
| * @attention Constraints: | |||||
| * "targets" must be a non-negative tensor . \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator InTopKV2. | |||||
| */ | |||||
| REG_OP(InTopKV2) | |||||
| .INPUT(predictions, TensorType({DT_FLOAT})) | |||||
| .INPUT(targets, TensorType(IndexNumberType)) | |||||
| .INPUT(k, TensorType({IndexNumberType})) | |||||
| .OUTPUT(precision, TensorType({DT_BOOL})) | |||||
| .OP_END_FACTORY_REG(InTopKV2) | |||||
| }// namespace ge | |||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | ||||
| @@ -223,7 +223,29 @@ REG_OP(Relu6Grad) | |||||
| .INPUT(features, TensorType::RealNumberType()) | .INPUT(features, TensorType::RealNumberType()) | ||||
| .OUTPUT(backprops, TensorType::RealNumberType()) | .OUTPUT(backprops, TensorType::RealNumberType()) | ||||
| .OP_END_FACTORY_REG(Relu6Grad) | .OP_END_FACTORY_REG(Relu6Grad) | ||||
| /** | |||||
| *@brief Calculate the elu_grad_v2 function. | |||||
| *Applies the element-wise function: | |||||
| * Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha . | |||||
| *@par Inputs: | |||||
| *Two inputs, including: | |||||
| * @li grads: A tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * @li activations: A tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * | |||||
| *@par Outputs: | |||||
| *y: A Tensor with the same type and shape as "grads". | |||||
| * | |||||
| *@par Attributes: | |||||
| *@li alpha: An optional float scalar parameter. Defaults to "1.0". | |||||
| */ | |||||
| REG_OP(EluGradV2) | |||||
| .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .ATTR(alpha, Float, 1.0) | |||||
| .OP_END_FACTORY_REG(EluGradV2) | |||||
| /** | /** | ||||
| * @brief Compute sigmoid of "x" element-wise . \n | * @brief Compute sigmoid of "x" element-wise . \n | ||||
| @@ -842,6 +864,26 @@ REG_OP(SoftShrinkGrad) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .ATTR(lambd, Float, 0.5) | .ATTR(lambd, Float, 0.5) | ||||
| .OP_END_FACTORY_REG(SoftShrinkGrad) | .OP_END_FACTORY_REG(SoftShrinkGrad) | ||||
| /** | |||||
| *@brief Calculate -ln(1+e^(-x)). \n | |||||
| *@par Inputs: | |||||
| *One input, including: | |||||
| * @li x: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| *@par Outputs: | |||||
| *One output, including: | |||||
| * @li y: A tensor with the same type and shape as "x". \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator LogSigmoid. \n | |||||
| */ | |||||
| REG_OP(LogSigmoid) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */ | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */ | |||||
| .OP_END_FACTORY_REG(LogSigmoid) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ | ||||
| @@ -37,7 +37,7 @@ namespace ge { | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| * This operator is a BatchNorm fusion operator for updating the moving | * This operator is a BatchNorm fusion operator for updating the moving | ||||
| * averages for training. | * averages for training. | ||||
| * This operator is used in conjunction with BNTrainingUpdate. | |||||
| * This operator is used in conjunction with BNTrainingReduce. | |||||
| */ | */ | ||||
| REG_OP(BNTrainingReduce) | REG_OP(BNTrainingReduce) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -45,6 +45,27 @@ REG_OP(BNTrainingReduce) | |||||
| .OUTPUT(square_sum, TensorType({DT_FLOAT})) | .OUTPUT(square_sum, TensorType({DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(BNTrainingReduce) | .OP_END_FACTORY_REG(BNTrainingReduce) | ||||
| /** | |||||
| *@brief Performs reduced batch normalization . \n | |||||
| *@par Inputs: | |||||
| *x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n | |||||
| *@par Outputs: | |||||
| *@li sum: A 3D Tensor of type float32 for SUM reduced "x". | |||||
| *@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n | |||||
| *@attention Constraints: | |||||
| * This operator is a BatchNorm fusion operator for updating the moving | |||||
| * averages for training. | |||||
| * This operator is used in conjunction with BN3DTrainingUpdate. | |||||
| */ | |||||
| REG_OP(BN3DTrainingReduce) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(sum, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(square_sum, TensorType({DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(BN3DTrainingReduce) | |||||
| /** | /** | ||||
| *@brief Performs the backpropagation of BatchNorm . \n | *@brief Performs the backpropagation of BatchNorm . \n | ||||
| @@ -88,6 +109,49 @@ REG_OP(BNTrainingReduceGrad) | |||||
| .ATTR(epsilon, Float, 0.0001) | .ATTR(epsilon, Float, 0.0001) | ||||
| .OP_END_FACTORY_REG(BNTrainingReduceGrad) | .OP_END_FACTORY_REG(BNTrainingReduceGrad) | ||||
| /** | |||||
| *@brief Performs the backpropagation of BatchNorm . \n | |||||
| *@par Inputs: | |||||
| * Seven inputs, including: | |||||
| *@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for | |||||
| * the gradient. | |||||
| *@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. | |||||
| *@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the offset of "scale". | |||||
| *@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the offset of "offset". | |||||
| *@li scale: A 6D Tensor of type float32, with format NDC1HWC0. | |||||
| *@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the mean of "x". | |||||
| *@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the variance of "x" . \n | |||||
| *@par Attributes: | |||||
| *epsilon: An optional float32. Defaults to "0.0001". A small float number | |||||
| * added to the variance of "x" . \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset | |||||
| * of "x" . \n | |||||
| *@attention Constraints: | |||||
| * The preceding layer of this operator must be BN3DTrainingUpdateGrad . \n | |||||
| *@see BN3DTrainingUpdateGrad | |||||
| */ | |||||
| REG_OP(BN3DTrainingReduceGrad) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(diff_scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(diff_offset, TensorType({DT_FLOAT})) | |||||
| .INPUT(scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
| .INPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .ATTR(epsilon, Float, 0.0001) | |||||
| .OP_END_FACTORY_REG(BN3DTrainingReduceGrad) | |||||
| /** | /** | ||||
| *@brief Performs reduced batch normalization . \n | *@brief Performs reduced batch normalization . \n | ||||
| @@ -120,7 +184,7 @@ REG_OP(BNTrainingReduceGrad) | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *@li This operator is a BatchNorm fusion operator for updating the moving | *@li This operator is a BatchNorm fusion operator for updating the moving | ||||
| averages for training. | averages for training. | ||||
| *This operator is used in conjunction with BNTrainingReduce. | |||||
| *This operator is used in conjunction with BNTrainingUpdate. | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | ||||
| * root instruction. | * root instruction. | ||||
| */ | */ | ||||
| @@ -141,6 +205,59 @@ REG_OP(BNTrainingUpdate) | |||||
| .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(BNTrainingUpdate) | .OP_END_FACTORY_REG(BNTrainingUpdate) | ||||
| /** | |||||
| *@brief Performs reduced batch normalization . \n | |||||
| *@par Inputs: | |||||
| * Seven inputs, including: (NDC1HWC0 supported) | |||||
| *@li x: A 6D Tensor of type float16 or float32. | |||||
| *@li sum: A 6D Tensor of type float32 for the output of operator | |||||
| * BN3DTrainingReduce. | |||||
| *@li square_sum: A 6D Tensor of type float32 for the output of operator | |||||
| * BN3DTrainingReduce. | |||||
| *@li scale: A 6D Tensor of type float32, for the scaling factor. | |||||
| *@li offset: A 6D Tensor of type float32, for the scaling offset. | |||||
| *@li mean: A 6D Tensor of type float32, for the updated mean. | |||||
| *@li variance: A 6D Tensor of type float32, for the updated variance . \n | |||||
| *@par Attributes: | |||||
| *@li epsilon: A required float32, specifying the small value added to variance | |||||
| * to avoid dividing by zero. | |||||
| *@li factor: A required float32, specifying the weight for updating the mean | |||||
| * and variance . \n | |||||
| *@par Outputs: | |||||
| * Five outputs, including: (NDC1HWC0 supported) | |||||
| *@li y: A 6D Tensor of type float16 or float32, for normalized "x". | |||||
| *@li mean: A 6D Tensor of type float32, for the updated mean. | |||||
| *@li variance: A 6D Tensor of type float32, for the updated variance. | |||||
| *@li batch_mean: A 6D Tensor of type float32, for the mean of "x". | |||||
| *@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n | |||||
| *@attention Constraints: | |||||
| *@li This operator is a BatchNorm fusion operator for updating the moving | |||||
| averages for training. | |||||
| *This operator is used in conjunction with BN3DTrainingReduce. | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | |||||
| * root instruction. | |||||
| */ | |||||
| REG_OP(BN3DTrainingUpdate) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(sum, TensorType({DT_FLOAT})) | |||||
| .INPUT(square_sum, TensorType({DT_FLOAT})) | |||||
| .INPUT(scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(offset, TensorType({DT_FLOAT})) | |||||
| .INPUT(mean, TensorType({DT_FLOAT})) | |||||
| .INPUT(variance, TensorType({DT_FLOAT})) | |||||
| .REQUIRED_ATTR(factor, Float) | |||||
| .REQUIRED_ATTR(epsilon, Float) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(mean, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(variance, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(BN3DTrainingUpdate) | |||||
| /** | /** | ||||
| *@brief Performs batch normalization for inference . \n | *@brief Performs batch normalization for inference . \n | ||||
| @@ -284,6 +401,40 @@ REG_OP(BNTrainingUpdateGrad) | |||||
| .OUTPUT(diff_offset, TensorType({DT_FLOAT})) | .OUTPUT(diff_offset, TensorType({DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(BNTrainingUpdateGrad) | .OP_END_FACTORY_REG(BNTrainingUpdateGrad) | ||||
| /** | |||||
| *@brief Performs the backpropagation of BatchNorm . \n | |||||
| *@par Inputs: | |||||
| * Four inputs, including: | |||||
| *@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, | |||||
| * for the gradient. | |||||
| *@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. | |||||
| *@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the mean of "x". | |||||
| *@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the variance of "x" . \n | |||||
| *@par Attributes: | |||||
| *epsilon: An optional float32. Defaults to "0.0001". A small float number | |||||
| * added to the variance of "x" . \n | |||||
| *@par Outputs: | |||||
| * Two outputs, including: | |||||
| *@li diff_scale: A Tensor of type float32, with format NDC1HWC0, | |||||
| * for the offset of "scale". | |||||
| *@li diff_offset: A Tensor of type float32, with format NDC1HWC0, | |||||
| * for the offset of "offset" . \n | |||||
| *@see BN3DTrainingReduceGrad | |||||
| */ | |||||
| REG_OP(BN3DTrainingUpdateGrad) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
| .INPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
| .ATTR(epsilon, Float, 0.0001) | |||||
| .OUTPUT(diff_scale, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(diff_offset, TensorType({DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(BN3DTrainingUpdateGrad) | |||||
| /** | /** | ||||
| *@brief Performs the backpropagation of BatchNorm for inference . \n | *@brief Performs the backpropagation of BatchNorm for inference . \n | ||||
| @@ -857,8 +857,8 @@ REG_OP(SliceDV2) | |||||
| * @li sorted = true | * @li sorted = true | ||||
| * @li It's unstable sorted indices on the platform of Ascend310 | * @li It's unstable sorted indices on the platform of Ascend310 | ||||
| * @par Third-party framework compatibility | |||||
| * @li Compatible with the TensorFlow operator TopK. | |||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead. | |||||
| */ | */ | ||||
| REG_OP(TopKD) | REG_OP(TopKD) | ||||
| .INPUT(x, TensorType::RealNumberType()) | .INPUT(x, TensorType::RealNumberType()) | ||||
| @@ -883,6 +883,44 @@ REG_OP(TopKD) | |||||
| * Number of top elements to look for along the last dimension (along each row | * Number of top elements to look for along the last dimension (along each row | ||||
| * for matrices) . \n | * for matrices) . \n | ||||
| * @par Attributes: | |||||
| * @li sorted: An optional bool. Defaults to true. | |||||
| * If true, the resulting "k" elements will be sorted by the values in descending | |||||
| * order. | |||||
| * @li dim: An optional int. Defaults to -1. For reserved use. | |||||
| * @li largest: An optional bool. Defaults to true. For reserved use. \n | |||||
| * @par Outputs: | |||||
| * @li values: A Tensor, specifying the sorted data. Has the same type as | |||||
| * "x". | |||||
| * @li indices: A Tensor of type int32, specifying the indices of sorted data . \n | |||||
| * @see TopK() | |||||
| * @par Third-party framework compatibility | |||||
| * @li Compatible with the TensorFlow operator TopKV2. | |||||
| */ | |||||
| REG_OP(TopKV2) | |||||
| .INPUT(x, TensorType::RealNumberType()) | |||||
| .INPUT(k, TensorType({DT_INT32})) | |||||
| .OUTPUT(values, TensorType::RealNumberType()) | |||||
| .OUTPUT(indices, TensorType({DT_INT32})) | |||||
| .ATTR(sorted, Bool, true) | |||||
| .ATTR(dim, Int, -1) | |||||
| .ATTR(largest, Bool, true) | |||||
| .OP_END_FACTORY_REG(TopKV2) | |||||
| /** | |||||
| * @brief Finds values and indices of the "k" largest elements for the last | |||||
| * dimension . \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li x: A 1D or higher tensor of type BasicType, with the last dimension | |||||
| * at least "k". | |||||
| * @li k: A 0D Tensor of type int32. | |||||
| * Number of top elements to look for along the last dimension (along each row | |||||
| * for matrices) . \n | |||||
| * @par Attributes: | * @par Attributes: | ||||
| * @li sorted: An optional bool. Defaults to true. | * @li sorted: An optional bool. Defaults to true. | ||||
| * If true, the resulting "k" elements will be sorted by the values in descending | * If true, the resulting "k" elements will be sorted by the values in descending | ||||
| @@ -2103,6 +2141,34 @@ REG_OP(StridedSliceV2) | |||||
| .OUTPUT(y, TensorType::BasicType()) | .OUTPUT(y, TensorType::BasicType()) | ||||
| .OP_END_FACTORY_REG(StridedSliceV2) | .OP_END_FACTORY_REG(StridedSliceV2) | ||||
| /** | |||||
| *@brief Fills the elements of the input tensor with value val by selecting the indices in the order given in index. \n | |||||
| * NOTE(review): "val" and "index" are not inputs of this operator; presumably | |||||
| * they are encoded in "assist1"/"assist2" - TODO confirm. | |||||
| *@par Inputs: | |||||
| *Three inputs, including: | |||||
| *@li x: A tensor. Must be one of the following types: | |||||
| * float16, float32, int32. | |||||
| *@li assist1: A tensor. Must be one of the following types: | |||||
| * float16, float32, int32. | |||||
| *@li assist2: A tensor. Must be one of the following types: | |||||
| * float16, float32, int32. \n | |||||
| *@par Attributes: | |||||
| *dim: A required int. Used to select the dimension of this tensor. \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor with the same type and shape as "x". \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the PyTorch operator IndexFill. \n | |||||
| */ | |||||
| REG_OP(IndexFillD) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .REQUIRED_ATTR(dim, Int) | |||||
| .OP_END_FACTORY_REG(IndexFillD) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ | ||||
| @@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad) | |||||
| REG_OP(SparseTensorDenseMatMul) | REG_OP(SparseTensorDenseMatMul) | ||||
| .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) | .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) | ||||
| .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ | .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ | ||||
| DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16})) | |||||
| DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64})) | |||||
| .INPUT(x1_shape, TensorType({DT_INT64})) | .INPUT(x1_shape, TensorType({DT_INT64})) | ||||
| .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ | |||||
| .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ | |||||
| DT_COMPLEX128, DT_FLOAT16})) | DT_COMPLEX128, DT_FLOAT16})) | ||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ | |||||
| DT_COMPLEX128, DT_FLOAT16})) | DT_COMPLEX128, DT_FLOAT16})) | ||||
| .ATTR(adjoint_a, Bool, false) | .ATTR(adjoint_a, Bool, false) | ||||
| .ATTR(adjoint_b, Bool, false) | .ATTR(adjoint_b, Bool, false) | ||||
| @@ -26,6 +26,24 @@ | |||||
| namespace ge { | namespace ge { | ||||
| /** | |||||
| *@brief Computes the inverse 1-dimensional discrete Fourier transform over the | |||||
| * inner-most dimension of `x`. \n | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor. Must be one of the following types: complex64, complex128. \n | |||||
| *@par Outputs: | |||||
| *@li y: A complex tensor of the same rank as `x`. Must be one of the following | |||||
| * types: complex64, complex128. \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with TensorFlow IFFT operator. | |||||
| */ | |||||
| REG_OP(IFFT) | |||||
| .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(IFFT) | |||||
| /** | /** | ||||
| *@brief Real-valued fast Fourier transform . \n | *@brief Real-valued fast Fourier transform . \n | ||||
| @@ -47,6 +65,84 @@ REG_OP(RFFT) | |||||
| .OUTPUT(y, TensorType({DT_COMPLEX64})) | .OUTPUT(y, TensorType({DT_COMPLEX64})) | ||||
| .OP_END_FACTORY_REG(RFFT) | .OP_END_FACTORY_REG(RFFT) | ||||
| /** | |||||
| *@brief Inverse real-valued fast Fourier transform . \n | |||||
| *@par Inputs: | |||||
| *@li x: A complex64 tensor. | |||||
| *@li fft_length: An int32 tensor of shape [1]. The FFT length . \n | |||||
| *@par Outputs: | |||||
| *@li y: A float32 tensor of the same rank as `x`. The inner-most | |||||
| * dimension of `x` is replaced with the `fft_length` samples of its inverse | |||||
| * 1D Fourier transform . \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with TensorFlow IRFFT operator. | |||||
| */ | |||||
| REG_OP(IRFFT) | |||||
| .INPUT(x, TensorType({DT_COMPLEX64})) | |||||
| .INPUT(fft_length, TensorType({DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(IRFFT) | |||||
| /** | |||||
| *@brief 2D fast Fourier transform. \n | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor. Must be one of the following types: complex64, complex128. \n | |||||
| *@par Outputs: | |||||
| *@li y: A complex tensor (complex64 or complex128) of the same shape as `x`. | |||||
| * The inner-most 2 dimensions of `x` are replaced with their 2D Fourier | |||||
| * transform. \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with TensorFlow FFT2D operator. | |||||
| */ | |||||
| REG_OP(FFT2D) | |||||
| .INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(FFT2D) | |||||
| /** | |||||
| *@brief Calculate the one-dimensional discrete Fourier transform on the | |||||
| * innermost dimension of the input. \n | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor. Must be one of the following types: complex64, complex128. \n | |||||
| *@par Outputs: | |||||
| *@li y: A complex tensor with the same shape as `x`. The innermost dimension | |||||
| * of `x` is replaced by its 1-dimensional Fourier transform. \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with TensorFlow FFT operator. | |||||
| */ | |||||
| REG_OP(FFT) | |||||
| .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(FFT) | |||||
| /** | |||||
| *@brief Calculate the inverse 2-dimensional discrete Fourier transform on the | |||||
| * innermost 2 dimensions of the input. \n | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor. Must be one of the following types: complex64, complex128. \n | |||||
| *@par Outputs: | |||||
| *@li y: A complex tensor with the same shape as `x`. The innermost 2 dimensions | |||||
| * of `x` are replaced by their inverse two-dimensional Fourier transform. \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with TensorFlow IFFT2D operator. | |||||
| */ | |||||
| REG_OP(IFFT2D) | |||||
| .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(IFFT2D) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ | ||||
| @@ -62,8 +62,8 @@ REG_OP(Split) | |||||
| *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. | |||||
| *@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||||
| *@li split_dim: A required int32. Specifies the dimension along which to split. No default value. | |||||
| *@li num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n | *y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n | ||||
| @@ -94,12 +94,12 @@ REG_OP(SplitD) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Three inputs, including: | * Three inputs, including: | ||||
| *@li x: An ND Tensor. | *@li x: An ND Tensor. | ||||
| *Must be one of the following types: | |||||
| *@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
| *@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n | |||||
| *Must be one of the types:float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | |||||
| *@li size_splits: Must be one of the types:int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
| *@li split_dim: Must be the following type:int32. Specifies the dimension along which to split . \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||||
| *num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | ||||
| @@ -129,9 +129,9 @@ REG_OP(SplitV) | |||||
| *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li size_splits: A required list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
| *@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. | |||||
| *@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||||
| *@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
| *@li split_dim: A required int32. Specifies the dimension along which to split. No default value. | |||||
| *@li num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | ||||
| @@ -488,7 +488,7 @@ include: | |||||
| */ | */ | ||||
| REG_OP(AsString) | REG_OP(AsString) | ||||
| .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \ | .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \ | ||||
| DT_DOUBLE, DT_BOOL})) | |||||
| DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_STRING})) | .OUTPUT(y, TensorType({DT_STRING})) | ||||
| .ATTR(precision, Int, -1) | .ATTR(precision, Int, -1) | ||||
| .ATTR(scientific, Bool, false) | .ATTR(scientific, Bool, false) | ||||
| @@ -46,6 +46,12 @@ typedef enum tagRtChipType { | |||||
| CHIP_END, | CHIP_END, | ||||
| } rtChipType_t; | } rtChipType_t; | ||||
| typedef enum tagRtAicpuScheType { | |||||
| SCHEDULE_SOFTWARE = 0, /* Software Schedule */ | |||||
| SCHEDULE_SOFTWARE_OPT, /* implicit value 1; presumably an optimized software schedule - TODO confirm */ | |||||
| SCHEDULE_HARDWARE, /* HWTS Schedule; implicit value 2 */ | |||||
| } rtAicpuScheType; /* NOTE(review): no _t suffix, unlike rtChipType_t declared just before */ | |||||
| typedef enum tagRtVersion { | typedef enum tagRtVersion { | ||||
| VER_BEGIN = 0, | VER_BEGIN = 0, | ||||
| VER_NA = VER_BEGIN, | VER_NA = VER_BEGIN, | ||||
| @@ -184,6 +190,19 @@ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); | |||||
| */ | */ | ||||
| RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); | RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); | ||||
| /** | |||||
| * @ingroup | |||||
| * NOTE(review): the @ingroup tag above names no group - TODO fill in. | |||||
| * @brief get device feature ability by device id, such as task schedule ability. | |||||
| * @param [in] deviceId id of the device to query | |||||
| * @param [in] moduleType module whose capability is queried (accepted values are not shown here - TODO confirm) | |||||
| * @param [in] featureType feature to query (presumably an rtDeviceFeatureType_t value - TODO confirm) | |||||
| * @param [out] value receives the queried capability value | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value); | |||||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -62,6 +62,11 @@ typedef enum tagRtFeatureType { | |||||
| FEATURE_TYPE_RSV | FEATURE_TYPE_RSV | ||||
| } rtFeatureType_t; | } rtFeatureType_t; | ||||
| typedef enum tagRtDeviceFeatureType { | |||||
| FEATURE_TYPE_SCHE, /* implicit value 0; presumably the task-schedule feature - TODO confirm */ | |||||
| FEATURE_TYPE_END, /* implicit value 1; presumably an end-of-list marker - TODO confirm */ | |||||
| } rtDeviceFeatureType_t; | |||||
| typedef enum tagMemcpyInfo { | typedef enum tagMemcpyInfo { | ||||
| MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, | MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, | ||||
| MEMCPY_INFO_RSV | MEMCPY_INFO_RSV | ||||
| @@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData); | |||||
| #define RT_FUSION_KERNEL_DUMPFLAG (0x04) | #define RT_FUSION_KERNEL_DUMPFLAG (0x04) | ||||
| #define RT_KERNEL_CUSTOM_AICPU (0x08) | #define RT_KERNEL_CUSTOM_AICPU (0x08) | ||||
| /** | |||||
| * @ingroup rt_kernel | |||||
| * @brief kernel mode values: 0x00 default, 0x01 normal, 0x02 all-kernel. | |||||
| * NOTE(review): where these values are consumed is not visible here - TODO confirm. | |||||
| **/ | |||||
| #define RT_DEFAULT_KERNEL_MODE (0x00) | |||||
| #define RT_NORMAL_KERNEL_MODE (0x01) | |||||
| #define RT_ALL_KERNEL_MODE (0x02) | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief kernel L1 Fusion Dump bit flags | * @brief kernel L1 Fusion Dump bit flags | ||||
| @@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData); | |||||
| */ | */ | ||||
| RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); | RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); | ||||
| /** | |||||
| * @ingroup rt_kernel | |||||
| * @brief register device binary with all kernel | |||||
| * @param [in] bin device binary description | |||||
| * @param [out] handle receives the device binary handle (presumably the | |||||
| * handle later passed to rtKernelLaunchWithHandle - TODO confirm) | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief register fast memeory device binary | * @brief register fast memeory device binary | ||||
| @@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u | |||||
| RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, | RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, | ||||
| rtSmDesc_t *smDesc, rtStream_t stream); | rtSmDesc_t *smDesc, rtStream_t stream); | ||||
| /** | |||||
| * @ingroup rt_kernel | |||||
| * @brief launch kernel with handle to device | |||||
| * @param [in] handle program handle (presumably obtained from binary registration - TODO confirm) | |||||
| * @param [in] devFunc device function description. | |||||
| * @param [in] blockDim block dimensions | |||||
| * @param [in] args arguments address for kernel function | |||||
| * @param [in] argsSize arguments size | |||||
| * @param [in] smDesc shared memory description | |||||
| * @param [in] stream_ associated stream | |||||
| * @param [in] kernelInfo kernel info | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||||
| rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief launch kernel to device | * @brief launch kernel to device | ||||
| @@ -50,6 +50,7 @@ typedef enum tagModelTaskType { | |||||
| RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, | RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, | ||||
| RT_MODEL_TASK_STREAM_LABEL_GOTO, | RT_MODEL_TASK_STREAM_LABEL_GOTO, | ||||
| RT_MODEL_TASK_MODEL_EXIT, | RT_MODEL_TASK_MODEL_EXIT, | ||||
| RT_MODEL_TASK_ALL_KERNEL, | |||||
| } rtModelTaskType_t; | } rtModelTaskType_t; | ||||
| typedef enum tagModelStreamType { | typedef enum tagModelStreamType { | ||||
| @@ -127,6 +128,18 @@ typedef struct tagKernelTaskInfo { | |||||
| uint16_t *argsOffset; | uint16_t *argsOffset; | ||||
| } rtKernelTaskInfo_t; | } rtKernelTaskInfo_t; | ||||
| typedef struct tagAllKernelTaskInfo { /* kernel task description that carries a binary handle and device function; presumably paired with RT_MODEL_TASK_ALL_KERNEL - TODO confirm */ | |||||
| uint16_t blockDim; /* NOTE(review): 16-bit here, while rtKernelLaunchWithHandle takes a uint32_t blockDim */ | |||||
| uint16_t argsCount; | |||||
| uint16_t argsSize; /* NOTE(review): 16-bit here, while rtKernelLaunchWithHandle takes a uint32_t argsSize */ | |||||
| uint16_t reserved; | |||||
| void *devfunc; | |||||
| void *handle; | |||||
| uint8_t *smDesc; | |||||
| uint8_t *args; | |||||
| uint16_t *argsOffset; | |||||
| } rtAllKernelTaskInfo_t; | |||||
| typedef struct tagKernelTaskInfoEx { | typedef struct tagKernelTaskInfoEx { | ||||
| uint32_t flags; | uint32_t flags; | ||||
| uint32_t argsSize; | uint32_t argsSize; | ||||
| @@ -251,6 +264,7 @@ typedef struct tagTaskInfo { | |||||
| union { | union { | ||||
| rtKernelTaskInfoEx_t kernelTaskEx; | rtKernelTaskInfoEx_t kernelTaskEx; | ||||
| rtKernelTaskInfo_t kernelTask; | rtKernelTaskInfo_t kernelTask; | ||||
| rtAllKernelTaskInfo_t allKernelTask; | |||||
| rtEventTaskInfo_t eventTask; | rtEventTaskInfo_t eventTask; | ||||
| rtStreamSwitchTaskInfo_t streamSwitchTask; | rtStreamSwitchTaskInfo_t streamSwitchTask; | ||||
| rtStreamActiveTaskInfo_t streamActiveTask; | rtStreamActiveTaskInfo_t streamActiveTask; | ||||
| @@ -1,72 +1,137 @@ | |||||
| /** | |||||
| * @file tune_api.h | |||||
| * | |||||
| * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n | |||||
| * | |||||
| * This program is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n | |||||
| * Description: header file of the mstune tuning interface | |||||
| */ | |||||
| /** @defgroup mstune mstune tuning interface */ | |||||
| #ifndef TUNE_API_H | |||||
| #define TUNE_API_H | |||||
| #include <vector> | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include "graph/graph.h" | |||||
| #include "ge/ge_api.h" | |||||
| /** | |||||
| * @ingroup mstune | |||||
| * | |||||
| * mstune status: the result code returned by MsTuning and MsTrainTuning. | |||||
| */ | |||||
| enum MsTuneStatus { | |||||
| MSTUNE_SUCCESS, /**< tune success (implicit value 0) */ | |||||
| MSTUNE_FAILED, /**< tune failed (implicit value 1) */ | |||||
| }; | |||||
| // Option keys for the training option sets (presumably the outer keys of the MsTrainTuning option map - TODO confirm) | |||||
| const std::string MSTUNE_SELF_KEY = "mstune"; | |||||
| const std::string MSTUNE_GEINIT_KEY = "initialize"; | |||||
| const std::string MSTUNE_GESESS_KEY = "session"; | |||||
| /** | |||||
| * @ingroup mstune | |||||
| * @par Description: command-line tuning | |||||
| * | |||||
| * @attention None | |||||
| * @param option [IN] tuning options | |||||
| * @param msg [OUT] message returned when tuning fails abnormally | |||||
| * @retval #MSTUNE_SUCCESS execution succeeded | |||||
| * @retval #MSTUNE_FAILED execution failed | |||||
| * @par Dependencies: | |||||
| * @li tune_api.cpp: the development package this interface belongs to. | |||||
| * @li tune_api.h: the header file in which this interface is declared. | |||||
| * @see None | |||||
| * @since | |||||
| */ | |||||
| MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg); | |||||
| /** | |||||
| * @ingroup mstune | |||||
| * @par Description: gradient tuning | |||||
| * | |||||
| * @attention None | |||||
| * @param tuningGraph [IN] graph to tune | |||||
| * @param dependGraph [IN] graphs the tuning graph depends on | |||||
| * @param session [IN] GE session | |||||
| * @param option [IN] option set, containing tuning options and GE options | |||||
| * @retval #MSTUNE_SUCCESS execution succeeded | |||||
| * @retval #MSTUNE_FAILED execution failed | |||||
| * @par Dependencies: | |||||
| * @li tune_api.cpp: the development package this interface belongs to. | |||||
| * @li tune_api.h: the header file in which this interface is declared. | |||||
| * @see None | |||||
| * @since | |||||
| */ | |||||
| extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph, | |||||
| ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option); | |||||
| #endif | |||||
| /** | |||||
| * @file tune_api.h | |||||
| * | |||||
| * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n | |||||
| * | |||||
| * This program is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n | |||||
| * Description: header file of the mstune tuning interface | |||||
| */ | |||||
| /** @defgroup mstune mstune tuning interface */ | |||||
| #ifndef TUNE_API_H | |||||
| #define TUNE_API_H | |||||
| #include <vector> | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include "graph/graph.h" | |||||
| #include "ge/ge_api.h" | |||||
| /** | |||||
| * @ingroup mstune | |||||
| * | |||||
| * mstune status: the result code returned by MsTuning and MsTrainTuning. | |||||
| */ | |||||
| enum MsTuneStatus { | |||||
| MSTUNE_SUCCESS, /**< tune success (implicit value 0) */ | |||||
| MSTUNE_FAILED, /**< tune failed (implicit value 1) */ | |||||
| }; | |||||
| // Option keys for the training option sets (presumably the outer keys of the MsTrainTuning option map - TODO confirm) | |||||
| const std::string MSTUNE_SELF_KEY = "mstune"; | |||||
| const std::string MSTUNE_GEINIT_KEY = "initialize"; | |||||
| const std::string MSTUNE_GESESS_KEY = "session"; | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| struct RunnerInitConfig { | |||||
| // online only - TODO confirm (the original comment read "onilne online") | |||||
| std::string profPath; | |||||
| std::string parserPath; | |||||
| // ncs only | |||||
| std::vector<uint32_t> devList; | |||||
| }; | |||||
| struct RunnerOpInfo { /* per-operator record: name plus two cost-time counters */ | |||||
| std::string opName; | |||||
| uint64_t opCostTime; /* time unit is not stated here - TODO confirm; no default initializer */ | |||||
| uint64_t aicoreCostTime; /* time unit is not stated here - TODO confirm; no default initializer */ | |||||
| // gradient_split only | |||||
| std::string modelName; | |||||
| std::string opType; | |||||
| std::vector<uint64_t> start; | |||||
| std::vector<uint64_t> end; | |||||
| }; | |||||
| struct RunnerModelInfo { /* per-model record holding a single total cost time */ | |||||
| uint64_t totalCostTime; /* time unit is not stated here - TODO confirm; no default initializer */ | |||||
| }; | |||||
| struct RunnerRunResult { /* groups a list of RunnerModelInfo records with a list of RunnerOpInfo records */ | |||||
| std::vector<RunnerModelInfo> modelInfo; | |||||
| std::vector<RunnerOpInfo> opInfo; | |||||
| }; | |||||
| struct RunnerResult { /* one total cost time plus two string-keyed cost-time maps */ | |||||
| uint64_t totalCostTime; /* no default initializer */ | |||||
| std::map<std::string, uint64_t> opCostTime; /* keys are presumably operator names - TODO confirm */ | |||||
| std::map<std::string, uint64_t> aicoreCostTime; /* keys are presumably operator names - TODO confirm */ | |||||
| }; | |||||
| struct RunnerDataBuf { /* raw pointer/size pair; the type itself expresses no ownership of ptr */ | |||||
| void *ptr = nullptr; | |||||
| size_t size = 0; /* presumably the byte size of the memory at ptr - TODO confirm */ | |||||
| }; | |||||
| struct AOEBufferData { /* shared-ownership byte buffer plus its length; NOTE(review): std::shared_ptr needs <memory>, which this header does not include directly */ | |||||
| std::shared_ptr<uint8_t> data = nullptr; /* NOTE(review): if this points at an array, a custom deleter is required (the default deleter calls delete, not delete[]) - confirm at the creation sites */ | |||||
| uint64_t length; /* NOTE(review): no default initializer, unlike data */ | |||||
| }; | |||||
| struct RunnerConfig { /* run settings; fields are grouped below into common, offline-only and online-only */ | |||||
| bool isProf; /* no default initializer */ | |||||
| uint32_t loop; /* no default initializer; presumably the number of run iterations - TODO confirm */ | |||||
| // offline only | |||||
| std::vector<RunnerDataBuf> input; | |||||
| std::vector<RunnerDataBuf> output; | |||||
| std::string modelPath; | |||||
| RunnerDataBuf modelData; | |||||
| // online only | |||||
| uint32_t devId; /* no default initializer */ | |||||
| std::vector<std::vector<ge::Tensor>> inputs; | |||||
| std::vector<ge::Graph> dependGraph; // run graph (for training) | |||||
| }; | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| /** | |||||
| * @ingroup mstune | |||||
| * @par Description: command-line tuning | |||||
| * | |||||
| * @attention None | |||||
| * @param option [IN] tuning options | |||||
| * @param msg [OUT] message returned when tuning fails abnormally | |||||
| * @retval #MSTUNE_SUCCESS execution succeeded | |||||
| * @retval #MSTUNE_FAILED execution failed | |||||
| * @par Dependencies: | |||||
| * @li tune_api.cpp: the development package this interface belongs to. | |||||
| * @li tune_api.h: the header file in which this interface is declared. | |||||
| * @see None | |||||
| * @since | |||||
| */ | |||||
| MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg); | |||||
| /** | |||||
| * @ingroup mstune | |||||
| * @par Description: gradient tuning | |||||
| * | |||||
| * @attention None | |||||
| * @param tuningGraph [IN] graph to tune | |||||
| * @param dependGraph [IN] graphs the tuning graph depends on | |||||
| * @param session [IN] GE session | |||||
| * @param option [IN] option set, containing tuning options and GE options | |||||
| * @retval #MSTUNE_SUCCESS execution succeeded | |||||
| * @retval #MSTUNE_FAILED execution failed | |||||
| * @par Dependencies: | |||||
| * @li tune_api.cpp: the development package this interface belongs to. | |||||
| * @li tune_api.h: the header file in which this interface is declared. | |||||
| * @see None | |||||
| * @since | |||||
| */ | |||||
| extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph, | |||||
| ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option); | |||||
| #endif | |||||