| @@ -13,6 +13,11 @@ set(FBS_FILES | |||
| ) | |||
# Generate flatbuffer sources from the shared schema directory.
# fix: a path separator was missing between CMAKE_CURRENT_SOURCE_DIR and "../../schema",
# producing a malformed path like ".../subdir../../schema".
ms_build_flatbuffers(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/../../schema generated_fbs_files ${SERVER_FLATBUFFER_OUTPUT})
| if(ENABLE_D) | |||
| include_directories(${CMAKE_CURRENT_SOURCE_DIR}/backend/kernel_compiler/aicpu/aicpu_ops) | |||
| add_subdirectory(backend/kernel_compiler/aicpu/aicpu_ops) | |||
| endif() | |||
| if(ENABLE_CPU) | |||
| if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64") | |||
| set(PLATFORM_ARM64 "on") | |||
| @@ -18,6 +18,10 @@ if(ENABLE_D) | |||
| "rts/*.cc" | |||
| "hccl/*.cc" | |||
| ) | |||
# Collect the aicpu_ops sources (built separately with the device toolchain)
# so they can be excluded from the host-side D_SRC_LIST.
file(GLOB_RECURSE AICPU_OPS_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
    "aicpu/aicpu_ops/*.cc"
)
# fix: list(REMOVE_ITEM) errors out when called with no items, so only filter
# when the glob actually matched something.
if(AICPU_OPS_SRC)
    list(REMOVE_ITEM D_SRC_LIST ${AICPU_OPS_SRC})
endif()
| add_compile_definitions(ENABLE_D) | |||
| endif() | |||
| @@ -0,0 +1,64 @@ | |||
# Cross-compile the AICPU kernels shared library with the Ascend HCC (aarch64)
# toolchain. The host compilers are saved here and restored at the end of this
# file so the rest of the build is unaffected.
set(NORMAL_CMAKE_C_COMPILER ${CMAKE_C_COMPILER})
set(NORMAL_CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
# Toolkit location: honor ASCEND_CUSTOM_PATH when set, else the default install path.
if(DEFINED ENV{ASCEND_CUSTOM_PATH})
    set(TOOLCHAIN_PATH $ENV{ASCEND_CUSTOM_PATH}/toolkit/toolchain)
else()
    set(TOOLCHAIN_PATH /usr/local/Ascend/toolkit/toolchain)
endif()
# NOTE(review): swapping CMAKE_C(XX)_COMPILER after project() is not officially
# supported by CMake; this relies on current behavior — a dedicated toolchain
# file would be the robust alternative.
set(CMAKE_C_COMPILER ${TOOLCHAIN_PATH}/hcc/bin/aarch64-target-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER ${TOOLCHAIN_PATH}/hcc/bin/aarch64-target-linux-gnu-g++)
# Only build the device-side library when the cross toolchain is actually installed.
if(EXISTS ${CMAKE_C_COMPILER} AND EXISTS ${CMAKE_CXX_COMPILER})
    # Proto messages exchanged between host and AICPU kernels.
    set(AICPU_PROTO_SRC
        ${CMAKE_CURRENT_SOURCE_DIR}/aicpu_op_proto/aicpu_tensor.proto
    )
    ms_protobuf_generate(PROTO_SRCS PROTO_HDRS ${AICPU_PROTO_SRC})
    set(AICPU_SRC
        ${PROTO_SRCS}
        ${CMAKE_CURRENT_SOURCE_DIR}/common/kernel_base.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/common/kernel_log.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_async_event.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_context.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_pulse.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_sharder.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/random_choice_with_mask_kernels.cc
    )
    add_library(aicpu_kernels SHARED
        ${AICPU_SRC}
    )
    # Hidden visibility keeps the exported symbol surface minimal for the device runtime.
    target_compile_options(aicpu_kernels PRIVATE
        -march=armv8-a
        -O2
        -fvisibility-inlines-hidden
        -fvisibility=hidden
        -fno-strict-aliasing
        -fno-common
    )
    # NOTE(review): the --whole-archive/--no-whole-archive pair encloses nothing
    # and "-shared" is implied by SHARED — presumably leftovers; confirm before removing.
    target_link_libraries(aicpu_kernels PRIVATE
        -ldl
        -shared
        PUBLIC
        ${SECUREC_LIBRARY}
        -Wl,--whole-archive
        -Wl,--no-whole-archive
        -Wl,-Bsymbolic
        -rdynamic
        mindspore::protobuf
        -pthread
    )
    set(INSTALL_LIBRARY_DIR lib)
    # OPTIONAL: skip install silently when the library was not built (no toolchain).
    install(TARGETS aicpu_kernels OPTIONAL
        EXPORT aicpu_kernels-targets
        LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
    )
endif()
# Restore the host compilers saved at the top of this file.
set(CMAKE_C_COMPILER ${NORMAL_CMAKE_C_COMPILER})
set(CMAKE_CXX_COMPILER ${NORMAL_CMAKE_CXX_COMPILER})
| @@ -0,0 +1,118 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| syntax = "proto3"; | |||
| package aicpuops; | |||
| message AttrValue { | |||
| message ArrayValue { | |||
| repeated bytes s = 2; //"array(string)" | |||
| repeated int64 i = 3 [ packed = true ]; //"array(int)" | |||
| repeated float f = 4 [ packed = true ]; //"array(float)" | |||
| repeated bool b = 5 [ packed = true ]; //"array(bool)" | |||
| repeated int32 type = 6 [ packed = true ]; //"array(type)" | |||
| repeated TensorShape shape = 7; //"array(shape)" | |||
| repeated Tensor tensor = 8; //"array(tensor)" | |||
| } | |||
| oneof value { | |||
| ArrayValue array = 1; | |||
| bytes s = 2; //"string" | |||
| int64 i = 3; //"int" | |||
| float f = 4; //"float" | |||
| bool b = 5; //"bool" | |||
| int32 type = 6; //"type" | |||
| TensorShape shape = 7; //"shape" | |||
| Tensor tensor = 8; //"tensor" | |||
| } | |||
| } | |||
| message DynamicIdx { | |||
| int32 idx = 1; | |||
| int32 num = 2; | |||
| } | |||
| message NodeDef { | |||
| string op = 2; | |||
| map<string, AttrValue> attrs = 3; | |||
| repeated Tensor inputs = 4; | |||
| repeated Tensor outputs = 5; | |||
| map<string, DynamicIdx> dym_inputs = 6; | |||
| map<string, DynamicIdx> dym_outputs = 7; | |||
| } | |||
| message TensorShape { | |||
| // One dimension of the tensor. | |||
| message Dim { | |||
| // size must >=0 | |||
| int64 size = 1; | |||
| }; | |||
| // group dim info | |||
| repeated Dim dim = 2; | |||
| // If true, the number of dimensions in the shape is unknown. | |||
| // If true, "dim.size()" must be 0. | |||
| bool unknown_rank = 3; | |||
| // data format "NHWC" "NCHW" "NC1HWC0" OR "NONE" | |||
| int32 data_format = 4; | |||
| }; | |||
| message Tensor { | |||
| // tensor shape info | |||
| TensorShape tensor_shape = 1; | |||
| // tensor content data type | |||
| int32 tensor_type = 2; | |||
| // tensor memory device | |||
| // data located memory device , "DDR" "HBM" OR "NONE" | |||
| string mem_device = 3; | |||
| string name = 4; | |||
| uint64 data_ptr = 5; | |||
| uint64 data_size = 6; | |||
| } | |||
// Tensor element data types exchanged between host and AICPU kernels.
// NOTE(review): value 5 is skipped in the numbering — presumably reserved;
// confirm against the host-side dtype enum before reusing it.
enum DataType {
  MS_FLOAT32 = 0;
  MS_FLOAT16 = 1;
  MS_INT8 = 2;
  MS_INT32 = 3;
  MS_UINT8 = 4;
  MS_INT16 = 6;
  MS_UINT16 = 7;
  MS_UINT32 = 8;
  MS_INT64 = 9;
  MS_UINT64 = 10;
  MS_FLOAT64 = 11;
  MS_BOOL = 12;
  MS_STRING = 13;
  MS_DUAL_SUB_INT8 = 14;
  MS_DUAL_SUB_UINT8 = 15;
  MS_COMPLEX64 = 16;
  MS_COMPLEX128 = 17;
  MS_QINT8 = 18;
  MS_QINT16 = 19;
  MS_QINT32 = 20;
  MS_QUINT8 = 21;
  MS_QUINT16 = 22;
  MS_RESOURCE = 23;
  MS_STRING_REF = 24;
  MS_DUAL = 25;
  MS_UNKNOWN = 26;
}
| @@ -0,0 +1,137 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "aicpu_sharder/aicpu_async_event.h" | |||
| #include <string> | |||
| #include "common/kernel_log.h" | |||
| #include "aicpu_sharder/aicpu_context.h" | |||
| namespace aicpu { | |||
| AsyncEventManager &AsyncEventManager::GetInstance() { | |||
| static AsyncEventManager async_event_manager; | |||
| return async_event_manager; | |||
| } | |||
| void AsyncEventManager::Register(const NotifyFunc ¬ify) { notify_func_ = notify; } | |||
| void AsyncEventManager::NotifyWait(void *notify_param, const uint32_t param_len) { | |||
| if (notify_func_ != nullptr) { | |||
| notify_func_(notify_param, param_len); | |||
| } | |||
| } | |||
| bool AsyncEventManager::GenTaskInfoFromCtx(AsyncTaskInfo *task_info) { | |||
| if (task_info == nullptr) { | |||
| AICPU_LOGE("AsyncEventManager GenTaskInfoFromCtx failed, task_info is nullptr."); | |||
| return false; | |||
| } | |||
| (void)aicpu::GetTaskAndStreamId(&task_info->task_id, &task_info->stream_id); | |||
| std::string wait_id_value; | |||
| std::string ker_wait_id(aicpu::kContextKeyWaitId); | |||
| auto status = aicpu::GetThreadLocalCtx(ker_wait_id, &wait_id_value); | |||
| if (status != aicpu::AICPU_ERROR_NONE) { | |||
| AICPU_LOGE("GetThreadLocalCtx failed, ret=%d, key=%s.", status, ker_wait_id.c_str()); | |||
| return false; | |||
| } | |||
| task_info->wait_id = atoi(wait_id_value.c_str()); | |||
| std::string wait_type_value; | |||
| std::string key_wait_type(aicpu::kContextKeyWaitType); | |||
| status = aicpu::GetThreadLocalCtx(key_wait_type, &wait_type_value); | |||
| if (status != aicpu::AICPU_ERROR_NONE) { | |||
| AICPU_LOGE("GetThreadLocalCtx failed, ret=%d, key=%s.", status, key_wait_type.c_str()); | |||
| return false; | |||
| } | |||
| task_info->wait_type = atoi(wait_type_value.c_str()); | |||
| std::string start_tick_value; | |||
| std::string key_start_tick(aicpu::kContextKeyStartTick); | |||
| status = aicpu::GetThreadLocalCtx(key_start_tick, &start_tick_value); | |||
| if (status != aicpu::AICPU_ERROR_NONE) { | |||
| AICPU_LOGE("GetThreadLocalCtx failed, ret=%d, key=%s.", status, key_start_tick.c_str()); | |||
| return false; | |||
| } | |||
| task_info->start_tick = atol(start_tick_value.c_str()); | |||
| status = aicpu::GetOpname(aicpu::GetAicpuThreadIndex(), &task_info->op_name); | |||
| if (status != aicpu::AICPU_ERROR_NONE) { | |||
| AICPU_LOGE("GetOpname failed, ret=%d.", status); | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
// Register callback cb for the (event_id, sub_event_id) pair. The current
// thread's task/stream/wait context is captured into an AsyncTaskInfo and
// stored in asyncTaskMap_ under the event key.
// Fails when cb is null, context capture fails, or the pair is already registered.
bool AsyncEventManager::RegEventCb(const uint32_t event_id, const uint32_t sub_event_id,
                                   const EventProcessCallBack &cb) {
  if (cb == nullptr) {
    AICPU_LOGE("AsyncEventManager RegEventCb failed, cb is nullptr.");
    return false;
  }
  AsyncTaskInfo task_info;
  task_info.task_cb = cb;
  if (!GenTaskInfoFromCtx(&task_info)) {
    AICPU_LOGE("AsyncEventManager GenTaskInfoFromCtx failed.");
    return false;
  }
  AsyncEventInfo info;
  info.event_id = event_id;
  info.sub_event_id = sub_event_id;
  {
    // Scope the lock to the map insertion only.
    std::unique_lock<std::mutex> lk(map_mutex_);
    auto iter = asyncTaskMap_.find(info);
    if (iter != asyncTaskMap_.end()) {
      // Duplicate registration for the same (event_id, sub_event_id).
      AICPU_LOGE("AsyncEventManager RegEventCb failed.");
      return false;
    }
    asyncTaskMap_[info] = task_info;
  }
  AICPU_LOGI(
    "AsyncEventManager RegEventCb success, event_id[%u], subeventId[%u], taskId[%lu],"
    " streamId[%u], waitType[%u], waitId[%u], opName[%s], startTick[%lu].",
    event_id, sub_event_id, task_info.task_id, task_info.stream_id, task_info.wait_type, task_info.wait_id,
    task_info.op_name.c_str(), task_info.start_tick);
  return true;
}
| void AsyncEventManager::ProcessEvent(const uint32_t event_id, const uint32_t sub_event_id, void *param) { | |||
| AICPU_LOGI("AsyncEventManager proc event_id = %d, sub_event_id = %d", event_id, sub_event_id); | |||
| AsyncEventInfo info; | |||
| info.event_id = event_id; | |||
| info.sub_event_id = sub_event_id; | |||
| EventProcessCallBack taskCb = nullptr; | |||
| { | |||
| std::unique_lock<std::mutex> lk(map_mutex_); | |||
| auto iter = asyncTaskMap_.find(info); | |||
| if (iter == asyncTaskMap_.end()) { | |||
| AICPU_LOGW("AsyncEventManager no async task to deal with."); | |||
| return; | |||
| } | |||
| taskCb = iter->second.task_cb; | |||
| asyncTaskMap_.erase(iter); | |||
| } | |||
| if (taskCb != nullptr) { | |||
| taskCb(param); | |||
| } | |||
| AICPU_LOGI("AsyncEventManager proc end!"); | |||
| return; | |||
| } | |||
| } // namespace aicpu | |||
| void AicpuNotifyWait(void *notify_param, const uint32_t param_len) { | |||
| aicpu::AsyncEventManager::GetInstance().NotifyWait(notify_param, param_len); | |||
| return; | |||
| } | |||
| bool AicpuRegEventCb(const uint32_t event_id, const uint32_t sub_event_id, const aicpu::EventProcessCallBack &cb) { | |||
| return aicpu::AsyncEventManager::GetInstance().RegEventCb(event_id, sub_event_id, cb); | |||
| } | |||
| @@ -0,0 +1,144 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OPS_AICPU_ASYNC_EVENT_H_ | |||
| #define AICPU_OPS_AICPU_ASYNC_EVENT_H_ | |||
| #include <functional> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include <mutex> | |||
| #include "aicpu_sharder/aicpu_context.h" | |||
| namespace aicpu { | |||
| using NotifyFunc = std::function<void(void *param, const uint32_t param_len)>; | |||
| using EventProcessCallBack = std::function<void(void *param)>; | |||
// Key identifying an async event: (event_id, sub_event_id).
// Members are value-initialized so a default-constructed key is deterministic.
struct AsyncEventInfo {
  uint32_t event_id = 0;
  uint32_t sub_event_id = 0;
  // fix: const-qualified so equality comparison works on const objects/references.
  bool operator==(const AsyncEventInfo &info) const {
    return (event_id == info.event_id) && (sub_event_id == info.sub_event_id);
  }
};
// Strict weak ordering (event_id first, then sub_event_id) so AsyncEventInfo
// can serve as a std::map key.
inline bool operator<(const AsyncEventInfo &info1, const AsyncEventInfo &info2) {
  return (info1.event_id < info2.event_id) ||
         ((info1.event_id == info2.event_id) && (info1.sub_event_id < info2.sub_event_id));
}
| struct AsyncTaskInfo { | |||
| uint64_t start_tick; | |||
| std::string op_name; | |||
| uint8_t wait_type; | |||
| uint32_t wait_id; | |||
| uint64_t task_id; | |||
| uint32_t stream_id; | |||
| EventProcessCallBack task_cb; | |||
| }; | |||
| struct AsyncNotifyInfo { | |||
| uint8_t wait_type; | |||
| uint32_t wait_id; | |||
| uint64_t task_id; | |||
| uint32_t stream_id; | |||
| uint32_t ret_code; | |||
| aicpu::aicpuContext_t ctx; | |||
| }; | |||
| class AsyncEventManager { | |||
| public: | |||
| /** | |||
| * Get the unique object of this class | |||
| */ | |||
| static AsyncEventManager &GetInstance(); | |||
| /** | |||
| * Register notify callback function | |||
| * @param notify wait notify callback function | |||
| */ | |||
| void Register(const NotifyFunc ¬ify); | |||
| /** | |||
| * Notify wait task | |||
| * @param notify_param notify param info | |||
| * @param param_len notify_param len | |||
| */ | |||
| void NotifyWait(void *notify_param, const uint32_t param_len); | |||
| /** | |||
| * Register Event callback function, async op call | |||
| * @param eventID EventId | |||
| * @param sub_event_id queue id | |||
| * @param cb Event callback function | |||
| * @return whether register success | |||
| */ | |||
| bool RegEventCb(const uint32_t event_id, const uint32_t sub_event_id, const EventProcessCallBack &cb); | |||
| /** | |||
| * Process event | |||
| * @param event_id EventId | |||
| * @param sub_event_id queue id | |||
| * @param param event param | |||
| */ | |||
| void ProcessEvent(const uint32_t event_id, const uint32_t sub_event_id, void *param = nullptr); | |||
| private: | |||
| AsyncEventManager() : notify_func_(nullptr) {} | |||
| ~AsyncEventManager() = default; | |||
| AsyncEventManager(const AsyncEventManager &) = delete; | |||
| AsyncEventManager &operator=(const AsyncEventManager &) = delete; | |||
| AsyncEventManager(AsyncEventManager &&) = delete; | |||
| AsyncEventManager &operator=(AsyncEventManager &&) = delete; | |||
| // generate task info from ctx | |||
| bool GenTaskInfoFromCtx(AsyncTaskInfo *task_info); | |||
| // wait notify function | |||
| NotifyFunc notify_func_; | |||
| std::mutex map_mutex_; | |||
| std::map<AsyncEventInfo, AsyncTaskInfo> asyncTaskMap_; | |||
| }; | |||
| } // namespace aicpu | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| /** | |||
| * Notify wait task | |||
| * @param notify_param notify info | |||
| * @param param_len | |||
| */ | |||
| __attribute__((weak)) void AicpuNotifyWait(void *notify_param, const uint32_t param_len); | |||
| /** | |||
| * Register Event callback function, async op call | |||
| * @param info Registered event information | |||
| * @param cb Event callback function | |||
| * @return whether register success | |||
| */ | |||
| __attribute__((weak)) bool AicpuRegEventCb(const uint32_t event_id, const uint32_t sub_event_id, | |||
| const aicpu::EventProcessCallBack &cb); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // AICPU_OPS_AICPU_ASYNC_EVENT_H_ | |||
| @@ -0,0 +1,308 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "aicpu_sharder/aicpu_context.h" | |||
| #include <map> | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <mutex> | |||
| #include <thread> | |||
| #include <utility> | |||
| #include "common/kernel_log.h" | |||
| namespace { | |||
| // current thread context | |||
| aicpu::aicpuContext_t g_cur_ctx; | |||
| // task monitor context | |||
| std::unique_ptr<std::string[]> g_opsname(nullptr); | |||
| thread_local uint32_t g_thread_index = UINT32_MAX; | |||
| uint32_t g_aicpu_core_cnt = 0; | |||
| thread_local std::map<std::string, std::string> g_thread_local_ctx; | |||
| thread_local aicpu::streamAndTaskId_t g_stream_and_task_id; | |||
| // aicpu run mode | |||
| uint32_t g_run_mode = aicpu::AicpuRunMode::THREAD_MODE; | |||
| // context info | |||
| std::mutex default_mutex; | |||
| std::vector<std::map<std::string, std::string>> g_default_thread_ctx; | |||
| std::mutex prof_mutex; | |||
| std::vector<std::map<std::string, std::string>> g_prof_thread_ctx; | |||
| std::mutex debug_mutex; | |||
| std::vector<std::map<std::string, std::string>> g_debug_thread_ctx; | |||
| std::mutex func_map_mutex; | |||
| std::map<uint32_t, std::map<uint32_t, std::function<void(void *)>>> g_func_map; | |||
| std::map<std::string, std::string> &GetThreadCtx(aicpu::CtxType type, uint32_t thread_index) { | |||
| if (type == aicpu::CTX_DEBUG) { | |||
| std::unique_lock<std::mutex> mutex(default_mutex); | |||
| if (thread_index >= g_debug_thread_ctx.size()) { | |||
| g_debug_thread_ctx.resize(thread_index + 1); | |||
| } | |||
| return g_debug_thread_ctx[thread_index]; | |||
| } else if (type == aicpu::CTX_PROF) { | |||
| std::unique_lock<std::mutex> mutex(prof_mutex); | |||
| if (thread_index >= g_prof_thread_ctx.size()) { | |||
| g_prof_thread_ctx.resize(thread_index + 1); | |||
| } | |||
| return g_prof_thread_ctx[thread_index]; | |||
| } else { | |||
| std::unique_lock<std::mutex> mutex(debug_mutex); | |||
| if (thread_index >= g_default_thread_ctx.size()) { | |||
| g_default_thread_ctx.resize(thread_index + 1); | |||
| } | |||
| return g_default_thread_ctx[thread_index]; | |||
| } | |||
| } | |||
| } // namespace | |||
| namespace aicpu { | |||
| status_t aicpuSetContext(aicpuContext_t *ctx) { | |||
| g_cur_ctx = *ctx; | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t aicpuGetContext(aicpuContext_t *ctx) { | |||
| *ctx = g_cur_ctx; | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t InitTaskMonitorContext(uint32_t aicpu_core_cnt) { | |||
| if (aicpu_core_cnt == 0) { | |||
| AICPU_LOGE("invalid aicpu core count[%u]", aicpu_core_cnt); | |||
| return AICPU_ERROR_FAILED; | |||
| } | |||
| g_aicpu_core_cnt = aicpu_core_cnt; | |||
| AICPU_LOGI("aicpu core count[%u]", aicpu_core_cnt); | |||
| g_opsname.reset(new (std::nothrow) std::string[aicpu_core_cnt]); | |||
| if (g_opsname == nullptr) { | |||
| AICPU_LOGE("malloc ops name memory for task monitor failed"); | |||
| return AICPU_ERROR_FAILED; | |||
| } | |||
| for (uint32_t index = 0; index < aicpu_core_cnt; ++index) { | |||
| g_opsname[index] = "null"; | |||
| } | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t SetAicpuThreadIndex(uint32_t thread_index) { | |||
| g_thread_index = thread_index; | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| uint32_t GetAicpuThreadIndex() { return g_thread_index; } | |||
| status_t SetOpname(const std::string &opname) { | |||
| if (g_opsname != nullptr && g_thread_index < g_aicpu_core_cnt) { | |||
| AICPU_LOGI("set op name to %s for thread[%u]", opname.c_str(), g_thread_index); | |||
| g_opsname[g_thread_index] = opname; | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| // maintenance function, if failed just print event log | |||
| AICPU_LOGEVENT( | |||
| "set op name[%s] failed, thread index[%u] should be less than total aicpu core count[%u]," | |||
| " and ops name array addr[%p] cannot null", | |||
| opname.c_str(), g_thread_index, g_aicpu_core_cnt, g_opsname.get()); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t GetOpname(uint32_t thread_index, std::string *opname) { | |||
| *opname = "null"; | |||
| if (g_opsname != nullptr && thread_index < g_aicpu_core_cnt) { | |||
| *opname = g_opsname[thread_index]; | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| // maintenance function, if failed just print event log | |||
| AICPU_LOGEVENT( | |||
| "get op name failed, thread index[%u] should be less than total aicpu core count[%u]," | |||
| " and ops name array addr[%p] cannot null", | |||
| g_thread_index, g_aicpu_core_cnt, g_opsname.get()); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t SetTaskAndStreamId(uint64_t task_id, uint32_t stream_id) { | |||
| g_stream_and_task_id.task_id = task_id; | |||
| g_stream_and_task_id.stream_id = stream_id; | |||
| AICPU_LOGI("Set task_id:[%lu] and stream_id:[%u] success.", task_id, stream_id); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t GetTaskAndStreamId(uint64_t *task_id, uint32_t *stream_id) { | |||
| *task_id = g_stream_and_task_id.task_id; | |||
| *stream_id = g_stream_and_task_id.stream_id; | |||
| AICPU_LOGI("Get task_id:[%lu] and stream_id:[%u] success.", *task_id, *stream_id); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t SetAicpuRunMode(uint32_t run_mode) { | |||
| g_run_mode = run_mode; | |||
| AICPU_LOGI("Set run_mode:[%u] success.", run_mode); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t GetAicpuRunMode(uint32_t *run_mode) { | |||
| *run_mode = g_run_mode; | |||
| AICPU_LOGI("Get run_mode:[%u] success.", *run_mode); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t SetThreadLocalCtx(const std::string &key, const std::string &value) { | |||
| if (key.empty()) { | |||
| AICPU_LOGE("set thread local context failed, key is empty"); | |||
| return AICPU_ERROR_FAILED; | |||
| } | |||
| try { | |||
| g_thread_local_ctx[key] = value; | |||
| } catch (std::exception &e) { | |||
| AICPU_LOGE("set thread local context failed, %s", e.what()); | |||
| return AICPU_ERROR_FAILED; | |||
| } | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t GetThreadLocalCtx(const std::string &key, std::string *value) { | |||
| if (key.empty()) { | |||
| AICPU_LOGE("get thread local context failed, key is empty"); | |||
| return AICPU_ERROR_FAILED; | |||
| } | |||
| auto iter = g_thread_local_ctx.find(key); | |||
| if (iter != g_thread_local_ctx.end()) { | |||
| *value = iter->second; | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| AICPU_LOGW("get thread local context failed, no such key[%s]", key.c_str()); | |||
| return AICPU_ERROR_FAILED; | |||
| } | |||
| status_t RemoveThreadLocalCtx(const std::string &key) { | |||
| auto iter = g_thread_local_ctx.find(key); | |||
| if (iter != g_thread_local_ctx.end()) { | |||
| g_thread_local_ctx.erase(iter); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| AICPU_LOGE("remove thread local context failed, no such key[%s]", key.c_str()); | |||
| return AICPU_ERROR_FAILED; | |||
| } | |||
| const std::map<std::string, std::string> &GetAllThreadCtxInfo(aicpu::CtxType type, uint32_t thread_index) { | |||
| AICPU_LOGI("Get all thread ctx info begin, thread index:%u", thread_index); | |||
| auto &ctx = GetThreadCtx(type, thread_index); | |||
| return ctx; | |||
| } | |||
| status_t RegisterEventCallback(uint32_t event_id, uint32_t subevent_id, std::function<void(void *)> func) { | |||
| std::lock_guard<std::mutex> lock(func_map_mutex); | |||
| std::map<uint32_t, std::function<void(void *)>> &sub_map = g_func_map[event_id]; | |||
| auto it = sub_map.insert(std::make_pair(subevent_id, func)); | |||
| if (it.second == false) { | |||
| AICPU_LOGE( | |||
| "register event call function failed, repulicate register callback function by event_id[%u] " | |||
| "subevent_id[%u]", | |||
| event_id, subevent_id); | |||
| return AICPU_ERROR_FAILED; | |||
| } | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t DoEventCallback(uint32_t event_id, uint32_t subevent_id, void *param) { | |||
| std::lock_guard<std::mutex> lock(func_map_mutex); | |||
| auto iter = g_func_map.find(event_id); | |||
| if (iter == g_func_map.end()) { | |||
| AICPU_LOGE("do event callback function failed, cannot find callback function by event_id[%u] subevent_id[%u]", | |||
| event_id, event_id); | |||
| return AICPU_ERROR_FAILED; | |||
| } | |||
| std::map<uint32_t, std::function<void(void *)>> &sub_map = iter->second; | |||
| auto sub_iter = sub_map.find(subevent_id); | |||
| if (sub_iter == sub_map.end()) { | |||
| AICPU_LOGE("do event callback function failed, cannot find callback function by event_id[%u] subevent_id[%u]", | |||
| event_id, event_id); | |||
| return AICPU_ERROR_FAILED; | |||
| } | |||
| (sub_iter->second)(param); | |||
| // erase func after call | |||
| sub_map.erase(sub_iter); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| status_t UnRegisterCallback(uint32_t event_id, uint32_t subevent_id) { | |||
| std::lock_guard<std::mutex> lock(func_map_mutex); | |||
| auto iter = g_func_map.find(event_id); | |||
| if (iter == g_func_map.end()) { | |||
| AICPU_LOGEVENT( | |||
| "skip unregister event callback function, cannot find callback function by event_id[%u] " | |||
| "subevent_id[%u]", | |||
| event_id, event_id); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| std::map<uint32_t, std::function<void(void *)>> &sub_map = iter->second; | |||
| auto sub_iter = sub_map.find(subevent_id); | |||
| if (sub_iter == sub_map.end()) { | |||
| AICPU_LOGEVENT( | |||
| "skip unregister event callback function, cannot find callback function by event_id[%u] " | |||
| "subevent_id[%u]", | |||
| event_id, event_id); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| sub_map.erase(sub_iter); | |||
| return AICPU_ERROR_NONE; | |||
| } | |||
| } // namespace aicpu | |||
| aicpu::status_t SetThreadCtxInfo(aicpu::CtxType type, const std::string &key, const std::string &value) { | |||
| if (key.empty()) { | |||
| AICPU_LOGE("Set thread context failed, context type[%d], key is empty", type); | |||
| return aicpu::AICPU_ERROR_FAILED; | |||
| } | |||
| auto &ctx = GetThreadCtx(type, g_thread_index); | |||
| try { | |||
| ctx[key] = value; | |||
| } catch (std::exception &e) { | |||
| AICPU_LOGE("Set thread context failed, context type[%d], %s", type, e.what()); | |||
| return aicpu::AICPU_ERROR_FAILED; | |||
| } | |||
| return aicpu::AICPU_ERROR_NONE; | |||
| } | |||
| aicpu::status_t GetThreadCtxInfo(aicpu::CtxType type, const std::string &key, std::string *value) { | |||
| if (key.empty()) { | |||
| AICPU_LOGE("Get thread context failed, context type[%d], key is empty", type); | |||
| return aicpu::AICPU_ERROR_FAILED; | |||
| } | |||
| auto &ctx = GetThreadCtx(type, g_thread_index); | |||
| auto iter = ctx.find(key); | |||
| if (iter != ctx.end()) { | |||
| *value = iter->second; | |||
| return aicpu::AICPU_ERROR_NONE; | |||
| } | |||
| AICPU_LOGE("Get thread context failed, context type[%d], no such key[%s]", type, key.c_str()); | |||
| return aicpu::AICPU_ERROR_FAILED; | |||
| } | |||
| aicpu::status_t RemoveThreadCtxInfo(aicpu::CtxType type, const std::string &key) { | |||
| auto &ctx = GetThreadCtx(type, g_thread_index); | |||
| auto iter = ctx.find(key); | |||
| if (iter != ctx.end()) { | |||
| ctx.erase(iter); | |||
| return aicpu::AICPU_ERROR_NONE; | |||
| } | |||
| AICPU_LOGE("Remove thread context failed, context type[%d], no such key[%s]", type, key.c_str()); | |||
| return aicpu::AICPU_ERROR_FAILED; | |||
| } | |||
| @@ -0,0 +1,231 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OPS_AICPU_CONTEXT_H_ | |||
| #define AICPU_OPS_AICPU_CONTEXT_H_ | |||
| #include <sys/types.h> | |||
| #include <cstdint> | |||
| #include <string> | |||
| #include <map> | |||
| #include <functional> | |||
| #include "common/kernel_util.h" | |||
namespace aicpu {
// Per-thread execution context identifying where an aicpu task runs.
typedef struct {
  uint32_t device_id;  // device id
  uint32_t tsId;       // ts id
  pid_t host_pid;      // host pid
  uint32_t vf_id;      // vf id
} aicpuContext_t;

// Deployment mode of the aicpu runtime.
enum AicpuRunMode : uint32_t {
  PROCESS_PCIE_MODE = 0,    // 1910/1980/1951 dc, with host mode
  PROCESS_SOCKET_MODE = 1,  // MDC
  THREAD_MODE = 2,          // ctrlcpu/minirc/lhisi
  INVALID_MODE,
};

// Pair of stream id and task id identifying one submitted task.
typedef struct {
  uint32_t stream_id;
  uint64_t task_id;
} streamAndTaskId_t;

// Status code returned by the context APIs below.
typedef enum {
  AICPU_ERROR_NONE = 0,    // success
  AICPU_ERROR_FAILED = 1,  // failed
} status_t;

// Category of thread-local context storage.
enum CtxType : int32_t { CTX_DEFAULT = 0, CTX_PROF, CTX_DEBUG };

// Well-known keys for the thread context key/value store.
constexpr auto kContextKeyOpName = "opname";
constexpr auto kContextKeyPhaseOneFlag = "phaseOne";
constexpr auto kContextKeyWaitType = "waitType";
constexpr auto kContextKeyWaitId = "waitId";
constexpr auto kContextKeyStartTick = "startTick";
constexpr auto kContextKeyDrvSubmitTick = "drvSubmitTick";
constexpr auto kContextKeyDrvSchedTick = "drvSchedTick";
constexpr auto kContextKeyKernelType = "kernelType";

// NOTE: functions declared with __attribute__((weak)) may be unresolved at
// runtime if no provider library is loaded; callers should null-check them.

/**
 * set aicpu context for current thread.
 * @param [in]ctx aicpu context
 * @return status whether this operation success
 */
AICPU_VISIBILITY_API status_t aicpuSetContext(aicpuContext_t *ctx);

/**
 * get aicpu context from current thread.
 * @param [out]ctx aicpu context
 * @return status whether this operation success
 */
AICPU_VISIBILITY_API status_t aicpuGetContext(aicpuContext_t *ctx);

/**
 * init context for task monitor, called in compute process start.
 * @param [in]aicpu_core_cnt aicpu core number
 * @return status whether this operation success
 */
status_t InitTaskMonitorContext(uint32_t aicpu_core_cnt);

/**
 * set aicpu thread index for task monitor, called in thread callback function.
 * @param [in]thread_index aicpu thread index
 * @return status whether this operation success
 */
status_t SetAicpuThreadIndex(uint32_t thread_index);

/**
 * get aicpu thread index.
 * @return uint32
 */
uint32_t GetAicpuThreadIndex();

/**
 * set op name for task monitor.
 * called in libtf_kernels.so(tf op) or libaicpu_processer.so(others) or cpu kernel framework.
 * @param [in]opname op name
 * @return status whether this operation success
 */
status_t __attribute__((weak)) SetOpname(const std::string &opname);

/**
 * get op name for task monitor
 * @param [in]thread_index thread index
 * @param [out]opname op name
 * @return status whether this operation success
 */
status_t GetOpname(uint32_t thread_index, std::string *opname);

/**
 * get task and stream id.
 * @param [out]task_id task id.
 * @param [out]stream_id stream id.
 * @return status whether this operation success
 */
status_t __attribute__((weak)) GetTaskAndStreamId(uint64_t *task_id, uint32_t *stream_id);

/**
 * set task and stream id.
 * @param [in]task_id task id.
 * @param [in]stream_id stream id.
 * @return status whether this operation success
 */
status_t __attribute__((weak)) SetTaskAndStreamId(uint64_t task_id, uint32_t stream_id);

/**
 * set thread local context of key
 * @param [in]key context key
 * @param [in]value context value
 * @return status whether this operation success
 * @note Deprecated from 20201216, Replaced by SetThreadCtxInfo
 */
status_t __attribute__((weak)) SetThreadLocalCtx(const std::string &key, const std::string &value);

/**
 * get thread local context of key
 * @param [in]key context key
 * @param [out]value context value
 * @return status whether this operation success
 * @note Deprecated from 20201216, Replaced by GetThreadCtxInfo
 */
status_t GetThreadLocalCtx(const std::string &key, std::string *value);

/**
 * remove local context of key
 * @param [in]key context key
 * @return status whether this operation success
 * @note Deprecated from 20201216, Replaced by RemoveThreadCtxInfo
 */
status_t RemoveThreadLocalCtx(const std::string &key);

/**
 * get all thread context info of type
 * @param [in]type: ctx type
 * @param [in]thread_index: thread index
 * @return const std::map<std::string, std::string> &: all thread context info
 */
const std::map<std::string, std::string> &GetAllThreadCtxInfo(aicpu::CtxType type, uint32_t thread_index);

/**
 * set run mode.
 * @param [in]run_mode: run mode, one of AicpuRunMode.
 * @return status whether this operation success
 */
status_t __attribute__((weak)) SetAicpuRunMode(uint32_t run_mode);

/**
 * get run mode.
 * @param [out]run_mode: run mode.
 * @return status whether this operation success
 */
status_t __attribute__((weak)) GetAicpuRunMode(uint32_t *run_mode);

/**
 * Register callback function by event_id and subevent_id
 * @param event_id event id
 * @param subevent_id subevent id
 * @param func call back function
 */
status_t __attribute__((weak))
RegisterEventCallback(uint32_t event_id, uint32_t subevent_id, std::function<void(void *)> func);

/**
 * Do callback function by event_id and subevent_id
 * @param event_id event id
 * @param subevent_id subevent id
 * @param param event param
 */
status_t __attribute__((weak)) DoEventCallback(uint32_t event_id, uint32_t subevent_id, void *param);

/**
 * Unregister callback function by event_id and subevent_id
 * @param event_id event id
 * @param subevent_id subevent id
 */
status_t __attribute__((weak)) UnRegisterCallback(uint32_t event_id, uint32_t subevent_id);
}  // namespace aicpu
// C-linkage accessors for the typed thread context key/value store.
extern "C" {
/**
 * set thread context info of type
 * @param [in]type: ctx type
 * @param [in]key: key of context info
 * @param [in]value: value of context info
 * @return status whether this operation success
 */
AICPU_VISIBILITY_API aicpu::status_t SetThreadCtxInfo(aicpu::CtxType type, const std::string &key,
                                                      const std::string &value);

/**
 * get thread context info of type
 * @param [in]type: ctx type
 * @param [in]key: key of context info
 * @param [out]value: value of context info
 * @return status whether this operation success
 */
AICPU_VISIBILITY_API aicpu::status_t GetThreadCtxInfo(aicpu::CtxType type, const std::string &key, std::string *value);

/**
 * remove thread context info of type
 * @param [in]type: ctx type
 * @param [in]key: key of context info
 * @return status whether this operation success
 */
AICPU_VISIBILITY_API aicpu::status_t RemoveThreadCtxInfo(aicpu::CtxType type, const std::string &key);
}
| #endif // AICPU_OPS_AICPU_CONTEXT_H_ | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "aicpu_sharder/aicpu_pulse.h" | |||
| #include <unordered_map> | |||
| #include <mutex> | |||
| #include <string> | |||
| #include "common/kernel_log.h" | |||
| namespace { | |||
| static std::unordered_map<std::string, PulseNotifyFunc> pulse_notify_func_map; | |||
| static std::mutex mtx; | |||
| } // namespace | |||
| __attribute__((visibility("default"))) void AicpuPulseNotify() { | |||
| std::unique_lock<std::mutex> lck(mtx); | |||
| AICPU_LOGD("Aicpu pulse notify start, notify func num=%zu.", pulse_notify_func_map.size()); | |||
| for (auto ¬ify_func : pulse_notify_func_map) { | |||
| AICPU_LOGD("Aicpu pulse notify %s start.", notify_func.first.c_str()); | |||
| notify_func.second(); | |||
| AICPU_LOGD("Aicpu pulse notify %s end.", notify_func.first.c_str()); | |||
| } | |||
| AICPU_LOGD("Aicpu pulse notify end."); | |||
| } | |||
| __attribute__((visibility("default"))) int32_t RegisterPulseNotifyFunc(const char *name, PulseNotifyFunc func) { | |||
| if (name == nullptr) { | |||
| AICPU_LOGE("Register pulse notify func failed as param name is null"); | |||
| return -1; | |||
| } | |||
| if (func == nullptr) { | |||
| AICPU_LOGE("Register pulse notify func for %s failed as param func is null", name); | |||
| return -1; | |||
| } | |||
| std::unique_lock<std::mutex> lck(mtx); | |||
| auto ret = pulse_notify_func_map.emplace(name, func); | |||
| if (!ret.second) { | |||
| AICPU_LOGE("Register pulse notify func for %s failed.", name); | |||
| return -1; | |||
| } | |||
| AICPU_LOGI("Register pulse notify func for %s success.", name); | |||
| return 0; | |||
| } | |||
| @@ -0,0 +1,46 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#ifndef AICPU_OPS_AICPU_PULSE_H_
#define AICPU_OPS_AICPU_PULSE_H_
#include <cstdint>

#ifdef __cplusplus
extern "C" {
#endif

// Signature of a per-kernel-lib pulse callback.
typedef void (*PulseNotifyFunc)(void);

/**
 * aicpu pulse notify.
 * timer will call this method per second.
 */
void AicpuPulseNotify(void);

/**
 * Register kernel pulse notify func.
 * @param name name of kernel lib, must end with '\0' and unique.
 * @param func pulse notify function.
 * @return 0:success, other:failed.
 */
int32_t RegisterPulseNotifyFunc(const char *name, PulseNotifyFunc func);

#ifdef __cplusplus
}
#endif
#endif  // AICPU_OPS_AICPU_PULSE_H_
| @@ -0,0 +1,218 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "aicpu_sharder/aicpu_sharder.h" | |||
| #include <semaphore.h> | |||
| #include <unistd.h> | |||
| #include <error.h> | |||
| #include <atomic> | |||
| #include <algorithm> | |||
| #include <cerrno> | |||
| #include <cstring> | |||
| #include "common/kernel_log.h" | |||
| namespace aicpu { | |||
// Execute `run` when `expr` is true; one-line helper for error-path logging.
#define AICPU_SHARDER_IF_TRUE_RUN(expr, run) \
  do {                                       \
    if (expr) {                              \
      run;                                   \
    }                                        \
  } while (0)
| void SharderNonBlock::Register(const RunnerBool &schedule, const ClosureBool &do_task, uint32_t cpu_core_num) { | |||
| schedule_ = schedule; | |||
| do_task_ = do_task; | |||
| cpu_core_num_ = cpu_core_num; | |||
| } | |||
| bool SharderNonBlock::Enqueue(const Closure &closure, bool submit_topic) { | |||
| if (schedule_ != nullptr) { | |||
| return schedule_(closure, submit_topic); | |||
| } | |||
| return false; | |||
| } | |||
| void SharderNonBlock::Schedule(const Closure &closure) { | |||
| if (!Enqueue(closure)) { | |||
| closure(); | |||
| } | |||
| } | |||
// Return the aicpu core count supplied via Register() (0 if never registered).
uint32_t SharderNonBlock::GetCPUNum() { return cpu_core_num_; }
// Meyers singleton: thread-safe lazy initialization since C++11.
SharderNonBlock &SharderNonBlock::GetInstance() {
  static SharderNonBlock sharder_non_block;
  return sharder_non_block;
}
| int64_t SharderNonBlock::CeilMultiple(int64_t x, int64_t base) { | |||
| if (base == 0) { | |||
| return 0; | |||
| } | |||
| int64_t ret = x / base; | |||
| if ((x % base) != 0) { | |||
| ret++; | |||
| } | |||
| return ret; | |||
| } | |||
// Split `total` units of work into shards of roughly `per_unit_size` and run
// them through the registered scheduler, blocking until all shards finish.
// Falls back to running everything on the calling thread when no scheduler
// is registered, there is at most one core, or semaphore setup fails.
void SharderNonBlock::ParallelFor(int64_t total, int64_t per_unit_size, const SharderWork &work) {
  // NOTE(review): int64_t is printed with %lld; on LP64 targets int64_t may be
  // `long`, not `long long` — confirm the log macro tolerates this.
  AICPU_LOGI("total: %lld, per_unit_size: %lld", total, per_unit_size);
  if ((total <= 0) || (work == nullptr)) {
    AICPU_LOGE("invalid param: total<=0 or work is nullptr");
    return;
  }

  // work itself
  if ((schedule_ == nullptr) || (cpu_core_num_ <= 1)) {
    AICPU_LOGI("work itself all");
    work(0, total);
    return;
  }

  // In order to ensure a smaller scheduling delay, the maximum number of slices is twice the number of CPU cores
  const int64_t max_shard_num = static_cast<int64_t>(cpu_core_num_) * 2;

  // calculate shard number and block size
  // i.e., if total is 118, perUintSize is 2, and cpu_core_num_ is 13
  // then shard_num is 24, block_size is 5
  int64_t block_size = std::max(int64_t{1}, std::min(total, per_unit_size));
  int64_t shard_num = CeilMultiple(total, block_size);
  shard_num = std::min(max_shard_num, shard_num);
  // Recompute block size from the clamped shard count, then re-derive the
  // final shard count so the two stay consistent.
  block_size = CeilMultiple(total, shard_num);
  shard_num = CeilMultiple(total, block_size);
  AICPU_LOGI("shard number: %lld, block size: %lld", shard_num, block_size);

  // There is no need to submit an event if shard_num is 1
  if (shard_num == 1) {
    AICPU_LOGI("executes on the current thread");
    work(0, total);
    return;
  }

  std::atomic<int64_t> count(shard_num);  // a counter of shards not yet started
  sem_t sem;
  int32_t sem_init_ret = sem_init(&sem, 0, 0);
  if (sem_init_ret == -1) {
    AICPU_LOGE("sem_init error with message: %s", strerror(errno));
    work(0, total);  // degrade to serial execution rather than fail
    return;
  }

  for (int64_t start = 0; start < total; start += block_size) {
    auto limit = std::min(start + block_size, total);
    Closure closure = [&sem, &work, &count, start, limit]() {
      count--;
      // In order to ensure that user's work function exception does not affect multithread services,
      // exception capture is needed. Exception type is not cared here, and error log is printed.
      try {
        work(start, limit);
      } catch (...) {
        AICPU_LOGE("exception occurred in work function with start: %lld, limit: %lld", start, limit);
      }
      // Each shard posts exactly once; the waiter below collects shard_num posts.
      int32_t sem_post_ret = sem_post(&sem);
      AICPU_SHARDER_IF_TRUE_RUN(sem_post_ret == -1, AICPU_LOGE("sem_post error with message: %s", strerror(errno)));
    };

    // if enqueue fail, work itself
    if (!Enqueue(closure, true)) {
      AICPU_LOGI("Enqueue fail, [%lld, %lld), work itself", start, limit);
      closure();
    }
  }

  // Let the calling thread drain queued tasks while shards remain unstarted.
  if (do_task_ != nullptr) {
    bool ret = true;
    while ((count > 0) && ret) {
      AICPU_LOGI("Main thread do task begin.");
      ret = do_task_();
      AICPU_LOGI("Main thread do task end.");
    }
  }

  // Block until every shard has posted completion.
  for (int64_t i = 0; i < shard_num; ++i) {
    int sem_wait_ret = sem_wait(&sem);
    AICPU_SHARDER_IF_TRUE_RUN(sem_wait_ret == -1, AICPU_LOGE("sem_wait error with message: %s", strerror(errno)));
  }
  int32_t sem_des_ret = sem_destroy(&sem);
  AICPU_SHARDER_IF_TRUE_RUN(sem_des_ret == -1, AICPU_LOGE("sem_destroy error with message: %s", strerror(errno)));
}
| void SharderNonBlock::ParallelForHash(int64_t total, int64_t cpu_nums, const SharderWork &work) { | |||
| AICPU_LOGI("total: %lld, cpu_nums: %d", total, cpu_nums); | |||
| if (total <= 0 || work == nullptr) { | |||
| AICPU_LOGE("invalid param: total<=0 or work is nullptr"); | |||
| return; | |||
| } | |||
| if ((schedule_ == nullptr) || (cpu_core_num_ <= 1)) { | |||
| AICPU_LOGE("schedule is nullptr or cpu core num is not enough"); | |||
| return; | |||
| } | |||
| std::atomic<int64_t> count(cpu_nums); // a counter | |||
| sem_t sem; | |||
| int32_t sem_init_ret = sem_init(&sem, 0, 0); | |||
| if (sem_init_ret == -1) { | |||
| AICPU_LOGE("sem_init error with message: %s", strerror(errno)); | |||
| return; | |||
| } | |||
| for (int64_t cur = 0; cur < cpu_nums; cur++) { | |||
| Closure closure = [&sem, &work, &count, total, cur]() { | |||
| work(total, cur); | |||
| count--; | |||
| int32_t sem_post_ret = sem_post(&sem); | |||
| AICPU_SHARDER_IF_TRUE_RUN(sem_post_ret == -1, AICPU_LOGE("sem_post error with message: %s", strerror(errno))); | |||
| }; | |||
| // if enqueue fail, work itself | |||
| if (!Enqueue(closure, true)) { | |||
| closure(); | |||
| } | |||
| } | |||
| if (do_task_ != nullptr) { | |||
| bool ret = true; | |||
| while ((count > 0) && ret) { | |||
| ret = do_task_(); | |||
| } | |||
| } | |||
| for (int64_t i = 0; i < cpu_nums; i++) { | |||
| int sem_wait_ret = sem_wait(&sem); | |||
| AICPU_SHARDER_IF_TRUE_RUN(sem_wait_ret == -1, AICPU_LOGE("sem_wait error with message: %s", strerror(errno))); | |||
| } | |||
| int32_t sem_des_ret = sem_destroy(&sem); | |||
| AICPU_SHARDER_IF_TRUE_RUN(sem_des_ret == -1, AICPU_LOGE("sem_destroy error with message: %s", strerror(errno))); | |||
| } | |||
| } // namespace aicpu | |||
/**
 * Shards the "total" unit of work refer "perUintSize".
 * C-linkage wrapper delegating to the SharderNonBlock singleton.
 */
void ParallelFor(int64_t total, int64_t per_unit_size, const aicpu::SharderWork &work) {
  aicpu::SharderNonBlock::GetInstance().ParallelFor(total, per_unit_size, work);
}
/**
 * Get CPU number.
 * C-linkage wrapper delegating to the SharderNonBlock singleton.
 */
uint32_t GetCPUNum() { return aicpu::SharderNonBlock::GetInstance().GetCPUNum(); }
| @@ -0,0 +1,132 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OPS_AICPU_SHARDER_H_ | |||
| #define AICPU_OPS_AICPU_SHARDER_H_ | |||
| #include <functional> | |||
| #include <vector> | |||
| #include "common/kernel_util.h" | |||
namespace aicpu {
using Closure = std::function<void()>;                 // unit of schedulable work
using ClosureBool = std::function<bool()>;             // drain-one-task callback
using RunnerBool = std::function<bool(Closure, bool)>; // enqueue runner
using SharderWork = std::function<void(int64_t, int64_t)>;

// Non-blocking work sharder: splits work into closures and dispatches them
// through callbacks registered by the compute process. Singleton.
class SharderNonBlock {
 public:
  /**
   * Get the unique object of this class
   */
  static SharderNonBlock &GetInstance();

  /**
   * Register schedule callback function, do_task function and cpu core number
   * called by compute process
   * @param schedule Schedule callback function
   * @param do_task Callback function for itself schedule
   * @param cpu_core_num aicpu core number
   */
  void Register(const RunnerBool &schedule, const ClosureBool &do_task, uint32_t cpu_core_num);

  /**
   * Shards the "total" unit of work refer "perUintSize"
   * @param total Total unit of work
   * @param per_unit_size Minimum shard unit
   * @param work should be a callable taking (int64, int64) arguments.
                 work(start, limit) computes the work units from [start, limit),
                 i.e., [start, limit) is a shard.
   */
  void ParallelFor(int64_t total, int64_t per_unit_size, const SharderWork &work);

  /**
   * Shards the unit of work refer for hash
   * @param total, Total unit of work
   * @param cpu_nums Number of cpu cores
   * @param work should be a callable taking (int64, int64) arguments.
                 work(cur, cpu_nums) computes the work units with input hash with (cpu_nums-1) equals cur,
                 i.e. specially used by parallel unique op
   * NOTE(review): the implementation invokes work(total, cur) — confirm the
   * documented argument order against the .cc file.
   */
  void ParallelForHash(int64_t total, int64_t cpu_nums, const SharderWork &work);

  /**
   * Schedule a task use schedule function registered by compute process,
   * note that the task will actually executed asynchronously
   * @param closure Closure function with nothrow
   */
  void Schedule(const Closure &closure);

  /**
   * Get CPU number
   * @param None
   * @return CPU number
   */
  uint32_t GetCPUNum();

 private:
  SharderNonBlock() : schedule_(nullptr), do_task_(nullptr), cpu_core_num_(0) {}
  ~SharderNonBlock() = default;

  // Singleton: neither copyable nor movable.
  SharderNonBlock(const SharderNonBlock &) = delete;
  SharderNonBlock &operator=(const SharderNonBlock &) = delete;
  SharderNonBlock(SharderNonBlock &&) = delete;
  SharderNonBlock &operator=(SharderNonBlock &&) = delete;

  /**
   * Closure function enqueue
   * @param closure Closure function can be called
   * @param submit_topic whether submit topic, true means submit topic
   * @return whether enqueue of closure success
   */
  bool Enqueue(const Closure &closure, bool submit_topic = false);

  /**
   * Calculate how many times, which ceiled, "x" is "base".
   * i.e., x is 1, base is 2, this function will return 1
   * @param x An integral
   * @param base An integral as base when cal multiple
   * @return ceiled multiple
   */
  inline int64_t CeilMultiple(int64_t x, int64_t base);

 private:
  RunnerBool schedule_;    // enqueue runner
  ClosureBool do_task_;    // a callback, do task from task queue
  uint32_t cpu_core_num_;  // aicpu core number
};  // SharderNonBlock
}  // namespace aicpu
// C-linkage entry points wrapping the SharderNonBlock singleton.
extern "C" {
/**
 * Shards the "total" unit of work refer "perUintSize"
 * @param total Total unit of work
 * @param per_unit_size Minimum shard unit
 * @param work should be a callable taking (int64, int64) arguments.
               work(start, limit) computes the work units from [start, limit),
               i.e., [start, limit) is a shard.
 */
AICPU_VISIBILITY_API void ParallelFor(int64_t total, int64_t per_unit_size, const aicpu::SharderWork &work);

/**
 * Get CPU number
 * @param None
 * @return CPU number
 */
AICPU_VISIBILITY_API uint32_t GetCPUNum();
}
| #endif // AICPU_OPS_AICPU_SHARDER_H_ | |||
| @@ -0,0 +1,62 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OPS_AICPU_DISTINCT_UNIFORM_INT_DISTRIBUTION_H_ | |||
| #define AICPU_OPS_AICPU_DISTINCT_UNIFORM_INT_DISTRIBUTION_H_ | |||
| #include <random> | |||
| #include <unordered_set> | |||
namespace aicpu {
// Uniformly samples integers from the closed range [inf, sup] without
// repetition: each call to exec() returns a value not produced since the
// last reset(). Calling exec() after the range is exhausted terminates.
template <typename IntType = int>
class distinct_uniform_int_distribution {
 public:
  using result_type = IntType;

 private:
  using set_type = std::unordered_set<result_type>;
  using distr_type = std::uniform_int_distribution<result_type>;

 public:
  distinct_uniform_int_distribution(result_type inf, result_type sup)
      : inf_(inf), sup_(sup), range_(sup_ - inf_ + 1), distr_(inf_, sup_) {}
  ~distinct_uniform_int_distribution() = default;

  // Forget all previously returned values and reset the underlying distribution.
  void reset() {
    distr_.reset();
    uset_.clear();
  }

  // Draw one value not yet returned since the last reset().
  // Terminates the program if every value in [inf_, sup_] was already drawn.
  template <typename Generator>
  result_type exec(Generator *engine) {
    if (uset_.size() >= range_) {
      std::terminate();  // nothing left to hand out
    }
    // Rejection-sample until an unseen value comes up.
    result_type candidate = distr_(*engine);
    while (uset_.count(candidate) > 0) {
      candidate = distr_(*engine);
    }
    uset_.insert(candidate);
    return candidate;
  }

 private:
  const result_type inf_;   // lower bound (inclusive)
  const result_type sup_;   // upper bound (inclusive)
  const size_t range_;      // number of distinct values available
  distr_type distr_;        // underlying uniform distribution over [inf_, sup_]
  set_type uset_;           // values already returned
};
}  // namespace aicpu
| #endif // AICPU_OPS_AICPU_DISTINCT_UNIFORM_INT_DISTRIBUTION_H_ | |||
| @@ -0,0 +1,255 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <map> | |||
| #include "common/kernel_base.h" | |||
| #include "common/kernel_errcode.h" | |||
| #include "common/tensor.h" | |||
| namespace aicpu { | |||
namespace {
// max param len limit 10k.
constexpr uint32_t MAX_PARAM_LEN = 10240;
// max io address num limit 1024
constexpr uint32_t MAX_IO_ADDR_NUMPARAM_LEN = 1024;
}  // namespace
// Byte size of each supported aicpuops data type.
// Note: MS_FLOAT16 is encoded as sizeof(float) / 2 == 2 bytes.
static const std::map<const ::aicpuops::DataType, size_t> kKernelBaseDataTypeSize = {
  {aicpuops::MS_BOOL, sizeof(bool)},       {aicpuops::MS_INT8, sizeof(int8_t)},
  {aicpuops::MS_UINT8, sizeof(uint8_t)},   {aicpuops::MS_INT16, sizeof(int16_t)},
  {aicpuops::MS_UINT16, sizeof(uint16_t)}, {aicpuops::MS_INT32, sizeof(int32_t)},
  {aicpuops::MS_UINT32, sizeof(uint32_t)}, {aicpuops::MS_INT64, sizeof(int64_t)},
  {aicpuops::MS_UINT64, sizeof(uint64_t)}, {aicpuops::MS_FLOAT16, sizeof(float) / 2},
  {aicpuops::MS_FLOAT32, sizeof(float)},   {aicpuops::MS_FLOAT64, sizeof(double)}};
// Construct a kernel with empty parse state; ParseParam() fills the rest.
KernelBase::KernelBase(const std::string &kernel_name)
    : kernel_name_(kernel_name),
      extend_param_len_(0),
      extend_param_base_(nullptr),
      param_head_(nullptr),
      unknow_shape_(false) {}
// Parse the raw kernel parameter blob: header, io addresses, node def,
// ext info, and (for unknown-shape ops) updated shapes; finally defers to
// the subclass via ParseKernelParam().
// @param param pointer to an AicpuParamHead followed by extend data
// @return AICPU_KERNEL_STATE_SUCCESS or AICPU_KERNEL_STATE_PARAM_INVALID
uint32_t KernelBase::ParseParam(void *param) {
  if (param == nullptr) {
    AICPU_LOGE("Kernel:%s ParseParam param is null.", kernel_name_.c_str());
    return AICPU_KERNEL_STATE_PARAM_INVALID;
  }

  // parse param_len: total length must cover the head and stay within bounds.
  param_head_ = static_cast<AicpuParamHead *>(param);
  if (param_head_->length < sizeof(AicpuParamHead) || param_head_->length > MAX_PARAM_LEN) {
    AICPU_LOGE("Kernel:%s param length=%u not in [%zu, %u].", kernel_name_.c_str(), param_head_->length,
               sizeof(AicpuParamHead), MAX_PARAM_LEN);
    return AICPU_KERNEL_STATE_PARAM_INVALID;
  }

  // extend area starts right after the fixed-size head.
  auto param_base = static_cast<uint8_t *>(param);
  extend_param_base_ = param_base + sizeof(AicpuParamHead);
  extend_param_len_ = param_head_->length - sizeof(AicpuParamHead);

  if (param_head_->ioAddrNum > 0) {
    if (param_head_->ioAddrNum > MAX_IO_ADDR_NUMPARAM_LEN) {
      AICPU_LOGE("Kernel:%s param ioAddrNum=%u is over %u.", kernel_name_.c_str(), param_head_->ioAddrNum,
                 MAX_IO_ADDR_NUMPARAM_LEN);
      return AICPU_KERNEL_STATE_PARAM_INVALID;
    }
    // The extend area begins with ioAddrNum uint64 device addresses.
    uint32_t addr_len = param_head_->ioAddrNum * sizeof(uint64_t);
    if (extend_param_len_ < addr_len) {
      AICPU_LOGE("Kernel:%s extend param is not enough for io addr, ioAddrNum=%u, extendParamLen=%u.",
                 kernel_name_.c_str(), param_head_->ioAddrNum, extend_param_len_);
      return AICPU_KERNEL_STATE_PARAM_INVALID;
    }
    auto io_addr_base = reinterpret_cast<uint64_t *>(extend_param_base_);
    for (uint32_t i = 0; i < param_head_->ioAddrNum; ++i) {
      io_addrs_.push_back(static_cast<uintptr_t>(io_addr_base[i]));
    }
    // Advance the cursor past the consumed address table.
    extend_param_base_ = extend_param_base_ + addr_len;
    extend_param_len_ -= addr_len;
  }

  AICPU_CHK_STATUS_RET(ParseNodeDef())
  AICPU_CHK_STATUS_RET(ParseExtInfo())

  if (unknow_shape_) {
    AICPU_LOGI("Unknown shape op: %s", kernel_name_.c_str());
    AICPU_CHK_STATUS_RET(UpdateInputShape())
    AICPU_CHK_STATUS_RET(UpdateOutputShape())
  }
  return ParseKernelParam();
}
| uint32_t KernelBase::Compute(void *param) { | |||
| uint32_t ret = ParseParam(param); | |||
| if (ret != AICPU_KERNEL_STATE_SUCCESS) { | |||
| AICPU_LOGE("Kernel:%s ParseParam failed, ret=%u.", kernel_name_.c_str(), ret); | |||
| return ret; | |||
| } | |||
| return DoCompute(); | |||
| } | |||
| size_t KernelBase::GetDataTypeSize(::aicpuops::DataType data_type) { | |||
| auto it = kKernelBaseDataTypeSize.find(data_type); | |||
| if (it == kKernelBaseDataTypeSize.end()) { | |||
| AICPU_LOGE("don't support input tensor types"); | |||
| return 0; | |||
| } | |||
| return it->second; | |||
| } | |||
| template <typename T> | |||
| uint32_t KernelBase::ParseExtendParam(T *param_var, std::string param_name) { | |||
| if (extend_param_len_ < sizeof(T)) { | |||
| AICPU_LOGE("Kernel:%s extend param is not enough for [%s] addr, need_len=%u, extendParamLen=%u.", | |||
| kernel_name_.c_str(), param_name.c_str(), sizeof(T), extend_param_len_); | |||
| return AICPU_KERNEL_STATE_PARAM_INVALID; | |||
| } | |||
| T *param = reinterpret_cast<T *>(extend_param_base_); | |||
| if (param != nullptr) { | |||
| *param_var = *param; | |||
| extend_param_base_ += sizeof(T); | |||
| extend_param_len_ -= sizeof(T); | |||
| return AICPU_KERNEL_STATE_SUCCESS; | |||
| } | |||
| AICPU_LOGE("Kernel:%s extend param for [%s] addr is invalid.", kernel_name_.c_str(), param_name.c_str()); | |||
| return AICPU_KERNEL_STATE_PARAM_INVALID; | |||
| } | |||
// Parse the serialized NodeDef proto from the extend parameter area:
// a uint32 length prefix followed by that many bytes of proto data.
uint32_t KernelBase::ParseNodeDef() {
  uint32_t node_def_len;
  AICPU_CHK_STATUS_RET(ParseExtendParam(&node_def_len, "node_def_len"))

  // Remaining extend data must hold the full serialized proto.
  if (extend_param_len_ < node_def_len) {
    AICPU_LOGE("Kernel:%s extend param is not enough for customizeAttr addr, node_def_len=%u, extendParamLen=%u.",
               kernel_name_.c_str(), node_def_len, extend_param_len_);
    return AICPU_KERNEL_STATE_PARAM_INVALID;
  }

  std::string std_data(reinterpret_cast<char *>(extend_param_base_), node_def_len);
  if (!node_def_.ParseFromString(std_data)) {
    AICPU_LOGE("parse %s KernelBase proto failed, nodeDef=%s.", kernel_name_.c_str(), std_data.c_str());
    return AICPU_KERNEL_STATE_PARAM_INVALID;
  }
  // Advance the cursor past the consumed proto bytes.
  extend_param_base_ += node_def_len;
  extend_param_len_ -= node_def_len;
  return AICPU_KERNEL_STATE_SUCCESS;
}
| uint32_t KernelBase::ParseExtShapeType(FWKAdapter::ExtInfo *ext_info) { | |||
| if (ext_info->infoLen != sizeof(int32_t)) { | |||
| AICPU_LOGE("Kernel:%s parse ext shape type failed as infoLen must be %zu but %u.", kernel_name_.c_str(), | |||
| sizeof(int32_t), ext_info->infoLen); | |||
| return AICPU_KERNEL_STATE_PARAM_INVALID; | |||
| } | |||
| unknow_shape_ = true; | |||
| return AICPU_KERNEL_STATE_SUCCESS; | |||
| } | |||
// Parse the input-shape ext record: one ShapeAndType entry per NodeDef input,
// keeping raw pointers into ext_info->infoMsg (no copy — the buffer must
// outlive input_shape_and_type_).
uint32_t KernelBase::ParseExtInputShape(FWKAdapter::ExtInfo *ext_info) {
  // no overflow
  auto need_len = node_def_.inputs_size() * sizeof(FWKAdapter::ShapeAndType);
  if (ext_info->infoLen != need_len) {
    AICPU_LOGE(
      "Kernel:%s parse ext input shape failed as infoLen must be "
      "input_num[%d]*sizeof(ShapeAndType)[%zu], but %u.",
      kernel_name_.c_str(), node_def_.inputs_size(), sizeof(FWKAdapter::ShapeAndType), ext_info->infoLen);
    return AICPU_KERNEL_STATE_PARAM_INVALID;
  }

  input_shape_and_type_.clear();
  auto input = reinterpret_cast<FWKAdapter::ShapeAndType *>(ext_info->infoMsg);
  for (int index = 0; index < node_def_.inputs_size(); ++index) {
    input_shape_and_type_.emplace_back(&input[index]);
  }
  return AICPU_KERNEL_STATE_SUCCESS;
}
| uint32_t KernelBase::ParseExtOutputShape(FWKAdapter::ExtInfo *ext_info) { | |||
| // no overflow | |||
| auto need_len = node_def_.outputs_size() * sizeof(FWKAdapter::ShapeAndType); | |||
| if (ext_info->infoLen != need_len) { | |||
| AICPU_LOGE( | |||
| "Kernel:%s parse ext output shape failed as infoLen must be " | |||
| "output_num[%d]*sizeof(ShapeAndType)[%zu], but %u.", | |||
| kernel_name_.c_str(), node_def_.outputs_size(), sizeof(FWKAdapter::ShapeAndType), ext_info->infoLen); | |||
| return AICPU_KERNEL_STATE_PARAM_INVALID; | |||
| } | |||
| output_shape_and_type_.clear(); | |||
| auto output = reinterpret_cast<FWKAdapter::ShapeAndType *>(ext_info->infoMsg); | |||
| for (int index = 0; index < node_def_.outputs_size(); ++index) { | |||
| output_shape_and_type_.emplace_back(&output[index]); | |||
| } | |||
| return AICPU_KERNEL_STATE_SUCCESS; | |||
| } | |||
| uint32_t KernelBase::ParseExtInfo() { | |||
| uint32_t offset = 0; | |||
| FWKAdapter::ExtInfo *ext_info_ptr = nullptr; | |||
| char *ext_info_buf = reinterpret_cast<char *>(static_cast<uintptr_t>(param_head_->extInfoAddr)); | |||
| while (offset + sizeof(FWKAdapter::ExtInfo) <= param_head_->extInfoLength) { | |||
| ext_info_ptr = reinterpret_cast<FWKAdapter::ExtInfo *>(ext_info_buf + offset); | |||
| if (ext_info_ptr == nullptr) { | |||
| AICPU_LOGE("Kernel:%s ext_info is nullptr, extInfoLength=%u, extInfoAddr=%p, offset=%zu.", kernel_name_.c_str(), | |||
| param_head_->extInfoLength, param_head_->extInfoAddr, offset); | |||
| return AICPU_KERNEL_STATE_PARAM_INVALID; | |||
| } | |||
| switch (ext_info_ptr->infoType) { | |||
| case FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE: | |||
| AICPU_CHK_STATUS_RET(ParseExtShapeType(ext_info_ptr)) | |||
| break; | |||
| case FWKAdapter::FWK_ADPT_EXT_INPUT_SHAPE: | |||
| AICPU_CHK_STATUS_RET(ParseExtInputShape(ext_info_ptr)) | |||
| break; | |||
| case FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE: | |||
| AICPU_CHK_STATUS_RET(ParseExtOutputShape(ext_info_ptr)) | |||
| break; | |||
| default: | |||
| AICPU_LOGI("Kernel:%s ignore infoType=%d, infoLen=%u.", kernel_name_.c_str(), ext_info_ptr->infoType, | |||
| ext_info_ptr->infoLen); | |||
| break; | |||
| } | |||
| // not overflow | |||
| offset += FWKAdapter::kExtInfoHeadSize; | |||
| offset += ext_info_ptr->infoLen; | |||
| } | |||
| return AICPU_KERNEL_STATE_SUCCESS; | |||
| } | |||
| uint32_t KernelBase::UpdateInputShape() { | |||
| for (int i = 0; i < node_def_.inputs_size(); ++i) { | |||
| aicpuops::Tensor *input_tensor = node_def_.mutable_inputs(i); | |||
| aicpuops::TensorShape *input_tensor_shape = input_tensor->mutable_tensor_shape(); | |||
| input_tensor_shape->clear_dim(); | |||
| for (uint32_t index = 0; index < FWKAdapter::kMaxShapeDims; ++index) { | |||
| // LLONG_MIN for dim end flag | |||
| if (input_shape_and_type_[i]->dims[index] == LLONG_MIN) { | |||
| break; | |||
| } | |||
| input_tensor_shape->add_dim()->set_size(input_shape_and_type_[i]->dims[index]); | |||
| } | |||
| } | |||
| return AICPU_KERNEL_STATE_SUCCESS; | |||
| } | |||
| uint32_t KernelBase::UpdateOutputShape() { | |||
| for (int i = 0; i < node_def_.outputs_size(); ++i) { | |||
| aicpuops::Tensor *output_tensor = node_def_.mutable_outputs(i); | |||
| aicpuops::TensorShape *output_tensor_shape = output_tensor->mutable_tensor_shape(); | |||
| output_tensor_shape->clear_dim(); | |||
| for (uint32_t index = 0; index < FWKAdapter::kMaxShapeDims; ++index) { | |||
| // LLONG_MIN for dim end flag | |||
| if (output_shape_and_type_[i]->dims[index] == LLONG_MIN) { | |||
| break; | |||
| } | |||
| output_tensor_shape->add_dim()->set_size(output_shape_and_type_[i]->dims[index]); | |||
| } | |||
| } | |||
| return AICPU_KERNEL_STATE_SUCCESS; | |||
| } | |||
| } // namespace aicpu | |||
| @@ -0,0 +1,82 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OPS_AICPU_COMMON_KERNEL_BASE_H_ | |||
| #define AICPU_OPS_AICPU_COMMON_KERNEL_BASE_H_ | |||
| #include <cstdint> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "common/kernel_util.h" | |||
| #include "aicpu/common/aicpu_task_struct.h" | |||
| #include "securec/include/securec.h" | |||
| #include "common/tensor.h" | |||
| #include "cce/fwk_adpt_struct.h" | |||
| #include "common/kernel_log.h" | |||
| #include "proto/aicpu_tensor.pb.h" | |||
| namespace aicpu { | |||
// Base class for AICPU kernel implementations. Compute() parses the raw
// parameter blob handed over by the framework (AicpuParamHead + serialized
// NodeDef proto + TLV ext-info records) and then invokes the subclass's
// DoCompute().
class KernelBase {
 public:
  // kernel_name is used only for log messages in this translation unit.
  explicit KernelBase(const std::string &kernel_name);
  // NOTE(review): destructor is non-virtual although the class has virtual
  // methods — deleting a subclass through a KernelBase* would be UB; confirm
  // kernels are never owned polymorphically.
  ~KernelBase() = default;
  // Framework entry point; `param` points at an AicpuParamHead blob.
  uint32_t Compute(void *param);
  // Byte width of an aicpuops::DataType (implementation not visible here).
  size_t GetDataTypeSize(::aicpuops::DataType data_type);
 protected:
  // Subclass hooks: extract op attributes / run the actual computation.
  virtual uint32_t ParseKernelParam() = 0;
  virtual uint32_t DoCompute() = 0;
  // Reads one POD value from the extend-param area and advances the cursor.
  template <typename T>
  uint32_t ParseExtendParam(T *param_var, std::string param_name);
  // Deserializes the NodeDef proto from the extend-param area.
  uint32_t ParseNodeDef();
  // Walks the TLV ext-info buffer (shape type / input / output shapes).
  uint32_t ParseExtInfo();
  uint32_t ParseExtShapeType(FWKAdapter::ExtInfo *ext_info);
  uint32_t ParseExtInputShape(FWKAdapter::ExtInfo *ext_info);
  uint32_t ParseExtOutputShape(FWKAdapter::ExtInfo *ext_info);
  // Copy runtime shapes from the ext-info records back into node_def_.
  uint32_t UpdateInputShape();
  uint32_t UpdateOutputShape();
 private:
  KernelBase(const KernelBase &) = delete;
  KernelBase &operator=(const KernelBase &) = delete;
  KernelBase(KernelBase &&) = delete;
  KernelBase &operator=(KernelBase &&) = delete;
  uint32_t ParseParam(void *param);
 protected:
  std::string kernel_name_;          // op name, used in log messages
  std::vector<uintptr_t> io_addrs_;  // raw input/output buffer addresses
  uint32_t extend_param_len_;        // bytes remaining in the extend-param area
  uint8_t *extend_param_base_;       // cursor into the extend-param area
  AicpuParamHead *param_head_;       // header of the parameter blob
  bool unknow_shape_;                // set when ext-info carries SHAPE_TYPE
  aicpuops::NodeDef node_def_;       // parsed op definition
  // Pointers into the ext-info buffer (not owned), one per input/output.
  std::vector<FWKAdapter::ShapeAndType *> input_shape_and_type_;
  std::vector<FWKAdapter::ShapeAndType *> output_shape_and_type_;
};
| } // namespace aicpu | |||
| #endif // AICPU_OPS_AICPU_COMMON_KERNEL_BASE_H_ | |||
| @@ -0,0 +1,30 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OPS_AICPU_COMMON_KENERL_ERRCODE_H_ | |||
| #define AICPU_OPS_AICPU_COMMON_KENERL_ERRCODE_H_ | |||
| namespace aicpu { | |||
// Status codes returned by AICPU kernel entry points.
enum AicpuKernelErrCode {
  // 0-3 is fixed error code, runtime need interpret 0-3 error codes
  AICPU_KERNEL_STATE_SUCCESS = 0,
  AICPU_KERNEL_STATE_PARAM_INVALID = 1,
  AICPU_KERNEL_STATE_FAILED = 2,
  AICPU_KERNEL_STATE_EXECUTE_TIMEOUT = 3,
  AICPU_KERNEL_STATE_INTERNAL_ERROR = 4,
  // Not referenced in this file; presumably signals the end of an input
  // sequence for dataset-style ops — confirm against the runtime contract.
  AICPU_KERNEL_STATE_END_OF_SEQUENCE = 201,
};
| } // namespace aicpu | |||
| #endif // AICPU_OPS_AICPU_COMMON_KENERL_ERRCODE_H_ | |||
| @@ -0,0 +1,29 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/kernel_log.h" | |||
| namespace aicpu { | |||
| static int log_level = AICPU_LOG_ERROR; | |||
| int LogSetLevel(int level) { | |||
| log_level = level; | |||
| return log_level; | |||
| } | |||
| int LogGetLevel(void) { return log_level; } | |||
| bool CheckLogLevel(int log_level_check) { return log_level >= log_level_check; } | |||
| } // namespace aicpu | |||
| @@ -0,0 +1,77 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_ | |||
| #define AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_ | |||
| #include <unistd.h> | |||
| #include <sys/syscall.h> | |||
| #include <iostream> | |||
| #include <utility> | |||
| #include "common/kernel_errcode.h" | |||
// Returns the calling thread's kernel thread id, cached once per thread so
// the syscall is paid only on first use.
inline int GetTid(void) {
  thread_local static int cached_tid = static_cast<int>(syscall(__NR_gettid));
  return cached_tid;
}
| static const int LOG_COUNT = 0; | |||
| namespace aicpu { | |||
| #define AICPU_LOG_DEBUG 0 | |||
| #define AICPU_LOG_INFO 1 | |||
| #define AICPU_LOG_WARN 2 | |||
| #define AICPU_LOG_ERROR 3 | |||
| #define AICPU_LOG_EVENT 0x10 | |||
// Terminal case: finishes the line by emitting the numeric level and a newline.
inline void PrintLog(const int level) { std::cerr << level << std::endl; }
// Streams each argument to stderr separated by single spaces. The printf-style
// format string the AICPU_LOG* macros pass in is printed verbatim; its
// conversion specifiers are never interpreted.
template <typename T, typename... Args>
inline void PrintLog(const int level, T &&first, Args &&... rest) {
  std::cerr << std::forward<T>(first) << " ";
  PrintLog(level, std::forward<Args>(rest)...);
}
| int LogSetLevel(int level); | |||
| int LogGetLevel(void); | |||
| bool CheckLogLevel(int log_level_check); | |||
// Severity-specific logging helpers; each prepends function, file, line and
// thread id. NOTE(review): `#fmt` stringizes the fmt argument, so passing a
// string literal yields escaped quote characters in the output, and the
// printf-style specifiers are never interpreted (PrintLog just streams its
// arguments) — confirm this output shape is intentional.
#define AICPU_LOGD(fmt, ...) \
  AICPU_LOG(AICPU_LOG_DEBUG, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGI(fmt, ...) \
  AICPU_LOG(AICPU_LOG_INFO, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGW(fmt, ...) \
  AICPU_LOG(AICPU_LOG_WARN, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGE(fmt, ...) \
  AICPU_LOG(AICPU_LOG_ERROR, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGEVENT(fmt, ...) \
  AICPU_LOG(AICPU_LOG_EVENT, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
// Core macro: emits only when the current threshold admits `level`.
// `while (LOG_COUNT != 0)` is a constant-false condition, i.e. execute once.
#define AICPU_LOG(level, fmt, ...) \
  do { \
    if (aicpu::CheckLogLevel(level)) { \
      aicpu::PrintLog(level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
    } \
  } while (LOG_COUNT != 0)
// Evaluates `expr` and early-returns its status from the enclosing function
// on failure. NOTE(review): the trailing `;` makes this macro
// self-terminating, so call sites in this file invoke it WITHOUT a semicolon
// — keep both in sync if this is ever changed.
#define AICPU_CHK_STATUS_RET(expr...) \
  do { \
    const uint32_t status = (expr); \
    if (status != AICPU_KERNEL_STATE_SUCCESS) { \
      return status; \
    } \
  } while (0);
| } // namespace aicpu | |||
| #endif // AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_ | |||
| @@ -0,0 +1,22 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OPS_AICPU_COMMON_KERNEL_UTIL_H_ | |||
| #define AICPU_OPS_AICPU_COMMON_KERNEL_UTIL_H_ | |||
// Marks symbols to be exported from the AICPU ops shared object when building
// with default-hidden visibility; guarded so a build system may pre-define it.
#ifndef AICPU_VISIBILITY_API
#define AICPU_VISIBILITY_API __attribute__((visibility("default")))
#endif
| #endif // AICPU_OPS_AICPU_COMMON_KERNEL_UTIL_H_ | |||
| @@ -0,0 +1,41 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OPS_COMMON_TENSOR_H_ | |||
| #define AICPU_OPS_COMMON_TENSOR_H_ | |||
| #include <atomic> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <map> | |||
| namespace aicpu { | |||
| namespace ms { | |||
// Minimal non-owning wrapper around a flat byte buffer (pointer + length).
// Accessor bodies are defined elsewhere; only the interface lives here.
class Tensor {
 public:
  Tensor() = default;
  ~Tensor() = default;
  // Returns the wrapped buffer; null until SetData has been called.
  const uint8_t *GetData() const;
  // Size of the wrapped buffer in bytes; 0 until SetData has been called.
  size_t GetSize() const;
  // Stores the buffer pointer and size without taking ownership.
  void SetData(uint8_t *data, size_t size);

 private:
  // Fix: default member initializers so a freshly constructed Tensor reads as
  // empty instead of exposing indeterminate values through GetData/GetSize
  // (the defaulted constructor previously left both members uninitialized).
  uint8_t *tensor_ptr_ = nullptr;
  size_t tensor_len_ = 0;
};
| } // namespace ms | |||
| } // namespace aicpu | |||
| #endif // AICPU_OPS_COMMON_TENSOR_H_ | |||
| @@ -0,0 +1,280 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "./random_choice_with_mask_kernels.h" | |||
| #include <random> | |||
| #include <climits> | |||
| #include <vector> | |||
| #include <algorithm> | |||
| #include <string> | |||
| #include "aicpu_sharder/aicpu_sharder.h" | |||
| #include "proto/aicpu_tensor.pb.h" | |||
| #include "common/distinct_uniform_int_distribution.h" | |||
| #include "common/tensor.h" | |||
| namespace aicpu { | |||
// Converts each flat index in tmp_output into an input_dim_size-dimensional
// coordinate, written consecutively into output (row-major decomposition).
static void ParseOutputCoordinate(std::vector<int64_t> dims_, int32_t output_length, int32_t input_dim_size,
                                  int32_t input_total_count, const int *tmp_output, int *output) {
  int it = 0;
  // Elements per slice of the leading dimension: product of dims_[1..].
  int column = input_total_count / dims_[0];
  for (int i = 0; i < output_length; i++) {
    int32_t remainder = tmp_output[i];
    int stride = column;
    for (int j = 0; j < input_dim_size; j++) {
      if (j == input_dim_size - 1) {
        // Innermost coordinate is whatever offset remains.
        output[it++] = remainder;
        continue;
      }
      // BUG FIX: divide by the per-level stride (which shrinks by dims_[j+1]
      // each level), not by the constant `column`. The original computed
      // tmp_column but never used it, producing wrong coordinates for any
      // input with more than two dimensions.
      output[it++] = remainder / stride;
      remainder = remainder % stride;
      stride = stride / dims_[j + 1];
    }
  }
}
| static void GetOutputLength(bool *padding_flag, int32_t *output_length, int32_t *output_non_zero_length, int32_t count, | |||
| int32_t non_zero_num) { | |||
| if (count == 0) { | |||
| *padding_flag = false; | |||
| *output_length = non_zero_num; | |||
| *output_non_zero_length = non_zero_num; | |||
| } else if (count > 0 && count <= non_zero_num) { | |||
| *padding_flag = false; | |||
| *output_length = count; | |||
| *output_non_zero_length = count; | |||
| } else if (count > non_zero_num) { | |||
| *padding_flag = true; | |||
| *output_length = count; | |||
| *output_non_zero_length = non_zero_num; | |||
| } else { | |||
| AICPU_LOGI("input count must greater or equal to 0 but instead is %d", count); | |||
| } | |||
| } | |||
| static bool GetInputTotalCount(const std::vector<int64_t> &dims_, int32_t *input_total_count, | |||
| const int32_t &input_dim_size) { | |||
| const int32_t max_inpu_dim = 5; | |||
| if (input_dim_size < 1 || input_dim_size > max_inpu_dim) { | |||
| AICPU_LOGE( | |||
| "input dim size is %d, it must greater or equal to 1 channels " | |||
| "and less than or equal to 5 channels!", | |||
| input_dim_size); | |||
| return false; | |||
| } | |||
| for (int32_t i = 0; i < input_dim_size; i++) { | |||
| *input_total_count *= dims_[i]; | |||
| } | |||
| if (*input_total_count <= 0) { | |||
| AICPU_LOGE("input_total_count is %d, please check setting.", *input_total_count); | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
// Finalizes the two outputs: zeroes the coordinate slots past the last real
// (non-zero) entry, copies the per-sample validity flags from mask_dim, and
// clears the padded tail of the mask.
static void UpdateOutput(const std::vector<int64_t> &dims_, const int32_t &non_zero_num, const int32_t &count_,
                         const int32_t &output_length, const int *mask_dim, int32_t *output_coordinate, bool *mask) {
  const int32_t rank = static_cast<int32_t>(dims_.size());
  for (int32_t i = non_zero_num * rank; i < count_ * rank; i++) {
    output_coordinate[i] = 0;
  }
  for (int32_t i = 0; i < output_length; i++) {
    mask[i] = static_cast<bool>(mask_dim[i]);
  }
  for (int32_t i = non_zero_num; i < count_; i++) {
    mask[i] = false;
  }
}
| static bool GenerateRandomMask(const int32_t &output_length, const int32_t &non_zero_num, | |||
| const int32_t &output_non_zero_length, int **input_dim, int **tmp_output, | |||
| int **mask_dim) { | |||
| *tmp_output = reinterpret_cast<int *>(malloc(output_length * sizeof(int))); | |||
| if (*tmp_output == nullptr) { | |||
| AICPU_LOGE("malloc memory failed!"); | |||
| free(*input_dim); | |||
| return false; | |||
| } | |||
| std::random_device rd; | |||
| std::mt19937 gen(rd()); | |||
| aicpu::distinct_uniform_int_distribution<> dis(0, non_zero_num - 1); | |||
| *mask_dim = reinterpret_cast<int *>(malloc(output_length * sizeof(int))); | |||
| if (*mask_dim == nullptr) { | |||
| AICPU_LOGE("malloc memory failed!"); | |||
| free(*input_dim); | |||
| free(*tmp_output); | |||
| return false; | |||
| } | |||
| if (memset_s(*mask_dim, output_length, 0x00, output_length) != EOK) { | |||
| AICPU_LOGE("memset_s to mask_dim failed!"); | |||
| free(*input_dim); | |||
| free(*tmp_output); | |||
| free(*mask_dim); | |||
| return false; | |||
| } | |||
| if (memset_s(*tmp_output, output_length, 0x00, output_length) != EOK) { | |||
| AICPU_LOGE("memset_s to tmp_output failed!"); | |||
| free(*input_dim); | |||
| free(*tmp_output); | |||
| free(*mask_dim); | |||
| return false; | |||
| } | |||
| if (output_non_zero_length > output_length) { | |||
| AICPU_LOGE("output_non_zero_length size is too long!"); | |||
| free(*input_dim); | |||
| free(*tmp_output); | |||
| free(*mask_dim); | |||
| return false; | |||
| } | |||
| for (int32_t i = 0; i < output_non_zero_length; i++) { | |||
| int32_t mean = dis.exec(&gen); | |||
| *((*tmp_output) + i) = *((*input_dim) + mean); | |||
| *((*mask_dim) + i) = 1; | |||
| } | |||
| return true; | |||
| } | |||
// Samples up to count_ coordinates of true elements from the boolean input.
// io_addrs_: [0] input bool tensor, [1] output int32 coordinates, [2] output
// bool validity mask. Uses raw malloc/free for scratch buffers; every error
// path below frees whatever has been allocated so far.
uint32_t RandomChoiceWithMaskKernel::DoCompute() {
  auto *input = reinterpret_cast<bool *>(io_addrs_[0]);
  auto *output_coordinate = reinterpret_cast<int32_t *>(io_addrs_[1]);
  auto *mask = reinterpret_cast<bool *>(io_addrs_[2]);
  int32_t input_dim_size = dims_.size();
  int32_t non_zero_num = 0;
  int32_t input_total_count = 1;
  // Validates rank 1..5 and computes the flat element count.
  bool ret = GetInputTotalCount(dims_, &input_total_count, input_dim_size);
  if (!ret) {
    AICPU_LOGE("Get input total count failed!");
    return AICPU_KERNEL_STATE_INTERNAL_ERROR;
  }
  int *input_dim = reinterpret_cast<int *>(malloc(input_total_count * sizeof(int)));
  if (input_dim == nullptr) {
    AICPU_LOGE("Malloc memory failed!");
    return AICPU_KERNEL_STATE_INTERNAL_ERROR;
  }
  // Collect the flat indices of all true input elements.
  for (int32_t i = 0; i < input_total_count; i++) {
    if (input[i] != 0) {
      input_dim[non_zero_num] = i;
      non_zero_num++;
    }
  }
  bool padding_flag = false;
  int32_t output_length = 0;
  int32_t output_non_zero_length = 0;
  GetOutputLength(&padding_flag, &output_length, &output_non_zero_length, count_, non_zero_num);
  int *tmp_output = nullptr;
  int *mask_dim = nullptr;
  // On failure GenerateRandomMask frees input_dim and its own allocations.
  ret = GenerateRandomMask(output_length, non_zero_num, output_non_zero_length, &input_dim, &tmp_output, &mask_dim);
  if (!ret) {
    AICPU_LOGE("Generate random mask failed!");
    return AICPU_KERNEL_STATE_INTERNAL_ERROR;
  }
  if (padding_flag) {
    // NOTE(review): `i > non_zero_num` runs output_length - 1 - non_zero_num
    // iterations, so slot output_length - 1 is never written here — looks like
    // an off-by-one that relies on the memset in GenerateRandomMask; confirm.
    int32_t index = 0;
    for (int32_t i = output_length - 1; i > non_zero_num; i--) {
      tmp_output[non_zero_num + index] = 0;
      mask_dim[non_zero_num + index] = 0;
      index++;
    }
  }
  int32_t copy_output_length = 0;
  // NOTE(review): the product is evaluated before this guard, so the signed
  // overflow it tries to catch has already occurred when the guard fires.
  if (output_length * input_dim_size >= INT_MAX || output_length * input_dim_size < 0) {
    AICPU_LOGE("Output size exceed INT_MAX");
    free(input_dim);
    free(tmp_output);
    free(mask_dim);
    return AICPU_KERNEL_STATE_INTERNAL_ERROR;
  }
  copy_output_length = output_length * input_dim_size;
  int *output = reinterpret_cast<int *>(malloc(copy_output_length * sizeof(int)));
  if (output == nullptr) {
    AICPU_LOGE("malloc memory failed!");
    free(input_dim);
    free(tmp_output);
    free(mask_dim);
    return AICPU_KERNEL_STATE_INTERNAL_ERROR;
  }
  // NOTE(review): memset_s sizes are element counts, not byte counts — only a
  // quarter of the buffer is zeroed; confirm whether this is a latent bug.
  if (memset_s(output, copy_output_length, 0x00, copy_output_length) != EOK) {
    AICPU_LOGE("memset_s memory failed!");
    free(input_dim);
    free(mask_dim);
    free(tmp_output);
    free(output);
    return AICPU_KERNEL_STATE_INTERNAL_ERROR;
  }
  // Expand each sampled flat index into an N-dimensional coordinate.
  ParseOutputCoordinate(dims_, output_length, input_dim_size, input_total_count, tmp_output, output);
  int32_t actual_output_length = count_ * dims_.size();
  copy_output_length = std::min(actual_output_length, copy_output_length);
  int32_t copy_output_bytes = 0;
  if (INT_MAX / static_cast<int>(sizeof(int32_t)) < copy_output_length) {
    AICPU_LOGE("The output length is out of range!");
    free(input_dim);
    free(mask_dim);
    free(tmp_output);
    free(output);
    return AICPU_KERNEL_STATE_INTERNAL_ERROR;
  }
  copy_output_bytes = copy_output_length * sizeof(int32_t);
  // NOTE(review): memcpy_s return value is ignored here, unlike the memset_s
  // calls above — confirm whether failure should abort.
  memcpy_s(output_coordinate, copy_output_bytes, output, copy_output_bytes);
  UpdateOutput(dims_, non_zero_num, count_, output_length, mask_dim, output_coordinate, mask);
  AICPU_LOGI("no zero num is %d, output_length is %d ", non_zero_num, output_length);
  // Report actual dims when running with dynamic shapes.
  UpdateOutputShapeValue(non_zero_num, output_length);
  free(input_dim);
  free(mask_dim);
  free(tmp_output);
  free(output);
  return AICPU_KERNEL_STATE_SUCCESS;
}
| void RandomChoiceWithMaskKernel::UpdateOutputShapeValue(int32_t non_zero_num, int32_t output_length) { | |||
| if (unknow_shape_) { | |||
| output_shape_and_type_[0]->dims[0] = non_zero_num; | |||
| output_shape_and_type_[1]->dims[0] = output_length; | |||
| } | |||
| } | |||
| uint32_t RandomChoiceWithMaskKernel::ParseKernelParam() { | |||
| ::google::protobuf::Map<::std::string, ::aicpuops::AttrValue> nodedef_map = node_def_.attrs(); | |||
| aicpuops::AttrValue random_choice_count_attrs = nodedef_map["count"]; | |||
| count_ = random_choice_count_attrs.i(); | |||
| AICPU_LOGI("This op attr count is %d", count_); | |||
| if ((count_ == 0) && (!unknow_shape_)) { | |||
| AICPU_LOGE("This op attr count is 0, but the shapetype is %d", unknow_shape_); | |||
| return AICPU_KERNEL_STATE_PARAM_INVALID; | |||
| } | |||
| size_t inputs_size = node_def_.inputs_size(); | |||
| for (size_t i = 0; i < inputs_size; i++) { | |||
| aicpuops::Tensor input_tensor = node_def_.inputs(i); | |||
| aicpuops::TensorShape input_shape = input_tensor.tensor_shape(); | |||
| for (int j = 0; j < input_shape.dim_size(); j++) { | |||
| dims_.push_back(input_shape.dim(j).size()); | |||
| } | |||
| } | |||
| return AICPU_KERNEL_STATE_SUCCESS; | |||
| } | |||
| } // namespace aicpu | |||
| extern "C" { | |||
| __attribute__((visibility("default"))) uint32_t RandomChoiceWithMask(void *param) { | |||
| aicpu::RandomChoiceWithMaskKernel randomChoiceWithMaskKernel; | |||
| return randomChoiceWithMaskKernel.Compute(param); | |||
| } | |||
| } | |||
| @@ -0,0 +1,36 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef AICPU_OPS_AICPU_RANDOM_CHOICE_WITH_MASK_KERNELS_H_ | |||
| #define AICPU_OPS_AICPU_RANDOM_CHOICE_WITH_MASK_KERNELS_H_ | |||
| #include <vector> | |||
| #include "common/kernel_base.h" | |||
| namespace aicpu { | |||
// AICPU kernel for the RandomChoiceWithMask op: samples coordinates of true
// elements from a boolean input tensor plus a validity mask (see DoCompute
// in the .cc file).
class RandomChoiceWithMaskKernel : public KernelBase {
 public:
  RandomChoiceWithMaskKernel() : KernelBase("RandomChoiceWithMask") {}
  ~RandomChoiceWithMaskKernel() = default;
 protected:
  // `count` attribute from the NodeDef; 0 means "all non-zero elements"
  // (valid only with dynamic shapes).
  int32_t count_ = 0;
  // Flattened input tensor dimensions, filled by ParseKernelParam.
  std::vector<int64_t> dims_;
  uint32_t DoCompute() override;
  uint32_t ParseKernelParam() override;
  // Publishes actual output dims when running with unknown (dynamic) shapes.
  void UpdateOutputShapeValue(int32_t non_zero_num, int32_t output_length);
};
| } // namespace aicpu | |||
| #endif // AICPU_OPS_AICPU_RANDOM_CHOICE_WITH_MASK_KERNELS_H_ | |||