Browse Source

Pre Merge pull request !631 from 周超/development

pull/631/MERGE
周超 Gitee 5 years ago
parent
commit
7bf3f7eff0
14 changed files with 369 additions and 39 deletions
  1. +2
    -0
      ge/CMakeLists.txt
  2. +1
    -0
      ge/executor/CMakeLists.txt
  3. +1
    -0
      ge/executor/module.mk
  4. +1
    -0
      ge/ge_inference.mk
  5. +1
    -0
      ge/ge_runner.mk
  6. +46
    -0
      ge/graph/execute/dynamic_execute_addr.cc
  7. +39
    -0
      ge/graph/execute/dynamic_execute_addr.h
  8. +11
    -2
      ge/graph/load/new_model_manager/davinci_model.cc
  9. +204
    -30
      ge/graph/load/new_model_manager/model_manager.cc
  10. +13
    -3
      ge/graph/load/new_model_manager/model_manager.h
  11. +13
    -1
      ge/graph/manager/graph_manager.cc
  12. +28
    -3
      ge/hybrid/executor/hybrid_model_async_executor.cc
  13. +8
    -0
      inc/external/ge/ge_api_types.h
  14. +1
    -0
      inc/framework/common/ge_types.h

+ 2
- 0
ge/CMakeLists.txt View File

@@ -79,6 +79,7 @@ set(TRAIN_SRC_LIST
"graph/common/omg_util.cc"
"graph/common/transop_util.cc"
"graph/execute/graph_execute.cc"
"graph/execute/dynamic_execute_addr.cc"
"graph/label/case_label_maker.cc"
"graph/label/if_label_maker.cc"
"graph/label/label_maker.cc"
@@ -384,6 +385,7 @@ set(INFER_SRC_LIST
"graph/preprocess/multi_batch_options.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"graph/execute/graph_execute.cc"
"graph/execute/dynamic_execute_addr.cc"
"graph/load/graph_loader.cc"
"graph/optimize/graph_optimize.cc"
"graph/optimize/mem_rw_conflict_optimize.cc"


+ 1
- 0
ge/executor/CMakeLists.txt View File

@@ -20,6 +20,7 @@ set(SRC_LIST
"../common/profiling/ge_profiling.cc"
"../graph/load/graph_loader.cc"
"../graph/execute/graph_execute.cc"
"../graph/execute/dynamic_execute_addr.cc"
"../omm/csa_interact.cc"
"../graph/manager/graph_manager_utils.cc"
"../graph/manager/graph_var_manager.cc"


+ 1
- 0
ge/executor/module.mk View File

@@ -11,6 +11,7 @@ local_ge_executor_src_files := \
../common/profiling/ge_profiling.cc \
../graph/load/graph_loader.cc \
../graph/execute/graph_execute.cc \
../graph/execute/dynamic_execute_addr.cc \
../omm/csa_interact.cc \
../graph/manager/graph_manager_utils.cc \
../graph/manager/graph_var_manager.cc \


+ 1
- 0
ge/ge_inference.mk View File

@@ -50,6 +50,7 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \
graph/preprocess/multi_batch_options.cc \
graph/preprocess/multi_batch_copy_graph.cc \
graph/execute/graph_execute.cc \
graph/execute/dynamic_execute_addr.cc \
graph/load/graph_loader.cc \
graph/optimize/graph_optimize.cc \
graph/optimize/mem_rw_conflict_optimize.cc \


+ 1
- 0
ge/ge_runner.mk View File

@@ -48,6 +48,7 @@ LIBGE_LOCAL_SRC_FILES := \
graph/common/omg_util.cc \
graph/common/transop_util.cc \
graph/execute/graph_execute.cc \
graph/execute/dynamic_execute_addr.cc \
graph/label/case_label_maker.cc \
graph/label/if_label_maker.cc \
graph/label/label_maker.cc \


+ 46
- 0
ge/graph/execute/dynamic_execute_addr.cc View File

@@ -0,0 +1,46 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "dynamic_execute_addr.h"

#include "common/ge_inner_error_codes.h"
#include "common/debug/log.h"

namespace ge {
DynamicExecuteAddr &DynamicExecuteAddr::GetInstance() {
static DynamicExecuteAddr instance;
return instance;
}

std::map<void *, hybrid::TensorValue>DynamicExecuteAddr::GetTensorValue(uint64_t session_id) {
auto it = dynamic_execute_dev_addr_.find(session_id);
return it->second;
}

void DynamicExecuteAddr::RemoveTensorValue(uint64_t session_id,void * data_dev_addr) {
auto it = dynamic_execute_dev_addr_.find(session_id);
if (it != dynamic_execute_dev_addr_.end()) {
auto tensor_values = it ->second;
auto tensor_value_it = tensor_values.find(data_dev_addr);
if (tensor_value_it != tensor_values.end()) {
tensor_values.erase(tensor_value_it);
}
GELOGW("Can not find data device addr");
}
GELOGW("Can not find session,session id is %ld",session_id);
}

} // namespace ge

+ 39
- 0
ge/graph/execute/dynamic_execute_addr.h View File

@@ -0,0 +1,39 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_EXECUTE_DYNAMIC_EXECUTE_ADDR_H_
#define GE_GRAPH_EXECUTE_DYNAMIC_EXECUTE_ADDR_H_

#include "hybrid/common/tensor_value.h"

#include <map>

namespace ge {
class DynamicExecuteAddr {
public:
static DynamicExecuteAddr &GetInstance();
void SetDynamicExecuteDevAddr(uint64_t session_id, std::map<void *, hybrid::TensorValue> tensor_value) {
dynamic_execute_dev_addr_.emplace(session_id, tensor_value);
}
std::map<void *, hybrid::TensorValue> GetTensorValue(uint64_t session_id);
void RemoveTensorValue(uint64_t session_id, void *data_dev_addr);

private:
std::map<uint64_t, std::map<void *, hybrid::TensorValue>> dynamic_execute_dev_addr_;
};
} // namespace ge

#endif // GE_GRAPH_EXECUTE_DYNAMIC_EXECUTE_ADDR_H_

+ 11
- 2
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -59,6 +59,7 @@
#include "securec.h"
#include "graph/common/local_context.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "graph/execute/dynamic_execute_addr.h"

// create std::thread, catch exceptions using try/catch
#define CREATE_STD_THREAD(thread_id, func, args) \
@@ -624,7 +625,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size

if (hcom_streams_.find(i) != hcom_streams_.end()) {
GE_CHK_RT_RET(rtStreamCreateWithFlags(&stream, priority_, stream_flags | RT_STREAM_FORCE_COPY));
} else {
} else {
GE_CHK_RT_RET(rtStreamCreateWithFlags(&stream, priority_, stream_flags));
}

@@ -2208,6 +2209,9 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data
runtime_param_.graph_id, data.second.GetOpName().c_str(), data.first, mem_addr, data_buf_addr, data_size,
data_buf_length);
GE_CHK_RT_RET(rtMemcpy(mem_addr, data_size, data_buf_addr, data_buf_length, kind));
if (device_data) {
DynamicExecuteAddr::GetInstance().RemoveTensorValue(session_id_,data_buf_addr);
}
}

return SUCCESS;
@@ -2787,7 +2791,12 @@ void *DavinciModel::Run(DavinciModel *model) {
GELOGI("Copy input data, model id:%u", model_id);
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(),
model->SetProfileTime(MODEL_PRE_PROC_START));
ret = model->CopyInputData(current_data, false);
auto placement = current_data.blobs[0].placement;
if (placement == 1) {
ret = model->CopyInputData(current_data, true);
} else {
ret = model->CopyInputData(current_data, false);
}
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput());
CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC);


+ 204
- 30
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -32,6 +32,7 @@
#include "graph/common/local_context.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "hybrid/hybrid_davinci_model.h"
#include "graph/utils/graph_utils.h"

namespace ge {
thread_local uint32_t device_count = 0;
@@ -49,9 +50,13 @@ const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe";
const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe";
const char *const kBatchLoadBuf = "batchLoadsoFrombuf";
const char *const kDeleteCustOp = "deleteCustOp";
const char *const kUsedStreamNum = "used_stream_num";
const char *const kStreamResource = "stream";
const char *const kEventResource = "event";
const int kTimeSpecNano = 1000000000;
const int kTimeSpecMiro = 1000000;
const int kSessionMaxBias = 100;
const int kMaxEventNum = 1024;
struct CustAicpuSoBuf {
uint64_t kernelSoBuf;
uint32_t kernelSoBufLen;
@@ -69,7 +74,7 @@ std::mutex ModelManager::exeception_infos_mutex_;

std::shared_ptr<ModelManager> ModelManager::GetInstance() {
static const std::shared_ptr<ModelManager> instance_ptr =
shared_ptr<ModelManager>(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr);
shared_ptr<ModelManager>(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr);
return instance_ptr;
}

@@ -119,7 +124,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
}

rt_ret =
rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), &param_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE);
rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), &param_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "memory copy to device failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
@@ -368,6 +373,177 @@ void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciMode
hybrid_model_map_[id] = hybrid_model;
}

Status ModelManager::CheckStreamAndEventResource(const GeModelPtr &ge_model) {
GE_CHK_BOOL_EXEC(ge_model != nullptr, return FAILED, "ge model ptr is null");
int64_t value = 0;
bool ret = ge::AttrUtils::GetInt(ge_model, ATTR_MODEL_STREAM_NUM, value);
int64_t need_stream_num = ret ? value : 0;
ret = ge::AttrUtils::GetInt(ge_model, ATTR_MODEL_EVENT_NUM, value);
int64_t need_event_num = ret ? value : 0;

int64_t hccl_follow_stream = 0;
Status status = CalculateFollowStream(ge_model, hccl_follow_stream);
if (status != SUCCESS) {
GELOGE(FAILED, "Calculate hccl follow stream failed");
return FAILED;
}
need_stream_num = need_stream_num + hccl_follow_stream;

int64_t free_stream_num = 0;
int64_t free_event_num = 0;
status = GetFreeStreamAndEvent(free_stream_num, free_event_num);
if (status != SUCCESS) {
GELOGE(FAILED, "Get free steam and event failed");
return FAILED;
}
if (need_stream_num > free_stream_num) {
status = ReleaseRsource(need_stream_num, free_stream_num, kStreamResource);
if (status != SUCCESS) {
GELOGE(FAILED, "Release stream resource failed");
return FAILED;
}
}
if (need_event_num > free_event_num) {
status = ReleaseRsource(need_event_num, free_event_num, kEventResource);
if (status != SUCCESS) {
GELOGE(FAILED, "Release event resource failed");
return FAILED;
}
}
return SUCCESS;
}

Status ModelManager::CalculateFollowStream(const GeModelPtr &ge_model, int64_t &hccl_fellow_stream_num) {
const auto &model_def = ge_model->GetModelTaskDefPtr();
GE_CHECK_NOTNULL(model_def);
Graph graph = ge_model->GetGraph();
ComputeGraphPtr compute_graph = GraphUtils::GetComputeGraph(graph);
GE_CHECK_NOTNULL(compute_graph);

map<uint32_t, OpDescPtr> op_list;
for (auto &node : compute_graph->GetDirectNode()) {
OpDescPtr op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
GELOGE(PARAM_INVALID, "Op desc is nullptr");
return PARAM_INVALID;
}
op_list.emplace(op_desc->GetId(), op_desc);
}

std::multimap<int64_t, int64_t> main_follow_num;
for (int i = 0; i < model_def->task_size(); i++) {
const domi::TaskDef &task = model_def->task(i);
if (static_cast<rtModelTaskType_t>(task.type()) == RT_MODEL_TASK_HCCL) {
auto hccl_def = task.kernel_hccl();
OpDescPtr hccl_op_desc = op_list.at(hccl_def.op_index());
int64_t main_stream_id = hccl_op_desc->GetStreamId();
int64_t follow_stream_num = 0;
if (!ge::AttrUtils::GetInt(hccl_op_desc, kUsedStreamNum, follow_stream_num)) {
GELOGW("Get used_stream_num failed, op is %s", hccl_op_desc->GetName().c_str());
}
main_follow_num.emplace(main_stream_id, follow_stream_num);
}
}
hccl_fellow_stream_num = CalFollowStreamSum(main_follow_num);
return SUCCESS;
}

int64_t ModelManager::CalFollowStreamSum(const std::multimap<int64_t, int64_t> &hccl_stream_map) {
int64_t need_follow_stream_num = 0;
std::map<int64_t, int64_t> max_follow_stream_map;
for (auto &it : hccl_stream_map) {
auto max_it = max_follow_stream_map.find(it.first);
if (max_it != max_follow_stream_map.end() && (it.second) > (max_it->second)) {
max_follow_stream_map.emplace(it.first, it.second);
}
}
for (auto &follow_it : max_follow_stream_map) {
need_follow_stream_num = need_follow_stream_num + follow_it.second;
}
return need_follow_stream_num;
}

Status ModelManager::ReleaseRsource(int64_t need_resource, int64_t free_resource, const string &resource_kind) {
while (need_resource > free_resource) {
uint32_t max_stream_model_id = 0;
uint32_t max_event_model_id = 0;
GetMaxStreamAndEventModel(max_stream_model_id, max_event_model_id);
GELOGD("The max stream num model is: %u,the max event num model is %u", max_stream_model_id, max_event_model_id);
std::lock_guard<std::mutex> lock(resource_mutex_);
if (resource_kind == "stream") {
Status ret = Unload(max_stream_model_id);
if (ret != SUCCESS) {
GELOGE(FAILED, "Unload max stream model failed ,model id : %u", max_stream_model_id);
return FAILED;
}
free_resource = free_resource - stream_map_.at(max_stream_model_id);
stream_map_.erase(max_stream_model_id);
GELOGD("Unload model for stream, model id : %u, stream num :%ld", max_stream_model_id,
stream_map_.at(max_stream_model_id));
}

if (resource_kind == "event") {
Status ret = Unload(max_event_model_id);
if (ret != SUCCESS) {
GELOGE(FAILED, "Unload max stream model failed ,model id : %u", max_stream_model_id);
return FAILED;
}
free_resource = free_resource - event_map_.at(max_event_model_id);
event_map_.erase(max_event_model_id);
GELOGD("Unload model for event, model id : %u, event num :%ld", max_event_model_id,
event_map_.at(max_event_model_id));
}
}
return SUCCESS;
}

Status ModelManager::GetFreeStreamAndEvent(int64_t &free_stream, int64_t &free_event) {
uint32_t max_stream_cout;
uint32_t max_task_cout;
rtError_t ret = rtGetMaxStreamAndTask(RT_NORMAL_STREAM, &max_stream_cout, &max_task_cout);
if (ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Get max stream and task cout failed");
return FAILED;
}
GELOGD("Allowed max stream count: %u,max task cout per stream:%u", max_stream_cout, max_task_cout);
std::lock_guard<std::mutex> lock(resource_mutex_);
int64_t stream_sum = 0;
int64_t event_sum = 0;
for (auto &it : stream_map_) {
stream_sum = stream_sum + it.second;
}
for (auto &it : event_map_) {
event_sum = event_sum + it.second;
}
free_stream = max_stream_cout - stream_sum;
free_event = kMaxEventNum - event_sum;
return SUCCESS;
}

void ModelManager::GetMaxStreamAndEventModel(uint32_t &max_stream_model, uint32_t &max_event_model) {
std::lock_guard<std::mutex> lock(resource_mutex_);
int64_t max_stream_num = 0;
for (auto &it : stream_map_) {
if (it.second > max_stream_num) {
max_stream_num = it.second;
max_stream_model = it.first;
}
}
int64_t max_event_num = 0;
for (auto &it : event_map_) {
if (it.second > max_event_num) {
max_event_num = it.second;
max_event_model = it.first;
}
}
}

void ModelManager::InsertModelResource(uint32_t model_id, int64_t stream_num, int64_t event_num) {
std::lock_guard<std::mutex> lock(resource_mutex_);
stream_map_.emplace(model_id, stream_num);
event_map_.emplace(model_id, event_num);
}

Status ModelManager::DeleteModel(uint32_t id) {
std::lock_guard<std::mutex> lock(map_mutex_);

@@ -459,8 +635,7 @@ Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_
vector<int64_t> &cur_dynamic_dims) {
GELOGD(" Start get cur dynamic dims.");
if (user_real_input_dims.size() != user_input_dims.size()) {
GELOGE(INTERNAL_ERROR,
"The input count of user: %zu should be equal to the data count of graph: %zu",
GELOGE(INTERNAL_ERROR, "The input count of user: %zu should be equal to the data count of graph: %zu",
user_real_input_dims.size(), user_input_dims.size());
return INTERNAL_ERROR;
}
@@ -516,6 +691,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT
DataBuffer data;
data.data = inputs[i].data;
data.length = inputs[i].length;
data.placement = inputs[i].placement;
input_data.blobs.push_back(data);
}
if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) {
@@ -527,7 +703,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT
return INTERNAL_ERROR;
}
DataBuffer data;
data.data = new(std::nothrow) int64_t[cur_dynamic_dims.size()];
data.data = new (std::nothrow) int64_t[cur_dynamic_dims.size()];
GE_CHECK_NOTNULL(data.data);
uint64_t length = static_cast<uint64_t>(cur_dynamic_dims.size() * sizeof(int64_t));
GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, return INTERNAL_ERROR,
@@ -630,11 +806,13 @@ Status ModelManager::Stop(uint32_t model_id) {
///
Status ModelManager::HandleCommand(const Command &command) {
static const std::map<std::string, std::function<uint32_t(const Command &)>> cmds = {
{kCmdTypeDump, HandleDumpCommand}, {kCmdTypeProfInit, HandleProfInitCommand},
{kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand},
{kCmdTypeProfStop, HandleProfStopCommand},
{kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand},
{kCmdTypeProfModelUnsubscribe, HandleProfModelUnsubscribeCommand}};
{kCmdTypeDump, HandleDumpCommand},
{kCmdTypeProfInit, HandleProfInitCommand},
{kCmdTypeProfFinalize, HandleProfFinalizeCommand},
{kCmdTypeProfStart, HandleProfStartCommand},
{kCmdTypeProfStop, HandleProfStopCommand},
{kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand},
{kCmdTypeProfModelUnsubscribe, HandleProfModelUnsubscribeCommand}};

auto iter = cmds.find(command.cmd_type);
if (iter == cmds.end()) {
@@ -645,17 +823,16 @@ Status ModelManager::HandleCommand(const Command &command) {
}
}

Status ModelManager::GetModelByCmd(const Command &command,
std::shared_ptr<DavinciModel> &davinci_model) {
Status ModelManager::GetModelByCmd(const Command &command, std::shared_ptr<DavinciModel> &davinci_model) {
if (command.cmd_params.size() < kCmdParSize) {
GELOGE(PARAM_INVALID, "When the cmd_type is '%s', the size of cmd_params must larger than 2.",
command.cmd_type.c_str());
command.cmd_type.c_str());
return PARAM_INVALID;
}

std::string map_key = command.cmd_params[0];
std::string value = command.cmd_params[1];
if (map_key == PROFILE_MODEL_ID) {
if (map_key == PROFILE_MODEL_ID) {
int32_t model_id = 0;
try {
model_id = std::stoi(value);
@@ -692,8 +869,8 @@ Status ModelManager::HandleProfModelSubscribeCommand(const Command &command) {
return ret;
}

if (ProfilingManager::Instance().ProfModelSubscribe(command.module_index,
static_cast<void *>(davinci_model.get())) != SUCCESS) {
if (ProfilingManager::Instance().ProfModelSubscribe(command.module_index, static_cast<void *>(davinci_model.get())) !=
SUCCESS) {
GELOGE(FAILED, "Handle prof model subscribe failed.");
return FAILED;
}
@@ -1011,8 +1188,7 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);
"GetAIPPInfo failed, invalid model_id is %u.", model_id);

return davinci_model->GetAIPPInfo(index, aipp_info);
}
@@ -1020,8 +1196,7 @@ Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippCo
Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);
"GetAIPPInfo failed, invalid model_id is %u.", model_id);

return davinci_model->GetAippType(index, type, aipp_index);
}
@@ -1047,8 +1222,8 @@ Status ModelManager::GenSessionId(uint64_t &session_id) {

Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener,
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK,
ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno));
GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, ACL_ERROR_GE_PARAM_INVALID,
"input key file path %s is invalid, %s", model.key.c_str(), strerror(errno));
GenModelId(&model_id);

shared_ptr<DavinciModel> davinci_model = nullptr;
@@ -1142,8 +1317,8 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
const std::vector<uint32_t> &input_queue_ids,
const std::vector<uint32_t> &output_queue_ids) {
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || mmAccess2(model_data.key.c_str(), M_F_OK) == EN_OK,
ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is not valid, %s",
model_data.key.c_str(), strerror(errno));
ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is not valid, %s", model_data.key.c_str(),
strerror(errno));

ModelHelper model_helper;
Status ret = model_helper.LoadModel(model_data);
@@ -1344,8 +1519,8 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {
}
allocated_mem.push_back(d_so_name);
GE_CHK_RT(rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE));
GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast<const void *>(so_name.c_str()),
so_name.size(), RT_MEMCPY_HOST_TO_DEVICE));
GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast<const void *>(so_name.c_str()), so_name.size(),
RT_MEMCPY_HOST_TO_DEVICE));

CustAicpuSoBuf cust_aicpu_so_buf;
cust_aicpu_so_buf.kernelSoBuf = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_aicpu_data));
@@ -1379,8 +1554,8 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(batch_args);
GE_CHK_RT(rtMemcpy(batch_args, batch_args_size, static_cast<void *>(&batch_cust_so),
batch_args_size, RT_MEMCPY_HOST_TO_DEVICE));
GE_CHK_RT(rtMemcpy(batch_args, batch_args_size, static_cast<void *>(&batch_cust_so), batch_args_size,
RT_MEMCPY_HOST_TO_DEVICE));

GE_CHK_RT(rtStreamCreate(&stream, 0));
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, batch_args, batch_args_size, nullptr, stream));
@@ -1473,8 +1648,7 @@ void ModelManager::GenModelId(uint32_t *id) {
Status ModelManager::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetOrigInputInfo failed, invalid model_id is %u.",
model_id);
"GetOrigInputInfo failed, invalid model_id is %u.", model_id);

return davinci_model->GetOrigInputInfo(index, orig_input_info);
}


+ 13
- 3
ge/graph/load/new_model_manager/model_manager.h View File

@@ -177,8 +177,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
static ge::Status HandleProfStartCommand(const Command &command);
static ge::Status HandleProfStopCommand(const Command &command);

static ge::Status GetModelByCmd(const Command &command,
std::shared_ptr<DavinciModel> &davinci_model);
static ge::Status GetModelByCmd(const Command &command, std::shared_ptr<DavinciModel> &davinci_model);
///
/// @ingroup domi_ome
/// @brief get model memory usage
@@ -341,6 +340,15 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
void InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model);
void InsertModel(uint32_t id, std::shared_ptr<hybrid::HybridDavinciModel> &hybrid_model);

void InsertModelResource(uint32_t model_id, int64_t stream_num, int64_t event_num);

Status CheckStreamAndEventResource(const GeModelPtr &ge_model);
Status CalculateFollowStream(const GeModelPtr &ge_model, int64_t &hccl_fellow_stream_num);
int64_t CalFollowStreamSum(const std::multimap<int64_t, int64_t> &hccl_stream_map);
Status ReleaseRsource(int64_t need_resource, int64_t free_resource, const string &resource_kind);
Status GetFreeStreamAndEvent(int64_t &free_stream, int64_t &free_event);
void GetMaxStreamAndEventModel(uint32_t &max_stream_model, uint32_t &max_event_model);

///
/// @ingroup domi_ome
/// @brief delete model from model manager set
@@ -362,7 +370,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::vector<rtExceptionInfo> exception_infos_;
std::mutex cust_aicpu_mutex_;
std::map<uintptr_t, std::map<std::string, CustAICPUKernelPtr>> cust_aicpu_so_;

std::map<uint32_t, int64_t> stream_map_;
std::map<uint32_t, int64_t> event_map_;
std::mutex resource_mutex_;
static DumpProperties dump_properties_;
};
} // namespace ge


+ 13
- 1
ge/graph/manager/graph_manager.cc View File

@@ -133,6 +133,7 @@ const char *const kShapeDataName = "ascend_mbatch_shape_data";
const char *const kGetNextName = "IteratorV2";
const char *const kExtAttrDataNodes = "data_nodes";
const char *const kExtAttrGetNextNoSink = "getnext_no_sink";
const char *const kLazyRecompile = "LazyRecompile";

bool IsTailingOptimization() {
string is_tailing_optimization_option;
@@ -379,6 +380,17 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
graph_node->SetOptions(options);
AddGraphNode(graph_id, graph_node);

string dynamic_input;
Status status = ParseOption(options,DYNAMIC_INPUT,dynamic_input);
string dynamic_execute;
status = ParseOption(options,DYNAMIC_EXECUTE,dynamic_execute);

if (dynamic_input == "1" && dynamic_execute == kLazyRecompile) {
if (!AttrUtils::SetStr(*compute_graph, ATTR_NAME_DYNAMIC_EXECUTE, dynamic_execute)) {
GELOGW("Set attribute dynamic execute failed.");
}
}

AddLocalOmgContext(graph_id, omg_context);
if (!options_.output_datatype.empty()) {
GetLocalOmgContext().output_type = options_.output_datatype;
@@ -390,7 +402,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,

CompilerStages &stages = GetCompilerStages(graph_id);
stages.preparer.SetOptions(options_);
Status status = stages.optimizer.SetOptions(options_);
status = stages.optimizer.SetOptions(options_);
if (status != SUCCESS) {
GELOGE(status, "Graph optimizer set options failed.");
return status;


+ 28
- 3
ge/hybrid/executor/hybrid_model_async_executor.cc View File

@@ -20,10 +20,13 @@
#include "graph/utils/type_utils.h"
#include "graph/ge_context.h"
#include "omm/csa_interact.h"
#include "graph/execute/dynamic_execute_addr.h"
#include "graph/debug/ge_attr_define.h"

namespace ge {
namespace hybrid {
namespace {
const char *const kLazyRecompile = "LazyRecompile";ss
int kDataOutputIndex = 0;
}
HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model)
@@ -290,6 +293,8 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
}

GELOGD("Number of outputs = %zu", output_tensor_desc_list.size());
map<void*,TensorValue> tensor_value_info;
map <uint64_t,map<void*,TensorValue>> session_tensor_values;
for (size_t i = 0; i < output_tensors.size(); ++i) {
GELOGD("Start to process output[%zu]", i);
auto &output_tensor = output_tensors[i];
@@ -326,16 +331,33 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
output.data_type = static_cast<uint32_t>(tensor_desc->GetDataType());
output.dims = tensor_desc->GetShape().GetDims();
output.length = output_size;
GeRootModelPtr ge_root_model = model_->ge_root_model_;
GE_CHECK_NOTNULL(ge_root_model);
ComputeGraphPtr computer_graph = ge_root_model->GetRootGraph();
string execute_mode;
if (!AttrUtils::GetStr(*computer_graph, ATTR_NAME_DYNAMIC_EXECUTE, execute_mode)) {
GELOGW("Can not get dynamic execute attr");
}
if (output_size > 0) {
std::unique_ptr<uint8_t[]> data_buf(new(std::nothrow) uint8_t[output_size]);
GE_CHECK_NOTNULL(data_buf);
GE_CHK_RT_RET(rtMemcpy(data_buf.get(),
if (execute_mode != kLazyRecompile){
std::unique_ptr<uint8_t[]> data_buf(new(std::nothrow) uint8_t[output_size]);
GE_CHECK_NOTNULL(data_buf);
GE_CHK_RT_RET(rtMemcpy(data_buf.get(),
output_size,
output_tensor.GetData(),
output_size,
RT_MEMCPY_DEVICE_TO_HOST));
output.data = std::move(data_buf);
output.placement = 0;
output_data->blobs.emplace_back(data_buf.get(), static_cast<uint32_t>(output_size), false);
} else {
GELOGD("The dynamic execute attr is %s", execute_mode.c_str());
output.data.reset(reinterpret_cast<uint8_t *>(const_cast<void *>(output_tensor.GetData())));
output.placement = 1;
tensor_value_info.emplace(output_tensor.GetData(),output_tensor);
output_data->blobs.emplace_back(output_tensor.GetData(), static_cast<uint32_t>(output_size), false);
}
} else {
GELOGW("Output[%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str());
output.data = nullptr;
@@ -350,6 +372,9 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
output_size);
}

uint64_t session_id = executor_->GetContext()->session_id;
session_tensor_values.emplace(session_id,tensor_value_info);
DynamicExecuteAddr::GetInstance().SetDynamicExecuteDevAddr(session_id,session_tensor_values);
return SUCCESS;
}



+ 8
- 0
inc/external/ge/ge_api_types.h View File

@@ -297,12 +297,19 @@ const std::string OP_BANK_PATH_FLAG = "ge.op_bank_path";
// Graph run mode
enum GraphRunMode { PREDICTION = 0, TRAIN };

// Dynamic input shape for training
const std::string DYNAMIC_INPUT = "ge.dynamic_input";

// Dynamic execute mode
const std::string DYNAMIC_EXECUTE = "ge.dynamic_execute";

// Input/Output tensor info
struct InputTensorInfo {
uint32_t data_type; // data type
std::vector<int64_t> dims; // shape description
void *data; // tensor data
int64_t length; // tensor length
uint32_t placement;
};

struct OutputTensorInfo {
@@ -310,6 +317,7 @@ struct OutputTensorInfo {
std::vector<int64_t> dims; // shape description
std::unique_ptr<uint8_t[]> data; // tensor data
int64_t length; // tensor length
uint32_t placement;
OutputTensorInfo() : data_type(0), dims({}), data(nullptr), length(0) {}
OutputTensorInfo(OutputTensorInfo &&out)
: data_type(out.data_type), dims(out.dims), data(std::move(out.data)), length(out.length) {}


+ 1
- 0
inc/framework/common/ge_types.h View File

@@ -59,6 +59,7 @@ struct DataBuffer {
void *data; // Data address
uint64_t length; // Data length
bool isDataSupportMemShare = false;
uint32_t placement;
DataBuffer(void *dataIn, uint64_t len, bool isSupportMemShare)
: data(dataIn), length(len), isDataSupportMemShare(isSupportMemShare) {}



Loading…
Cancel
Save