| @@ -344,6 +344,7 @@ set(TRAIN_SRC_LIST | |||
| "single_op/task/op_task.cc" | |||
| "single_op/task/tbe_task_builder.cc" | |||
| "single_op/task/aicpu_task_builder.cc" | |||
| "single_op/task/rts_kernel_task_builder.cc" | |||
| "single_op/task/aicpu_kernel_task_builder.cc" | |||
| "hybrid/common/tensor_value.cc" | |||
| "hybrid/common/npu_memory_allocator.cc" | |||
| @@ -30,6 +30,7 @@ | |||
| #include "runtime/rt.h" | |||
| #include "task/aicpu_task_builder.h" | |||
| #include "task/aicpu_kernel_task_builder.h" | |||
| #include "task/rts_kernel_task_builder.h" | |||
| #include "task/tbe_task_builder.h" | |||
| #include "hybrid/executor/hybrid_model_executor.h" | |||
| #include "hybrid/node_executor/node_executor.h" | |||
| @@ -248,7 +249,9 @@ Status SingleOpModel::ParseInputsAndOutputs() { | |||
| for (auto &op_desc : data_ops_) { | |||
| GE_CHK_STATUS_RET_NOLOG(ParseInputNode(op_desc)); | |||
| } | |||
| ParseOutputNode(netoutput_op_); | |||
| if (netoutput_op_ != nullptr) { | |||
| ParseOutputNode(netoutput_op_); | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| @@ -330,6 +333,17 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s | |||
| aicpu_task->SetModelArgs(model_name_, model_id_); | |||
| ParseArgTable(aicpu_task, single_op); | |||
| single_op.tasks_.emplace_back(aicpu_task); | |||
| } else if (task_type == RT_MODEL_TASK_MEMCPY_ASYNC || task_type == RT_MODEL_TASK_MEMCPY_ADDR_ASYNC) { | |||
| auto kernel_def = task_def.memcpy_async(); | |||
| auto node = op_list_[kernel_def.op_index()]; | |||
| GE_CHECK_NOTNULL(node); | |||
| auto op_desc = node->GetOpDesc(); | |||
| GE_CHECK_NOTNULL(op_desc); | |||
| std::unique_ptr<MemcpyAsyncTask> task; | |||
| GE_CHK_STATUS_RET_NOLOG(RtsKernelTaskBuilder::BuildMemcpyAsyncTask(op_desc, kernel_def, model_params_, task)); | |||
| task->SetModelArgs(model_name_, model_id_); | |||
| ParseArgTable(task.get(), single_op); | |||
| single_op.tasks_.emplace_back(task.release()); | |||
| } else { | |||
| // skip | |||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||
| @@ -22,6 +22,7 @@ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <single_op/task/op_task.h> | |||
| #include "common/helper/model_helper.h" | |||
| #include "single_op/single_op.h" | |||
| @@ -35,6 +35,7 @@ namespace ge { | |||
| namespace { | |||
| constexpr int kLaunchRetryTimes = 1000; | |||
| constexpr int kSleepTime = 10; | |||
| constexpr size_t kMemcpyArgCount = 2; | |||
| constexpr uint64_t kReleaseFlag = 1; | |||
| constexpr int kCopyNum = 2; | |||
| constexpr uint64_t kInferSessionId = 0; | |||
| @@ -911,4 +912,16 @@ void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | |||
| arg_base = io_addr_; | |||
| arg_count = io_addr_num_; | |||
| } | |||
| /// Issues the asynchronous copy described by this task on the given stream. | |||
| /// addresses_[0] is handed to the runtime as the source and addresses_[1] as the | |||
| /// destination; dst_max_, count_ and kind_ are forwarded unchanged. | |||
| /// @param stream runtime stream passed through to rtMemcpyAsync | |||
| /// @return SUCCESS when execution gets past the GE_CHK_RT_RET-guarded call | |||
| Status MemcpyAsyncTask::LaunchKernel(rtStream_t stream) { | |||
|   void *const src_addr = reinterpret_cast<void *>(addresses_[0]); | |||
|   void *const dst_addr = reinterpret_cast<void *>(addresses_[1]); | |||
|   // NOTE(review): GE_CHK_RT_RET is defined elsewhere; presumably it returns early on a runtime error. | |||
|   GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, dst_max_, src_addr, count_, kind_, stream)); | |||
|   return SUCCESS; | |||
| } | |||
| /// Reports where this task keeps its addresses. | |||
| /// @param arg_base  [out] set to the first element of addresses_ | |||
| /// @param arg_count [out] set to kMemcpyArgCount | |||
| void MemcpyAsyncTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | |||
|   arg_count = kMemcpyArgCount; | |||
|   arg_base = &addresses_[0]; | |||
| } | |||
| } // namespace ge | |||
| @@ -44,6 +44,9 @@ class OpTask { | |||
| // Overridable; takes the model parameters by const reference. The definition is outside this excerpt. | |||
| virtual Status UpdateArgTable(const SingleOpModelParam &param); | |||
| void SetModelArgs(std::string model_name, uint32_t model_id); | |||
| Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | |||
| /// Stores the operator descriptor on this task by copying the pointer into op_desc_. | |||
| void SetOpDesc(const OpDescPtr &op_desc) { this->op_desc_ = op_desc; } | |||
| /// Returns a const reference to the operator descriptor held in op_desc_. | |||
| const OpDescPtr &GetOpdesc() const { | |||
|   return op_desc_; | |||
| } | |||
| Status OpenDump(rtStream_t stream); | |||
| virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0; | |||
| @@ -242,6 +245,22 @@ private: | |||
| std::string op_type_; | |||
| uint64_t kernel_id_ = 0; | |||
| }; | |||
| /// OpTask specialization that carries the arguments of one asynchronous memory copy. | |||
| /// It overrides LaunchKernel and GetIoAddr; all state is private and is populated by | |||
| /// the friend classes declared below. | |||
| class MemcpyAsyncTask : public OpTask { | |||
|  public: | |||
|   Status LaunchKernel(rtStream_t stream) override; | |||
|   void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override; | |||
|  private: | |||
|   // These classes get direct access to the private fields of this task. | |||
|   friend class SingleOpModel; | |||
|   friend class RtsKernelTaskBuilder; | |||
|   // Every scalar field carries a default so that a default-initialized task never | |||
|   // holds indeterminate values (same convention as `kernel_id_ = 0` in the sibling task). | |||
|   uintptr_t addresses_[2] = {0U, 0U}; | |||
|   size_t dst_max_ = 0U; | |||
|   size_t count_ = 0U; | |||
|   // Value-initialized, i.e. the enumerator (if any) whose underlying value is zero. | |||
|   rtMemcpyKind_t kind_{}; | |||
|   NodePtr node_; | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_SINGLE_OP_TASK_OP_TASK_H_ | |||
| @@ -0,0 +1,45 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "rts_kernel_task_builder.h" | |||
| #include "build_task_utils.h" | |||
| namespace ge { | |||
| namespace { | |||
| // Number of entries the joined address list must contain for a memcpy task. | |||
| constexpr size_t kNumAddresses = 2U; | |||
| }  // namespace | |||
| /// Allocates a MemcpyAsyncTask for op_desc and fills it from kernel_def. | |||
| /// @param op_desc    operator descriptor attached to the task and used to resolve its addresses | |||
| /// @param kernel_def supplies the dst_max, count and kind values copied into the task | |||
| /// @param param      model parameters forwarded to BuildTaskUtils::GetAddresses | |||
| /// @param task       [out] receives the newly allocated task; it is assigned before the address | |||
| ///                   check, so it is non-null even when INTERNAL_ERROR is returned | |||
| /// @return SUCCESS, or INTERNAL_ERROR when the joined address list does not contain exactly | |||
| ///         kNumAddresses entries | |||
| Status RtsKernelTaskBuilder::BuildMemcpyAsyncTask(const OpDescPtr &op_desc, | |||
|                                                   const domi::MemcpyAsyncDef &kernel_def, | |||
|                                                   const SingleOpModelParam &param, | |||
|                                                   std::unique_ptr<MemcpyAsyncTask> &task) { | |||
|   task.reset(new (std::nothrow) MemcpyAsyncTask()); | |||
|   // NOTE(review): GE_CHECK_NOTNULL is defined elsewhere; presumably it returns an error status on allocation failure. | |||
|   GE_CHECK_NOTNULL(task); | |||
|   task->SetOpDesc(op_desc); | |||
|   task->dst_max_ = kernel_def.dst_max(); | |||
|   task->count_ = kernel_def.count(); | |||
|   task->kind_ = static_cast<rtMemcpyKind_t>(kernel_def.kind()); | |||
|   auto addresses = BuildTaskUtils::JoinAddresses(BuildTaskUtils::GetAddresses(op_desc, param, false)); | |||
|   // Exactly two addresses are required: index 0 and index 1 are stored on the task below. | |||
|   if (addresses.size() != kNumAddresses) { | |||
|     GELOGE(INTERNAL_ERROR, "[Build][MemcpyAsyncTask] Invalid address count: %zu", addresses.size()); | |||
|     return INTERNAL_ERROR; | |||
|   } | |||
|   task->addresses_[0] = reinterpret_cast<uintptr_t>(addresses[0]); | |||
|   task->addresses_[1] = reinterpret_cast<uintptr_t>(addresses[1]); | |||
|   return SUCCESS; | |||
| } | |||
| } // namespace ge | |||
| @@ -0,0 +1,34 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef GE_SINGLE_OP_TASK_RTS_KERNEL_TASK_BUILDER_H_ | |||
| #define GE_SINGLE_OP_TASK_RTS_KERNEL_TASK_BUILDER_H_ | |||
| #include <vector> | |||
| #include "graph/op_desc.h" | |||
| #include "single_op/single_op.h" | |||
| #include "single_op/single_op_model.h" | |||
| namespace ge { | |||
| /// Static builder for MemcpyAsyncTask objects. | |||
| class RtsKernelTaskBuilder { | |||
|  public: | |||
|   /// Builds a MemcpyAsyncTask from an operator descriptor and its memcpy task definition. | |||
|   /// @param op_desc    operator descriptor the task is built for | |||
|   /// @param kernel_def memcpy task definition | |||
|   /// @param param      model parameters, taken by const reference | |||
|   /// @param task       [out] receives the task that was built | |||
|   /// @return a Status value; the definition is outside this header | |||
|   static Status BuildMemcpyAsyncTask(const OpDescPtr &op_desc, | |||
|                                      const domi::MemcpyAsyncDef &kernel_def, | |||
|                                      const SingleOpModelParam &param, | |||
|                                      std::unique_ptr<MemcpyAsyncTask> &task); | |||
| }; | |||
| } // namespace ge | |||
| #endif // GE_SINGLE_OP_TASK_RTS_KERNEL_TASK_BUILDER_H_ | |||
| @@ -25,6 +25,7 @@ | |||
| #define private public | |||
| #include "single_op/single_op_model.h" | |||
| #include "single_op/task/tbe_task_builder.h" | |||
| #include "single_op/task/rts_kernel_task_builder.h" | |||
| #undef private | |||
| #undef protected | |||
| @@ -223,3 +224,26 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { | |||
| model.BuildDynamicOp(res, dynamic_single_op); | |||
| } | |||
| // Builds a memcpy task for an op with one input and one output and expects SUCCESS, | |||
| // then adds a second input offset and a second output and expects INTERNAL_ERROR. | |||
| TEST_F(UtestSingleOpModel, test_build_memcpy_task) { | |||
|   // Task definition handed to the builder. | |||
|   domi::MemcpyAsyncDef kernel_def; | |||
|   kernel_def.set_kind(2); | |||
|   kernel_def.set_count(8); | |||
|   kernel_def.set_dst_max(8); | |||
|   // Operator with a single input "x" and a single output "y", both at offset 0. | |||
|   GeShape io_shape({2}); | |||
|   GeTensorDesc io_desc(io_shape); | |||
|   auto op_desc = std::make_shared<OpDesc>(MEMCPYASYNC, MEMCPYASYNC); | |||
|   op_desc->AddInputDesc("x", io_desc); | |||
|   op_desc->AddOutputDesc("y", io_desc); | |||
|   op_desc->SetInputOffset({0}); | |||
|   op_desc->SetOutputOffset({0}); | |||
|   SingleOpModelParam param{}; | |||
|   std::unique_ptr<MemcpyAsyncTask> task; | |||
|   const auto first_status = RtsKernelTaskBuilder::BuildMemcpyAsyncTask(op_desc, kernel_def, param, task); | |||
|   ASSERT_EQ(first_status, SUCCESS); | |||
|   // Grow the operator and rebuild into the same task pointer; the builder must now reject it. | |||
|   op_desc->SetInputOffset({0, 0}); | |||
|   op_desc->AddOutputDesc("y2", io_desc); | |||
|   const auto second_status = RtsKernelTaskBuilder::BuildMemcpyAsyncTask(op_desc, kernel_def, param, task); | |||
|   ASSERT_EQ(second_status, INTERNAL_ERROR); | |||
| } | |||