Browse Source

!18987 use transdata to improve performance in print process

Merge pull request !18987 from lvchangquan/transdata_formal
tags/v1.4.0
i-robot Gitee 4 years ago
parent
commit
0e6c4071f5
7 changed files with 242 additions and 291 deletions
  1. +48
    -259
      mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
  2. +6
    -6
      mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h
  3. +119
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_transdata.cc
  4. +68
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_launch_transdata.h
  5. +0
    -22
      mindspore/ccsrc/runtime/device/kernel_runtime.cc
  6. +0
    -4
      mindspore/ccsrc/runtime/device/kernel_runtime.h
  7. +1
    -0
      tests/ut/cpp/CMakeLists.txt

+ 48
- 259
mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc View File

@@ -25,11 +25,10 @@
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/memory_manager.h"
#include "runtime/device/convert_tensor_utils.h"
#include "runtime/device/ascend/ascend_launch_transdata.h"
#include "ir/dtype/type.h"
#include "ir/tensor.h"
#include "abstract/utils.h"
#include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
#include "utils/utils.h"
#include "common/trans.h"
#include "debug/data_dump/dump_json_parser.h"
@@ -60,33 +59,6 @@ const std::set<std::pair<std::string, std::string>> use_trans_data = {
std::make_pair("int32", mindspore::kOpFormat_HWCN), std::make_pair("int64", mindspore::kOpFormat_HWCN),
std::make_pair("uint8", mindspore::kOpFormat_HWCN), std::make_pair("uint16", mindspore::kOpFormat_HWCN),
std::make_pair("uint32", mindspore::kOpFormat_HWCN), std::make_pair("uint64", mindspore::kOpFormat_HWCN)};
// File-local string constants used to assemble the trans_data TBE kernel json
// below (ConstructAttrs / ConstructInputs / ConstructOutputs /
// ConstructTransDataKernelJson). The literal values are json field names and
// fixed values the kernel-build pipeline reads — do not rename the strings.
constexpr auto src_format = "src_format";
constexpr auto dst_format = "dst_format";
constexpr auto src = "src_0";
constexpr auto dst = "dst";
constexpr auto param_type_required = "required";
constexpr auto gen_model_single = "single";
constexpr auto trans_data = "trans_data";
constexpr auto platform_tbe = "TBE";
constexpr auto name = "name";
constexpr auto valid = "valid";
constexpr auto value = "value";
constexpr auto dtype = "dtype";
constexpr auto format_str = "format";
constexpr auto ori_format = "ori_format";
constexpr auto ori_shape = "ori_shape";
constexpr auto param_type = "param_type";
constexpr auto shape_str = "shape";
constexpr auto process_aicore = "aicore";
constexpr auto gen_model_str = "gen_model";
constexpr auto impl_path_str = "impl_path";
constexpr auto attrs_str = "attrs";
constexpr auto inputs_str = "inputs";
constexpr auto outputs_str = "outputs";
constexpr auto kernel_name_str = "kernel_name";
constexpr auto op_info_str = "op_info";
constexpr auto platform_str = "platform";
constexpr auto fractal_z = "FRACTAL_Z";
} // namespace

namespace mindspore {
@@ -167,115 +139,6 @@ bool SyncDeviceToHostAndFloatToFloat64(void *dst, size_t dst_size, const void *s
return true;
}

// Allocate static device memory for a single-op launch and wrap it in a
// DeviceAddress, using the Ascend kernel runtime of the current device id.
DeviceAddressPtr AssignLaunchMemory(size_t size, const std::string &format, TypeId type) {
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  const auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
  auto runtime = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id);
  MS_EXCEPTION_IF_NULL(runtime);
  return runtime->AssignSingleOpLaunchMemory(size, format, type);
}

// Build the "attrs" array for the trans_data kernel json: the source format
// attribute (FRAC_Z is spelled "FRACTAL_Z") and the fixed NCHW destination.
nlohmann::json ConstructAttrs(const std::string &format) {
  nlohmann::json source_attr;
  source_attr[name] = src_format;
  source_attr[valid] = true;
  source_attr[value] = (format == kOpFormat_FRAC_Z) ? std::string(fractal_z) : format;

  nlohmann::json dest_attr;
  dest_attr[name] = dst_format;
  dest_attr[valid] = true;
  dest_attr[value] = kOpFormat_NCHW;

  nlohmann::json attrs;
  attrs.push_back(source_attr);
  attrs.push_back(dest_attr);
  return attrs;
}

// Build the "inputs" description for the trans_data kernel json. The input is
// the device-format buffer: "shape" comes from input_shape (device shape) and
// "ori_shape" from output_shape (host/NCHW shape).
nlohmann::json ConstructInputs(const std::vector<size_t> &input_shape, const std::vector<size_t> &output_shape,
                               const std::string &format, mindspore::TypeId type) {
  nlohmann::json desc;
  desc[dtype] = type_id_name_map.at(type);
  // TBE spells FRAC_Z as "FRACTAL_Z".
  desc[format_str] = (format == kOpFormat_FRAC_Z) ? std::string(fractal_z) : format;
  desc[name] = src;
  desc[ori_format] = kOpFormat_NCHW;
  for (const auto dim : output_shape) {
    (void)desc[ori_shape].emplace_back(dim);
  }
  desc[param_type] = param_type_required;
  // device-side shape of the input tensor
  for (const auto dim : input_shape) {
    (void)desc[shape_str].emplace_back(dim);
  }
  desc[valid] = true;

  nlohmann::json inner;
  inner.push_back(desc);
  nlohmann::json inputs;
  inputs.push_back(inner);
  return inputs;
}

// Build the "outputs" description for the trans_data kernel json. The output
// is always NCHW; both "shape" and "ori_shape" use the host shape.
nlohmann::json ConstructOutputs(const std::vector<size_t> &output_shape, mindspore::TypeId type) {
  nlohmann::json desc;
  desc[dtype] = type_id_name_map.at(type);
  desc[format_str] = kOpFormat_NCHW;
  desc[name] = dst;
  desc[ori_format] = kOpFormat_NCHW;
  for (const auto dim : output_shape) {
    (void)desc[ori_shape].emplace_back(dim);
  }
  desc[param_type] = param_type_required;
  for (const auto dim : output_shape) {
    (void)desc[shape_str].emplace_back(dim);
  }
  desc[valid] = true;

  nlohmann::json inner;
  inner.push_back(desc);
  nlohmann::json outputs;
  outputs.push_back(inner);
  return outputs;
}

// Assemble the full trans_data kernel json: op_info (attrs, inputs, outputs),
// SocInfo (tune mode) and platform, then derive a unique kernel name from the
// hash of the dumped op_info.
nlohmann::json ConstructTransDataKernelJson(const std::vector<size_t> &host_shape,
                                            const std::vector<size_t> &device_shape, const std::string &format,
                                            mindspore::TypeId type) {
  // op_info section; kernel_name is left empty so the hash below covers a
  // stable, name-free representation of the op.
  nlohmann::json op_info;
  op_info[attrs_str] = ConstructAttrs(format);
  op_info[inputs_str] = ConstructInputs(device_shape, host_shape, format, type);
  op_info[kernel_name_str] = "";
  op_info[name] = trans_data;
  op_info[outputs_str] = ConstructOutputs(host_shape, type);

  // SocInfo section: carries the auto-tiling (tune) mode from the context.
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  nlohmann::json soc_info;
  soc_info["autoTilingMode"] = ms_context->get_param<std::string>(MS_CTX_TUNE_MODE);

  nlohmann::json kernel_json;
  kernel_json[gen_model_str] = gen_model_single;
  kernel_json[impl_path_str] = "";
  kernel_json["SocInfo"] = soc_info;
  kernel_json[op_info_str] = op_info;
  kernel_json[platform_str] = platform_tbe;

  // Unique kernel name: "<op>_<hash of op_info json>".
  const std::string json_str = kernel_json[op_info_str].dump();
  const size_t hash_id = std::hash<std::string>()(json_str);
  const std::string op_name = op_info[name];
  kernel_json[op_info_str][kernel_name_str] = op_name + "_" + std::to_string(hash_id);
  return kernel_json;
}

void AscendDeviceAddress::SyncStream() const {
MS_LOG(DEBUG) << "Start!";
auto ms_context = MsContext::GetInstance();
@@ -352,80 +215,34 @@ bool AscendDeviceAddress::SyncDeviceToHost(const ShapeVector &shape, size_t size
return sync_ok;
}

void AscendDeviceAddress::LaunchTransData(const kernel::KernelModPtr &kernel_mod_ptr, void *output_address_ptr,
size_t output_size, const std::vector<size_t> &workspace_size_list) const {
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
auto input_address = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(input_address);
input_address->addr = ptr_;
input_address->size = size_;
auto output_address = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(output_address);
output_address->addr = output_address_ptr;
output_address->size = output_size;
AddressPtrList kernel_inputs = {input_address};
AddressPtrList kernel_outputs = {output_address};
AddressPtrList kernel_workspaces;
std::vector<DeviceAddressPtr> workspace_address_ptr(workspace_size_list.size());
if (!workspace_size_list.empty()) {
for (size_t i = 0; i < workspace_size_list.size(); ++i) {
auto workspace_size = MemoryManager::GetCommonAlignSize(workspace_size_list[i]);
workspace_address_ptr[i] = AssignLaunchMemory(workspace_size, "", kTypeUnknown);
MS_EXCEPTION_IF_NULL(workspace_address_ptr[i]);
auto workspace_address = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(workspace_address);
workspace_address->addr = workspace_address_ptr[i]->GetMutablePtr();
workspace_address->size = workspace_address_ptr[i]->GetSize();
kernel_workspaces.push_back(workspace_address);
std::vector<size_t> AscendDeviceAddress::GetDeviceShape(std::vector<size_t> *host_shape) const {
std::vector<size_t> device_shape;
auto node_index = GetNodeIndex();
if (format_ == kOpFormat_FRAC_NZ || format_ == kOpFormat_NCDHW) {
device_shape = trans::TransShapeToDevice(*host_shape, format_, node_index.first, node_index.second);
} else {
if (host_shape_.empty()) {
*host_shape = trans::PaddingShape(*host_shape, format_);
} else {
host_shape->clear();
(void)std::transform(host_shape_.begin(), host_shape_.end(), std::back_inserter(*host_shape), LongToSize);
}
device_shape = trans::TransShapeToDevice(*host_shape, format_, node_index.first, node_index.second);
}
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id);
MS_EXCEPTION_IF_NULL(runtime_instance);
auto ret =
runtime_instance->LaunchTaskBasedOnSingleKernel(kernel_mod_ptr, kernel_inputs, kernel_outputs, kernel_workspaces);
if (!ret) {
MS_LOG(ERROR) << "Launch kernel failed.";
}
SyncStream();
return device_shape;
}

kernel::KernelModPtr AscendDeviceAddress::CompileTransDataAndObtainKernelMod(const nlohmann::json &kernel_json) const {
static std::set<std::string> constructed_kernel = {};
auto build_manager = std::make_shared<kernel::ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manager);
std::string processor = process_aicore;
// get size
std::vector<size_t> input_size_list;
std::vector<size_t> output_size_list;
(void)kernel::TbeKernelBuild::GetIOSize(kernel_json, &input_size_list, &output_size_list, nullptr);
std::string json_name = kernel_json[op_info_str][kernel_name_str];
// op build
if (constructed_kernel.find(json_name) == constructed_kernel.end()) {
auto task_id = kernel::ParallelBuildManager::StartCompileOp(kernel_json);
build_manager->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list);
}
while (!build_manager->IsAllTaskFinish()) {
int task_id = -1;
std::string task_result;
std::string build_result;
auto ret = build_manager->WaitOne(&task_id, &task_result, &build_result);
if (!ret) {
MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id;
}
if (task_result != "Success") {
MS_EXCEPTION(ArgumentError) << "task compile Failed, task id:" << task_id << ", cause:" << task_result;
}
(void)build_manager->TaskFinishProcess(task_id, build_result, false);
}
(void)constructed_kernel.insert(json_name);
// search cache
auto cached_kernel_pack = TbeUtils::SearchCache(json_name, processor);
MS_EXCEPTION_IF_NULL(cached_kernel_pack);
auto kernel_mod_ptr = build_manager->GenKernelMod(input_size_list, output_size_list, cached_kernel_pack);
return kernel_mod_ptr;
// Create a transdata launcher bound to the current runtime's compute stream,
// converting this address's data from ori_format/device_shape to
// dst_format/host_shape.
std::shared_ptr<LaunchKernel> AscendDeviceAddress::CreateLaunchTransData(const std::vector<size_t> &host_shape,
                                                                         const std::vector<size_t> &device_shape,
                                                                         const std::string &ori_format,
                                                                         const std::string &dst_format) const {
  auto runtime = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
  MS_EXCEPTION_IF_NULL(runtime);
  auto launcher = std::make_shared<AscendLaunchTransData>(runtime->compute_stream(), type_id_, size_, ori_format,
                                                          dst_format, device_shape, host_shape);
  MS_EXCEPTION_IF_NULL(launcher);
  return launcher;
}

bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const std::vector<size_t> &host_shape,
@@ -433,33 +250,27 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const
size_t size, mindspore::TypeId type,
void *host_ptr) const {
bool sync_ok = true;
// construct trans data kernel json
nlohmann::json kernel_json = ConstructTransDataKernelJson(host_shape, device_shape, format_, type_id_);
MS_LOG(INFO) << "Construct trans_data kernel json: " << kernel_json.dump();
auto kernel_mod_ptr = CompileTransDataAndObtainKernelMod(kernel_json);
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
auto host_size = size;
if (type_id_ != type) {
auto device_dtype_size = abstract::TypeIdSize(type_id_);
if (device_dtype_size < 1) {
MS_LOG(ERROR) << "Illegal dtype.";
}
auto shape_size = abstract::ShapeSize(host_shape);
size = device_dtype_size * shape_size;
std::string dst_format = kOpFormat_NCHW;
if (launch_transdata_ == nullptr) {
launch_transdata_ = CreateLaunchTransData(host_shape, device_shape, format_, dst_format);
MS_EXCEPTION_IF_NULL(launch_transdata_);
}
// launch transdata
launch_transdata_->SetInputAddr(static_cast<uint8_t *>(ptr_));
launch_transdata_->LaunchOpKernel();
SyncStream();
auto output_addr_vec = launch_transdata_->GetKernelOutputAddr();
if (output_addr_vec.size() != 1) {
MS_LOG(EXCEPTION) << "launch transdata outputs should have only one output";
return false;
}
size = MemoryManager::GetCommonAlignSize(size);
auto output_address = AssignLaunchMemory(size, kOpFormat_NCHW, type_id_);
MS_EXCEPTION_IF_NULL(output_address);
auto workspace_size_list = GetWorkspaceSizeList(kernel_json);
// launch
LaunchTransData(kernel_mod_ptr, output_address->GetMutablePtr(), output_address->GetSize(), workspace_size_list);
if (type_id_ == type) {
SyncMemory(host_ptr, output_address->GetPtr(), host_size, RT_MEMCPY_DEVICE_TO_HOST);
SyncMemory(host_ptr, output_addr_vec[0], size, RT_MEMCPY_DEVICE_TO_HOST);
} else {
auto host = std::vector<uint8_t>(size);
SyncMemory(host.data(), output_address->GetPtr(), size, RT_MEMCPY_DEVICE_TO_HOST);
SyncMemory(host.data(), output_addr_vec[0], size, RT_MEMCPY_DEVICE_TO_HOST);
auto shape_size = abstract::ShapeSize(host_shape);
const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type, host_size};
const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type, size};
sync_ok = trans::TransDataType(type_args, host_ptr);
if (!sync_ok) {
MS_LOG(ERROR) << "Trans format failed.";
@@ -469,32 +280,6 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormatBasedOnTransData(const
return sync_ok;
}

// Look up the compiled kernel pack in the TBE cache by the kernel name stored
// in the json and return its workspace size list.
std::vector<size_t> AscendDeviceAddress::GetWorkspaceSizeList(const nlohmann::json &kernel_json) const {
  const std::string json_name = kernel_json[op_info_str][kernel_name_str];
  const std::string processor = process_aicore;
  auto kernel_pack = TbeUtils::SearchCache(json_name, processor);
  MS_EXCEPTION_IF_NULL(kernel_pack);
  return kernel_pack->kernel_json_info().workspaces;
}

std::vector<size_t> AscendDeviceAddress::GetDeviceShape(std::vector<size_t> *host_shape) const {
std::vector<size_t> device_shape;
auto node_index = GetNodeIndex();
if (format_ == kOpFormat_FRAC_NZ || format_ == kOpFormat_NCDHW) {
device_shape = trans::TransShapeToDevice(*host_shape, format_, node_index.first, node_index.second);
} else {
if (host_shape_.empty()) {
*host_shape = trans::PaddingShape(*host_shape, format_);
} else {
host_shape->clear();
(void)std::transform(host_shape_.begin(), host_shape_.end(), std::back_inserter(*host_shape), LongToSize);
}
device_shape = trans::TransShapeToDevice(*host_shape, format_, node_index.first, node_index.second);
}
return device_shape;
}

bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormat(const ShapeVector &shape, size_t size,
mindspore::TypeId type, void *host_ptr) const {
MS_LOG(INFO) << "SyncDeviceToHostAndConvertFormat, Device(format:" << format_ << ", type_id:" << TypeIdLabel(type_id_)
@@ -508,8 +293,7 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormat(const ShapeVector &sh
std::vector<size_t> device_shape = GetDeviceShape(&host_shape);
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kGraphMode &&
ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode &&
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode &&
type_id_name_map.find(type_id_) != type_id_name_map.end()) {
std::pair<std::string, std::string> type_format = std::make_pair(type_id_name_map.at(type_id_), format_);
if (use_trans_data.find(type_format) != use_trans_data.end()) {
@@ -660,7 +444,12 @@ void AscendDeviceAddress::ClearDeviceMemory() {
}
}

AscendDeviceAddress::~AscendDeviceAddress() { ClearDeviceMemory(); }
// Release this address's device memory, then free the device buffers held by
// the lazily-created transdata launcher (if format conversion was ever used).
AscendDeviceAddress::~AscendDeviceAddress() {
  ClearDeviceMemory();
  if (launch_transdata_ != nullptr) {
    launch_transdata_->FreeLaunchDeviceMem();
  }
}

bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, bool trans_flag) const {


+ 6
- 6
mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h View File

@@ -20,7 +20,6 @@
#include <string>
#include <vector>
#include <memory>
#include <nlohmann/json.hpp>
#include "runtime/device/device_address.h"
#include "runtime/device/ascend/ascend_memory_pool.h"
#include "ir/dtype.h"
@@ -32,6 +31,7 @@ namespace mindspore {
class Debugger;
#endif
namespace device {
class LaunchKernel;
namespace ascend {
class AscendDeviceAddress : public DeviceAddress {
public:
@@ -63,12 +63,12 @@ class AscendDeviceAddress : public DeviceAddress {
const std::vector<size_t> &device_shape, size_t size,
mindspore::TypeId type, void *host_ptr) const;
void SyncStream() const;

void LaunchTransData(const kernel::KernelModPtr &kernel_mod_ptr, void *output_address_ptr, size_t output_size,
const std::vector<size_t> &workspace_size_list) const;
std::vector<size_t> GetDeviceShape(std::vector<size_t> *host_shape) const;
std::vector<size_t> GetWorkspaceSizeList(const nlohmann::json &kernel_json) const;
kernel::KernelModPtr CompileTransDataAndObtainKernelMod(const nlohmann::json &kernel_json) const;
std::shared_ptr<LaunchKernel> CreateLaunchTransData(const std::vector<size_t> &host_shape,
const std::vector<size_t> &device_shape,
const std::string &ori_format,
const std::string &dst_format) const;
mutable std::shared_ptr<LaunchKernel> launch_transdata_{nullptr};
};
using AscendDeviceAddressPtr = std::shared_ptr<AscendDeviceAddress>;
} // namespace ascend


+ 119
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_launch_transdata.cc View File

@@ -0,0 +1,119 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "runtime/device/ascend/ascend_launch_transdata.h"
#include <memory>
#include <vector>
#include <algorithm>
#include "abstract/utils.h"
#include "backend/session/single_kernel_graph.h"
#include "backend/session/anf_runtime_algorithm.h"

namespace mindspore::device::ascend {
// Delegate device-memory release to the base launch-kernel implementation.
void AscendLaunchTransData::FreeDeviceMem(void *addr) { AscendLaunchKernel::FreeDeviceMem(addr); }

// Delegate size alignment to the base launch-kernel policy.
size_t AscendLaunchTransData::AlignSizeForLaunchKernel(size_t size) {
  return AscendLaunchKernel::AlignSizeForLaunchKernel(size);
}

// Delegate device-memory allocation to the base launch-kernel implementation.
uint8_t *AscendLaunchTransData::AllocDeviceMem(size_t size) { return AscendLaunchKernel::AllocDeviceMem(size); }

// Delegate kernel selection for the graph to the base implementation.
void AscendLaunchTransData::KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) {
  AscendLaunchKernel::KernelSelect(kernel_graph);
}

// Delegate kernel compilation for the graph to the base implementation.
void AscendLaunchTransData::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) {
  AscendLaunchKernel::KernelBuild(kernel_graph);
}

void AscendLaunchTransData::LaunchOpKernel() {
if (transdata_graph_ == nullptr) {
// construct transdata kernel graph and set attr
ConstructKernelGraphAndSetAttr();
// kernel build
KernelBuild(transdata_graph_);
}
// obtain kernel_mod
if (transdata_graph_->execution_order().size() != 1) {
MS_LOG(ERROR) << "the execution order of the transdata graph should have only one node";
}
kernel_mod_ = AnfAlgo::GetKernelMod(transdata_graph_->execution_order()[0]);
MS_EXCEPTION_IF_NULL(kernel_mod_);
// obtain kernel inputs
std::vector<kernel::AddressPtr> kernel_inputs;
auto input = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(input);
input->addr = input_addr_;
MS_EXCEPTION_IF_NULL(input->addr);
input->size = total_size_;
kernel_inputs.push_back(input);
// obtain kernel outputs
auto kernel_outputs = ObtainKernelOutputs(kernel_mod_->GetOutputSizeList());
// obtain kernel workspaces
auto kernel_workspace = ObtainKernelWorkspaces(kernel_mod_->GetWorkspaceSizeList());
// launch
auto ret_status = kernel_mod_->Launch(kernel_inputs, kernel_workspace, kernel_outputs, stream_);
if (!ret_status) {
MS_LOG(ERROR) << "Launch transdata single kernel failed";
}
}

// Release launcher-owned device memory. The input buffer is not owned here
// (it is set via SetInputAddr), so only the pointer is dropped; the output
// and workspace buffers allocated by this launcher are actually freed.
void AscendLaunchTransData::FreeLaunchDeviceMem() {
  input_addr_ = nullptr;
  FreeOutputAndWorkspaceDeviceMem();
}

// Construct a kernel graph containing a single TransData op.
// TransData keeps the element type, so input and output dtypes are both dtype_.
std::shared_ptr<session::KernelGraph> AscendLaunchTransData::ObtainTransDataKernelGraph() {
  std::vector<TypeId> input_dtypes = {dtype_};
  std::vector<TypeId> output_dtypes = {dtype_};
  // obtain input & output shape
  // NOTE(review): the graph's input shape is built from output_shape_ and its
  // output shape from input_shape_ (i.e. they are cross-wired). This appears
  // deliberate — the node's device-side formats/shapes are set separately in
  // ConstructKernelGraphAndSetAttr — but confirm against
  // SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp's expectations.
  std::vector<int64_t> input_shape;
  std::transform(output_shape_.begin(), output_shape_.end(), std::back_inserter(input_shape),
                 [](const size_t &value) { return static_cast<int64_t>(value); });
  std::vector<std::vector<int64_t>> input_shapes = {{input_shape}};
  std::vector<std::vector<size_t>> output_shapes = {{input_shape_}};
  auto transdata_graph = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
    kTransDataOpName, input_dtypes, input_shapes, output_dtypes, output_shapes);
  MS_EXCEPTION_IF_NULL(transdata_graph);
  return transdata_graph;
}

// Build the transdata graph and configure its single node: output infer
// type/shape, kernel build info (TBE kernel, device dtypes, src/dst formats)
// and the src_format/dst_format node attributes the TransData kernel reads.
void AscendLaunchTransData::ConstructKernelGraphAndSetAttr() {
  // construct transdata kernel graph
  transdata_graph_ = ObtainTransDataKernelGraph();
  MS_EXCEPTION_IF_NULL(transdata_graph_);
  // set transdata attr
  if (!transdata_graph_->execution_order().empty()) {
    auto transdata_node = transdata_graph_->execution_order()[0];
    // set output infer type and shape
    AnfAlgo::SetOutputInferTypeAndShape({dtype_}, {output_shape_}, transdata_node.get());
    // set build info: a TBE kernel converting src_format_ -> dst_format_
    auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
    builder->SetKernelType(KernelType::TBE_KERNEL);
    std::vector<TypeId> device_type = {dtype_};
    builder->SetInputsDeviceType(device_type);
    builder->SetOutputsDeviceType(device_type);
    std::vector<std::string> inputs_format = {src_format_};
    std::vector<std::string> outputs_format = {dst_format_};
    builder->SetInputsFormat(inputs_format);
    builder->SetOutputsFormat(outputs_format);
    AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), transdata_node.get());
    // set attr: the formats are also exposed as node attributes
    AnfAlgo::SetNodeAttr(kAttrSrcFormat, MakeValue(src_format_), transdata_node);
    AnfAlgo::SetNodeAttr(kAttrDstFormat, MakeValue(dst_format_), transdata_node);
  }
}
} // namespace mindspore::device::ascend

+ 68
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_launch_transdata.h View File

@@ -0,0 +1,68 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_LAUNCH_TRANSDATA_H_
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_LAUNCH_TRANSDATA_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "runtime/device/ascend/ascend_launch_kernel.h"

namespace mindspore::device::ascend {
// Launches a single TransData kernel that converts a device buffer from
// src_format_/input_shape_ to dst_format_/output_shape_ on the given stream.
class AscendLaunchTransData : public AscendLaunchKernel {
 public:
  AscendLaunchTransData(void *stream, TypeId dtype, size_t total_size, std::string src_format, std::string dst_format,
                        std::vector<size_t> input_shape, std::vector<size_t> output_shape)
      : AscendLaunchKernel(stream),
        dtype_(dtype),
        total_size_(total_size),
        transdata_graph_(nullptr),
        input_addr_(nullptr),
        // Sink parameters are taken by value; move them into the members to
        // avoid one extra copy of each string/vector.
        src_format_(std::move(src_format)),
        dst_format_(std::move(dst_format)),
        input_shape_(std::move(input_shape)),
        output_shape_(std::move(output_shape)) {}

  ~AscendLaunchTransData() override = default;

  // input_addr is borrowed, not owned; FreeLaunchDeviceMem() only drops it.
  void SetInputAddr(uint8_t *input_addr) override { input_addr_ = input_addr; }
  void FreeDeviceMem(void *addr) override;
  size_t AlignSizeForLaunchKernel(size_t size) override;
  uint8_t *AllocDeviceMem(size_t size) override;
  void KernelSelect(std::shared_ptr<session::KernelGraph> kernel_graph) override;
  void KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) override;

  void LaunchOpKernel() override;
  void FreeLaunchDeviceMem() override;

 protected:
  TypeId dtype_;       // element type of both input and output
  size_t total_size_;  // byte size of the input device buffer
  std::shared_ptr<session::KernelGraph> transdata_graph_;  // lazily built single-op graph
  uint8_t *input_addr_;                                    // non-owning pointer to the source device buffer
  std::string src_format_;
  std::string dst_format_;
  std::vector<size_t> input_shape_;
  std::vector<size_t> output_shape_;

 private:
  std::shared_ptr<session::KernelGraph> ObtainTransDataKernelGraph();
  void ConstructKernelGraphAndSetAttr();
};
} // namespace mindspore::device::ascend

#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_LAUNCH_TRANSDATA_H_

+ 0
- 22
mindspore/ccsrc/runtime/device/kernel_runtime.cc View File

@@ -1087,28 +1087,6 @@ void KernelRuntime::ClearOutputAddress(const std::vector<AnfNodePtr> &inputs,
}
}

// Launch one kernel mod on this runtime's stream.
// Returns false (after logging) if the kernel launch fails.
bool KernelRuntime::LaunchTaskBasedOnSingleKernel(const kernel::KernelModPtr &kernel_mod_ptr,
                                                  const AddressPtrList &kernel_inputs,
                                                  const AddressPtrList &kernel_outputs,
                                                  const AddressPtrList &kernel_workspaces) const {
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  const bool launched = kernel_mod_ptr->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
  if (!launched) {
    MS_LOG(ERROR) << "Launch kernel failed.";
  }
  return launched;
}

// Create a DeviceAddress and back it with freshly-allocated static device
// memory for a single-op launch.
DeviceAddressPtr KernelRuntime::AssignSingleOpLaunchMemory(size_t size, const std::string &format, TypeId type) {
  auto address = CreateDeviceAddress(nullptr, size, format, type);
  MS_EXCEPTION_IF_NULL(address);
  MS_EXCEPTION_IF_NULL(mem_manager_);
  auto allocated = mem_manager_->MallocMem(kStaticMem, size, address);
  MS_EXCEPTION_IF_NULL(allocated);
  return address;
}

#if (ENABLE_CPU && !_WIN32)
void KernelRuntime::GetFirstPSEmbeddingCache(const session::KernelGraph *graph,
AnfNodePtr *const first_cache_input_index,


+ 0
- 4
mindspore/ccsrc/runtime/device/kernel_runtime.h View File

@@ -63,9 +63,6 @@ class KernelRuntime {
virtual bool GenDynamicKernel(const session::KernelGraph *graph) = 0;
virtual bool RunDynamicKernelAsync(const session::KernelGraph *graph) = 0;
bool LaunchKernel(const session::KernelGraph *graph);
bool LaunchTaskBasedOnSingleKernel(const kernel::KernelModPtr &kernel_mod_ptr, const AddressPtrList &kernel_inputs,
const AddressPtrList &kernel_outputs,
const AddressPtrList &kernel_workspaces) const;
virtual void AssignStaticMemoryInput(const session::KernelGraph *graph);
virtual void AssignStaticMemoryValueNode(session::KernelGraph *graph);
virtual void ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs,
@@ -94,7 +91,6 @@ class KernelRuntime {
virtual void ReleaseDeviceRes() {}
void set_device_id(uint32_t device_id) { device_id_ = device_id; }
uint32_t device_id() { return device_id_; }
DeviceAddressPtr AssignSingleOpLaunchMemory(size_t size, const std::string &format, TypeId type);

// set debugger
void SetDebugger() {


+ 1
- 0
tests/ut/cpp/CMakeLists.txt View File

@@ -115,6 +115,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_launch_kernel.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_launch_mul.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_launch_atomic_clean.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_launch_transdata.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/kernel_select_graph_kernel.cc"
"../../../mindspore/ccsrc/runtime/device/convert_tensor_utils.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc"


Loading…
Cancel
Save