
add some trt operator

commit ec702d2ecd (pull/14798/head)
wilfChen, 4 years ago

1 changed file with 178 additions and 96 deletions:
mindspore/ccsrc/backend/optimizer/trt_pass/trt_op_converter.cc (+178, -96)

@@ -23,35 +23,100 @@
 namespace mindspore {
 namespace opt {
-// Register operator converter from AnfNode to trt layer: `OPNAME` should keep the same as primitive definition.
-#define MS_TRT_CONVERTER_FUNC_REG(OPNAME)                                                                  \
-  ConvertResult Gpu##OPNAME##TrtConverter(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context); \
-  static const TrtOpRegister(Gpu##OPNAME##ConverterRegister)(#OPNAME, Gpu##OPNAME##TrtConverter);         \
-  ConvertResult Gpu##OPNAME##TrtConverter(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context)
+namespace {
+ConvertResult AddReshapeLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context) {
+  std::vector<LayerInput> inputs;
+  bool ret = context->LoadLayerInput(node, &inputs);
+  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
+    MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 1 expected.";
+    return {false, {}};
+  }
 
-MS_TRT_CONVERTER_FUNC_REG(Conv2D) {
+  auto *layer = context->network()->addShuffle(*inputs[0].tensor());
+  MS_EXCEPTION_IF_NULL(layer);
+
+  const auto &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const auto &output_shape = AnfAlgo::GetOutputInferShape(node, 0);
+  if (input_shape[0] != output_shape[0]) {
+    MS_LOG(ERROR) << "Reshape does not support modify batch size. Input batch size: " << input_shape[0]
+                  << "Output batch size: " << output_shape[0];
+    return {false, {}};
+  }
+
+  const nvinfer1::Dims &dims = TrtUtils::MsDimsToTrtDims(output_shape, false);
+  layer->setReshapeDimensions(dims);
+
+  return {true, {LayerInput(layer->getOutput(0))}};
+}
+
+ConvertResult AddElementLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
+                              nvinfer1::ElementWiseOperation op_type) {
   std::vector<LayerInput> inputs;
   bool ret = context->LoadLayerInput(node, &inputs);
-  if (!ret || inputs.size() != 2 || !inputs[0].IsTensor() || !inputs[1].IsWeight()) {
+  if (!ret || inputs.size() != 2) {
     MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 2 expected.";
     return {false, {}};
   }
 
-  const auto &data_format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
-  if (data_format != "NCHW") {
-    MS_LOG(ERROR) << "The format: " << data_format << " not supported.";
+  const std::vector<size_t> &x1_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  const std::vector<size_t> &x2_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 1);
+  const std::vector<size_t> &y_shape = AnfAlgo::GetOutputInferShape(node, 0);
+
+  // Keep to output
+  auto Broadcast = [&context, &y_shape](nvinfer1::ITensor *tensor, const std::vector<size_t> &x_shape) {
+    if (x_shape.size() == y_shape.size()) {
+      return tensor;
+    }
+
+    // Copy x_shape to dim with tail align, and fill left axis with 1.
+    // For example:
+    //   x: [C, H, W]
+    //   y: [N, C, H, W]
+    //   dim: [1, C, H, W]
+    nvinfer1::Dims dim;
+    dim.nbDims = SizeToInt(y_shape.size());
+    std::fill(dim.d, dim.d + dim.nbDims, 1);
+    size_t offset = y_shape.size() - x_shape.size();
+    for (size_t i = 0; i < x_shape.size(); i++) {
+      dim.d[i + offset] = SizeToInt(x_shape[i]);
+    }
+
+    auto *layer = context->network()->addShuffle(*tensor);
+    MS_EXCEPTION_IF_NULL(layer);
+    layer->setReshapeDimensions(dim);
+
+    return layer->getOutput(0);
+  };
+
+  auto *x1 = Broadcast(inputs[0].tensor(), x1_shape);
+  auto *x2 = Broadcast(inputs[1].tensor(), x2_shape);
+  auto *layer = context->network()->addElementWise(*x1, *x2, op_type);
+  MS_EXCEPTION_IF_NULL(layer);
+
+  return {true, {LayerInput(layer->getOutput(0))}};
+}
+
+ConvertResult AddPoolingLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
+                              nvinfer1::PoolingType pooling_type) {
+  std::vector<LayerInput> inputs;
+  bool ret = context->LoadLayerInput(node, &inputs);
+  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
+    MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 1 expected.";
+    return {false, {}};
+  }
+
+  const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
+  if (format != "NCHW") {
+    MS_LOG(ERROR) << "The format: " << format << " not supported.";
     return {false, {}};
   }
 
   const auto &kernel_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "kernel_size");
-  const auto &out_channel = AnfAlgo::GetNodeAttr<int64_t>(node, "out_channel");
-  nvinfer1::Weights bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
-  auto *layer = context->network()->addConvolutionNd(
-    *(inputs[0].tensor()), LongToInt(out_channel),
-    nvinfer1::DimsHW{LongToInt(kernel_size[0]), LongToInt(kernel_size[1])}, *(inputs[1].weight()), bias);
+  auto *layer = context->network()->addPoolingNd(
+    *(inputs[0].tensor()), pooling_type, nvinfer1::DimsHW{LongToInt(kernel_size[2]), LongToInt(kernel_size[3])});
   MS_EXCEPTION_IF_NULL(layer);
 
-  const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "stride");
+  const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "strides");
   layer->setStride(nvinfer1::DimsHW{LongToInt(strides[2]), LongToInt(strides[3])});
 
   auto pad_mode = AnfAlgo::GetNodeAttr<std::string>(node, "pad_mode");
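The Broadcast lambda above only pads the lower-rank input's shape with leading 1s so that TensorRT's element-wise layer can broadcast it against the full-rank output. Below is a minimal standalone sketch of that tail-align rule using plain std::vector instead of the TensorRT and MindSpore types; TailAlign is a hypothetical name introduced here, not part of the diff.

// Standalone sketch of the tail-align rule used by the Broadcast lambda above.
// It mirrors only the index arithmetic; no TensorRT types are involved.
#include <cstdio>
#include <vector>

std::vector<int> TailAlign(const std::vector<int> &x_shape, size_t y_rank) {
  std::vector<int> dim(y_rank, 1);           // fill left axes with 1
  size_t offset = y_rank - x_shape.size();   // assumes y_rank >= x_shape.size()
  for (size_t i = 0; i < x_shape.size(); i++) {
    dim[i + offset] = x_shape[i];            // copy x_shape with tail alignment
  }
  return dim;
}

int main() {
  // x: [C, H, W] broadcast against y: [N, C, H, W] -> dim: [1, C, H, W]
  for (int d : TailAlign({3, 32, 32}, 4)) printf("%d ", d);  // prints: 1 3 32 32
  printf("\n");
  return 0;
}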
@@ -60,51 +125,54 @@ MS_TRT_CONVERTER_FUNC_REG(Conv2D) {
     layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
   }
 
+  if (pad_mode == "PAD") {
+    const auto &pad_list = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "pad_list");
+    layer->setPrePadding(nvinfer1::DimsHW{LongToInt(pad_list[0]), LongToInt(pad_list[2])});
+    layer->setPostPadding(nvinfer1::DimsHW{LongToInt(pad_list[1]), LongToInt(pad_list[3])});
+  }
+
   return {true, {LayerInput(layer->getOutput(0))}};
 }
 
-MS_TRT_CONVERTER_FUNC_REG(Add) {
+ConvertResult AddActivationLayer(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context,
+                                 nvinfer1::ActivationType act_type) {
   std::vector<LayerInput> inputs;
   bool ret = context->LoadLayerInput(node, &inputs);
-  if (!ret || inputs.size() != 2) {
-    MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 2 expected.";
+  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
+    MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 1 expected.";
     return {false, {}};
   }
 
-  auto *layer =
-    context->network()->addElementWise(*inputs[0].tensor(), *inputs[1].tensor(), nvinfer1::ElementWiseOperation::kSUM);
+  auto *layer = context->network()->addActivation(*inputs[0].tensor(), act_type);
   MS_EXCEPTION_IF_NULL(layer);
 
   return {true, {LayerInput(layer->getOutput(0))}};
 }
+}  // namespace
 
-MS_TRT_CONVERTER_FUNC_REG(MaxPool) {
+// Register operator converter from AnfNode to trt layer: `OPNAME` should keep the same as primitive definition.
+#define MS_TRT_CONVERTER_FUNC_REG(OPNAME)                                                                  \
+  ConvertResult Gpu##OPNAME##TrtConverter(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context); \
+  static const TrtOpRegister(Gpu##OPNAME##ConverterRegister)(#OPNAME, Gpu##OPNAME##TrtConverter);         \
+  ConvertResult Gpu##OPNAME##TrtConverter(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context)
+
+MS_TRT_CONVERTER_FUNC_REG(Conv2D) {
   std::vector<LayerInput> inputs;
   bool ret = context->LoadLayerInput(node, &inputs);
-  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
-    MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 1 expected.";
+  if (!ret || inputs.size() != 2 || !inputs[0].IsTensor() || !inputs[1].IsWeight()) {
+    MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 2 expected.";
     return {false, {}};
   }
 
-  const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
-  if (format != "NCHW") {
-    MS_LOG(ERROR) << "The format: " << format << " not supported.";
+  const auto &data_format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
+  if (data_format != "NCHW") {
+    MS_LOG(ERROR) << "The format: " << data_format << " not supported.";
     return {false, {}};
   }
 
   const auto &kernel_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "kernel_size");
-  auto *layer =
-    context->network()->addPoolingNd(*(inputs[0].tensor()), nvinfer1::PoolingType::kMAX,
-                                     nvinfer1::DimsHW{LongToInt(kernel_size[2]), LongToInt(kernel_size[3])});
+  const auto &out_channel = AnfAlgo::GetNodeAttr<int64_t>(node, "out_channel");
+  nvinfer1::Weights bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
+  auto *layer = context->network()->addConvolutionNd(
+    *(inputs[0].tensor()), LongToInt(out_channel),
+    nvinfer1::DimsHW{LongToInt(kernel_size[0]), LongToInt(kernel_size[1])}, *(inputs[1].weight()), bias);
   MS_EXCEPTION_IF_NULL(layer);
 
-  const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "strides");
+  const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "stride");
   layer->setStride(nvinfer1::DimsHW{LongToInt(strides[2]), LongToInt(strides[3])});
 
   auto pad_mode = AnfAlgo::GetNodeAttr<std::string>(node, "pad_mode");
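For reference, this is roughly what the relocated MS_TRT_CONVERTER_FUNC_REG macro above expands to for a single op such as Add: a forward declaration, a static TrtOpRegister that maps the primitive name to the converter, and the opening of the converter definition whose body follows the macro invocation. This is a sketch only; it reuses the file's own types (ConvertResult, TrtOpRegister, AddElementLayer) and is not standalone code.

// Approximate expansion of MS_TRT_CONVERTER_FUNC_REG(Add) as defined above:
//   1. forward-declare the converter,
//   2. construct a static TrtOpRegister binding the op name string to it,
//   3. open the converter definition, whose body follows the macro invocation.
ConvertResult GpuAddTrtConverter(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context);
static const TrtOpRegister GpuAddConverterRegister("Add", GpuAddTrtConverter);
ConvertResult GpuAddTrtConverter(AnfNodePtr node, std::shared_ptr<TrtConverterContext> context) {
  return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kSUM);
}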
@@ -113,18 +181,84 @@ MS_TRT_CONVERTER_FUNC_REG(MaxPool) {
     layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
   }
 
+  if (pad_mode == "PAD") {
+    const auto &pad_list = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "pad_list");
+    layer->setPrePadding(nvinfer1::DimsHW{LongToInt(pad_list[0]), LongToInt(pad_list[2])});
+    layer->setPostPadding(nvinfer1::DimsHW{LongToInt(pad_list[1]), LongToInt(pad_list[3])});
+  }
+
   return {true, {LayerInput(layer->getOutput(0))}};
 }
 
-MS_TRT_CONVERTER_FUNC_REG(ReLU) {
+// Binary broadcast operators.
+MS_TRT_CONVERTER_FUNC_REG(Add) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kSUM); }
+MS_TRT_CONVERTER_FUNC_REG(Sub) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kSUB); }
+MS_TRT_CONVERTER_FUNC_REG(Mul) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kPROD); }
+MS_TRT_CONVERTER_FUNC_REG(Div) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kDIV); }
+MS_TRT_CONVERTER_FUNC_REG(Pow) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kPOW); }
+MS_TRT_CONVERTER_FUNC_REG(Maximum) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kMAX); }
+MS_TRT_CONVERTER_FUNC_REG(Minimum) { return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kMIN); }
+MS_TRT_CONVERTER_FUNC_REG(FloorDiv) {
+  return AddElementLayer(node, context, nvinfer1::ElementWiseOperation::kFLOOR_DIV);
+}
+
+// Pooling operators.
+MS_TRT_CONVERTER_FUNC_REG(AvgPool) { return AddPoolingLayer(node, context, nvinfer1::PoolingType::kAVERAGE); }
+MS_TRT_CONVERTER_FUNC_REG(MaxPool) { return AddPoolingLayer(node, context, nvinfer1::PoolingType::kMAX); }
+
+// Activation operators.
+MS_TRT_CONVERTER_FUNC_REG(ReLU) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kRELU); }
+MS_TRT_CONVERTER_FUNC_REG(Sigmoid) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kSIGMOID); }
+MS_TRT_CONVERTER_FUNC_REG(Tanh) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kTANH); }
+MS_TRT_CONVERTER_FUNC_REG(Elu) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kELU); }
+MS_TRT_CONVERTER_FUNC_REG(Softsign) { return AddActivationLayer(node, context, nvinfer1::ActivationType::kSOFTSIGN); }
+
+MS_TRT_CONVERTER_FUNC_REG(GeLU) {
   std::vector<LayerInput> inputs;
   bool ret = context->LoadLayerInput(node, &inputs);
-  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
+  if (!ret || inputs.size() != 1) {
     MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 1 expected.";
     return {false, {}};
   }
 
-  auto *layer = context->network()->addActivation(*inputs[0].tensor(), nvinfer1::ActivationType::kRELU);
+  const std::vector<size_t> &x_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
+  nvinfer1::Dims dim;
+  dim.nbDims = SizeToInt(x_shape.size());
+  std::fill(dim.d, dim.d + dim.nbDims, 1);
+
+  auto AddConst = [&context, &dim](const float &coeff) -> nvinfer1::ITensor * {
+    std::shared_ptr<tensor::Tensor> weight = context->CreateTempWeight(kNumberTypeFloat32, {1});
+    auto value = static_cast<float *>(weight->data_c());
+    value[0] = coeff;
+
+    auto *layer = context->network()->addConstant(dim, nvinfer1::Weights{nvinfer1::DataType::kFLOAT, value, 1});
+    MS_EXCEPTION_IF_NULL(layer);
+    return layer->getOutput(0);
+  };
+
+  // y = 0.5 * x * (1 + tanh(0.7978846 * (x + 0.044715 * x^3)))
+  auto *c1 = AddConst(0.5f);
+  auto *c2 = AddConst(1.0f);
+  auto *c3 = AddConst(0.7978846f);
+  auto *c4 = AddConst(0.044715f);
+  auto *c5 = AddConst(3.0f);
+
+  auto *x = inputs[0].tensor();
+  nvinfer1::ILayer *layer = context->network()->addElementWise(*x, *c5, nvinfer1::ElementWiseOperation::kPOW);
+  MS_EXCEPTION_IF_NULL(layer);
+  layer = context->network()->addElementWise(*c4, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
+  MS_EXCEPTION_IF_NULL(layer);
+  layer = context->network()->addElementWise(*x, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
+  MS_EXCEPTION_IF_NULL(layer);
+  layer = context->network()->addElementWise(*c3, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
+  MS_EXCEPTION_IF_NULL(layer);
+  layer = context->network()->addActivation(*layer->getOutput(0), nvinfer1::ActivationType::kTANH);
+  MS_EXCEPTION_IF_NULL(layer);
+  layer = context->network()->addElementWise(*c2, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
+  MS_EXCEPTION_IF_NULL(layer);
+  layer = context->network()->addElementWise(*x, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
+  MS_EXCEPTION_IF_NULL(layer);
+  layer = context->network()->addElementWise(*c1, *layer->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
   MS_EXCEPTION_IF_NULL(layer);
 
   return {true, {LayerInput(layer->getOutput(0))}};
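The GeLU converter above builds the tanh-based approximation y = 0.5 * x * (1 + tanh(0.7978846 * (x + 0.044715 * x^3))) out of constant, element-wise, and activation layers; 0.7978846 is sqrt(2/pi). Below is a small standalone check of that formula against the exact erf-based GeLU, independent of TensorRT; GeluExact and GeluTanh are hypothetical helper names introduced for this sketch.

// Compare the tanh approximation used by the converter with exact GeLU.
#include <cmath>
#include <cstdio>

double GeluExact(double x) { return 0.5 * x * (1.0 + std::erf(x / std::sqrt(2.0))); }
double GeluTanh(double x) {
  return 0.5 * x * (1.0 + std::tanh(0.7978846 * (x + 0.044715 * x * x * x)));
}

int main() {
  for (double x : {-3.0, -1.0, -0.5, 0.0, 0.5, 1.0, 3.0}) {
    printf("x=%5.2f  exact=%9.6f  tanh=%9.6f  diff=%.2e\n", x, GeluExact(x), GeluTanh(x),
           std::fabs(GeluExact(x) - GeluTanh(x)));
  }
  return 0;
}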
@@ -134,7 +268,7 @@ MS_TRT_CONVERTER_FUNC_REG(MatMul) {
   std::vector<LayerInput> inputs;
   bool ret = context->LoadLayerInput(node, &inputs);
   if (!ret || inputs.size() != 2 || !inputs[0].IsTensor() || !inputs[1].IsWeight()) {
-    MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 1 expected.";
+    MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 2 expected.";
     return {false, {}};
   }
 
@@ -201,31 +335,11 @@ MS_TRT_CONVERTER_FUNC_REG(BiasAdd) {
   return {true, {LayerInput(layer->getOutput(0))}};
 }
 
-MS_TRT_CONVERTER_FUNC_REG(Reshape) {
-  std::vector<LayerInput> inputs;
-  bool ret = context->LoadLayerInput(node, &inputs);
-  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
-    MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 1 expected.";
-    return {false, {}};
-  }
+MS_TRT_CONVERTER_FUNC_REG(Reshape) { return AddReshapeLayer(node, context); }
 
-  auto *layer = context->network()->addShuffle(*inputs[0].tensor());
-  MS_EXCEPTION_IF_NULL(layer);
+MS_TRT_CONVERTER_FUNC_REG(ExpandDims) { return AddReshapeLayer(node, context); }
 
-  const auto &input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0);
-  const auto &output_shape = AnfAlgo::GetOutputInferShape(node, 0);
-  if (input_shape[0] != output_shape[0]) {
-    MS_LOG(ERROR) << "Reshape does not support modify batch size. Input batch size: " << input_shape[0]
-                  << "Output batch size: " << output_shape[0];
-    return {false, {}};
-  }
-
-  const nvinfer1::Dims &dims = TrtUtils::MsDimsToTrtDims(output_shape, false);
-  layer->setReshapeDimensions(dims);
-  MS_EXCEPTION_IF_NULL(layer);
-
-  return {true, {LayerInput(layer->getOutput(0))}};
-}
+MS_TRT_CONVERTER_FUNC_REG(Squeeze) { return AddReshapeLayer(node, context); }
 
 MS_TRT_CONVERTER_FUNC_REG(BatchNorm) {
   std::vector<LayerInput> inputs;
@@ -282,38 +396,6 @@ MS_TRT_CONVERTER_FUNC_REG(BatchNorm) {
   return {true, {LayerInput(layer->getOutput(0))}};
 }
 
-MS_TRT_CONVERTER_FUNC_REG(AvgPool) {
-  std::vector<LayerInput> inputs;
-  bool ret = context->LoadLayerInput(node, &inputs);
-  if (!ret || inputs.size() != 1 || !inputs[0].IsTensor()) {
-    MS_LOG(ERROR) << "Input num not match: " << inputs.size() << ", with 1 expected.";
-    return {false, {}};
-  }
-
-  const auto &format = AnfAlgo::GetNodeAttr<std::string>(node, "format");
-  if (format != "NCHW") {
-    MS_LOG(ERROR) << "The format: " << format << " not supported.";
-    return {false, {}};
-  }
-
-  const auto &kernel_size = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "kernel_size");
-  auto *layer =
-    context->network()->addPoolingNd(*(inputs[0].tensor()), nvinfer1::PoolingType::kAVERAGE,
-                                     nvinfer1::DimsHW{LongToInt(kernel_size[2]), LongToInt(kernel_size[3])});
-  MS_EXCEPTION_IF_NULL(layer);
-
-  const auto &strides = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "strides");
-  layer->setStride(nvinfer1::DimsHW{LongToInt(strides[2]), LongToInt(strides[3])});
-
-  auto pad_mode = AnfAlgo::GetNodeAttr<std::string>(node, "pad_mode");
-  std::transform(pad_mode.begin(), pad_mode.end(), pad_mode.begin(), toupper);
-  if (pad_mode == "SAME") {
-    layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
-  }
-
-  return {true, {LayerInput(layer->getOutput(0))}};
-}
-
 MS_TRT_CONVERTER_FUNC_REG(Concat) {
   std::vector<LayerInput> inputs;
   bool ret = context->LoadLayerInput(node, &inputs);
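Reshape, ExpandDims, and Squeeze can all delegate to AddReshapeLayer because the converter only consumes the node's inferred output shape and passes it to setReshapeDimensions. A standalone sketch of the host-side shape bookkeeping this relies on; the ExpandDims and Squeeze functions here are hypothetical helpers for illustration, not the converters themselves.

// Host-side view of why one shuffle-based converter covers all three ops:
// the inferred output shape already encodes the inserted or removed 1-sized axes.
#include <cstdio>
#include <vector>

std::vector<int> ExpandDims(std::vector<int> shape, int axis) {
  shape.insert(shape.begin() + axis, 1);  // output shape carries the new axis
  return shape;
}

std::vector<int> Squeeze(const std::vector<int> &shape) {
  std::vector<int> out;
  for (int d : shape) {
    if (d != 1) out.push_back(d);  // drop all 1-sized axes
  }
  return out;
}

int main() {
  for (int d : ExpandDims({32, 64}, 0)) printf("%d ", d);   // prints: 1 32 64
  printf("\n");
  for (int d : Squeeze({1, 32, 1, 64})) printf("%d ", d);   // prints: 32 64
  printf("\n");
  return 0;
}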

