GitOrigin-RevId: 4a14f53738
tags/v1.9.0
| @@ -91,7 +91,7 @@ class ResNet(M.Module): | |||||
| def run_dtr_resnet1202(): | def run_dtr_resnet1202(): | ||||
| batch_size = 7 | |||||
| batch_size = 6 | |||||
| resnet1202 = ResNet(BasicBlock, [200, 200, 200]) | resnet1202 = ResNet(BasicBlock, [200, 200, 200]) | ||||
| opt = optim.SGD(resnet1202.parameters(), lr=0.05, momentum=0.9, weight_decay=1e-4) | opt = optim.SGD(resnet1202.parameters(), lr=0.05, momentum=0.9, weight_decay=1e-4) | ||||
| gm = GradManager().attach(resnet1202.parameters()) | gm = GradManager().attach(resnet1202.parameters()) | ||||
| @@ -12,6 +12,7 @@ | |||||
| #include "megbrain/comp_node.h" | #include "megbrain/comp_node.h" | ||||
| #include "megbrain/comp_node_env.h" | #include "megbrain/comp_node_env.h" | ||||
| #include "megbrain/imperative/physical_tensor.h" | #include "megbrain/imperative/physical_tensor.h" | ||||
| #include "megbrain/rdnn/management.h" | |||||
| using namespace megdnn; | using namespace megdnn; | ||||
| @@ -28,13 +29,12 @@ struct DnnOprCaller { | |||||
| CompNode cn; | CompNode cn; | ||||
| DeviceTensorND dev_tensor; | DeviceTensorND dev_tensor; | ||||
| Workspace workspace; | Workspace workspace; | ||||
| std::unique_ptr<Opr> op; | |||||
| mgb::opr::intl::UniqPtrWithCN<Opr> op; | |||||
| DnnOprCaller(CompNode cn) : cn(cn), op(create_operator(cn)) {} | |||||
| DnnOprCaller(CompNode cn) : cn(cn), op(std::move(create_operator(cn))) {} | |||||
| static std::unique_ptr<Opr> create_operator(CompNode cn) { | |||||
| auto&& handle = MegDNNHandle::get(CompNodeEnv::from_comp_node(cn)).handle(); | |||||
| return handle->create_operator<Opr>(); | |||||
| static mgb::opr::intl::UniqPtrWithCN<Opr> create_operator(CompNode cn) { | |||||
| return mgb::opr::intl::create_megdnn_opr<Opr>(cn); | |||||
| } | } | ||||
| megdnn::Workspace create_workspace(TensorLayout layout) { | megdnn::Workspace create_workspace(TensorLayout layout) { | ||||
| @@ -171,7 +171,7 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| bool empty_input = src_layout.is_empty(); | bool empty_input = src_layout.is_empty(); | ||||
| size_t nr_inp = inputs.size(); | size_t nr_inp = inputs.size(); | ||||
| DeviceTensorND ws, reserve; | |||||
| DeviceTensorND reserve; | |||||
| size_t sz = 0, rsz = 0; | size_t sz = 0, rsz = 0; | ||||
| TensorLayout w_layout({sz}, dtype::Byte()); | TensorLayout w_layout({sz}, dtype::Byte()); | ||||
| @@ -186,9 +186,7 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| w_layout = TensorLayout({sz}, dtype::Byte()); | w_layout = TensorLayout({sz}, dtype::Byte()); | ||||
| r_layout = TensorLayout({rsz}, dtype::Byte()); | r_layout = TensorLayout({rsz}, dtype::Byte()); | ||||
| } | } | ||||
| auto wk = Blob::make(comp_node, sz); | |||||
| auto ptr = wk->storage().get(); | |||||
| megdnn::Workspace dnn_wk(ptr, sz); | |||||
| auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
| reserve = BlobManager::inst()->alloc_workspace_with_defrag(comp_node, r_layout); | reserve = BlobManager::inst()->alloc_workspace_with_defrag(comp_node, r_layout); | ||||
| // alloc memory | // alloc memory | ||||
| @@ -123,8 +123,6 @@ TensorLayout do_shape_infer( | |||||
| std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | ||||
| const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { | const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { | ||||
| auto&& conv = static_cast<const Convolution&>(def); | |||||
| using Param = ::megdnn::param::Convolution; | using Param = ::megdnn::param::Convolution; | ||||
| SmallVector<LogicalTensorDesc> dests(1); | SmallVector<LogicalTensorDesc> dests(1); | ||||
| @@ -167,34 +165,33 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| inp_shapes[i] = inputs[i]->layout(); | inp_shapes[i] = inputs[i]->layout(); | ||||
| } | } | ||||
| oup_shapes[0] = out_layout; | oup_shapes[0] = out_layout; | ||||
| auto&& dnn_opr = opr::intl::create_megdnn_opr<megdnn::ConvBiasForward>(cn); | |||||
| dnn_opr->param().pad_h = conv.pad_h; | |||||
| dnn_opr->param().pad_w = conv.pad_w; | |||||
| dnn_opr->param().stride_h = conv.stride_h; | |||||
| dnn_opr->param().stride_w = conv.stride_w; | |||||
| dnn_opr->param().dilate_h = conv.dilate_h; | |||||
| dnn_opr->param().dilate_w = conv.dilate_w; | |||||
| dnn_opr->param().sparse = conv.sparse; | |||||
| dnn_opr->param().compute_mode = conv.compute_mode; | |||||
| dnn_opr->param().format = conv.format; | |||||
| DnnOprCaller<megdnn::ConvBiasForward> dnn_opr(cn); | |||||
| dnn_opr.op->param().pad_h = conv.pad_h; | |||||
| dnn_opr.op->param().pad_w = conv.pad_w; | |||||
| dnn_opr.op->param().stride_h = conv.stride_h; | |||||
| dnn_opr.op->param().stride_w = conv.stride_w; | |||||
| dnn_opr.op->param().dilate_h = conv.dilate_h; | |||||
| dnn_opr.op->param().dilate_w = conv.dilate_w; | |||||
| dnn_opr.op->param().sparse = conv.sparse; | |||||
| dnn_opr.op->param().compute_mode = conv.compute_mode; | |||||
| dnn_opr.op->param().format = conv.format; | |||||
| // shape infer | // shape infer | ||||
| TensorLayout shp({0}, inputs[0]->dtype()); | TensorLayout shp({0}, inputs[0]->dtype()); | ||||
| shp.ndim = 0; | shp.ndim = 0; | ||||
| size_t sz = setup_algo<megdnn::ConvBiasForward>( | size_t sz = setup_algo<megdnn::ConvBiasForward>( | ||||
| {inp_shapes[0], inp_shapes[1], shp, shp, oup_shapes[0]}, dnn_opr.get(), 0, | |||||
| false, false, cn, conv.policy(), false); | |||||
| {inp_shapes[0], inp_shapes[1], shp, shp, oup_shapes[0]}, dnn_opr.op.get(), | |||||
| 0, false, false, cn, conv.policy(), false); | |||||
| // alloc memory | // alloc memory | ||||
| DeviceTensorND bias = BlobManager::inst()->alloc_workspace_with_defrag(cn, shp); | DeviceTensorND bias = BlobManager::inst()->alloc_workspace_with_defrag(cn, shp); | ||||
| auto wk = Blob::make(cn, sz); | |||||
| auto ptr = wk->storage().get(); | |||||
| megdnn::Workspace dnn_wk(ptr, sz); | |||||
| TensorLayout w_layout({sz}, dtype::Byte()); | |||||
| auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
| // execute | // execute | ||||
| dnn_opr->exec( | |||||
| dnn_opr.op->exec( | |||||
| inp_tensornds[0], inp_tensornds[1], bias.as_megdnn(), bias.as_megdnn(), | inp_tensornds[0], inp_tensornds[1], bias.as_megdnn(), bias.as_megdnn(), | ||||
| out.as_megdnn(), nullptr, dnn_wk); | out.as_megdnn(), nullptr, dnn_wk); | ||||
| return {Tensor::make(out)}; | return {Tensor::make(out)}; | ||||
| @@ -359,7 +356,6 @@ TensorLayout do_shape_infer( | |||||
| std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( | ||||
| const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { | const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { | ||||
| auto&& conv = static_cast<const Convolution3D&>(def); | |||||
| using Param = ::megdnn::param::Convolution3D; | using Param = ::megdnn::param::Convolution3D; | ||||
| SmallVector<LogicalTensorDesc> dests(1); | SmallVector<LogicalTensorDesc> dests(1); | ||||
| @@ -398,24 +394,23 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| inp_shapes[i] = inputs[i]->layout(); | inp_shapes[i] = inputs[i]->layout(); | ||||
| } | } | ||||
| oup_shapes[0] = out_layout; | oup_shapes[0] = out_layout; | ||||
| auto&& dnn_opr = opr::intl::create_megdnn_opr<megdnn::Convolution3D>(cn); | |||||
| dnn_opr->param() = conv.param(); | |||||
| DnnOprCaller<megdnn::Convolution3D> dnn_opr(cn); | |||||
| dnn_opr.op->param() = conv.param(); | |||||
| // shape infer | // shape infer | ||||
| size_t sz = setup_algo<megdnn::Convolution3D>( | size_t sz = setup_algo<megdnn::Convolution3D>( | ||||
| {inp_shapes[0], inp_shapes[1], oup_shapes[0]}, dnn_opr.get(), 0, false, | |||||
| {inp_shapes[0], inp_shapes[1], oup_shapes[0]}, dnn_opr.op.get(), 0, false, | |||||
| false, cn, conv.policy(), false); | false, cn, conv.policy(), false); | ||||
| // alloc memory | // alloc memory | ||||
| DeviceTensorND out = | DeviceTensorND out = | ||||
| BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout); | BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout); | ||||
| auto wk = Blob::make(cn, sz); | |||||
| auto ptr = wk->storage().get(); | |||||
| megdnn::Workspace dnn_wk(ptr, sz); | |||||
| TensorLayout w_layout({sz}, dtype::Byte()); | |||||
| auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
| // execute | // execute | ||||
| dnn_opr->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); | |||||
| dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); | |||||
| return {Tensor::make(out)}; | return {Tensor::make(out)}; | ||||
| } | } | ||||
| @@ -29,7 +29,7 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| using TensorND = megdnn::TensorND; | using TensorND = megdnn::TensorND; | ||||
| SmallVector<TensorND> inp_tensornds; | SmallVector<TensorND> inp_tensornds; | ||||
| inp_tensornds.reserve(inputs.size()); | inp_tensornds.reserve(inputs.size()); | ||||
| auto&& dnn_opr = opr::intl::create_megdnn_opr<megdnn::Dot>(comp_node); | |||||
| DnnOprCaller<megdnn::Dot> dnn_opr(comp_node); | |||||
| for (unsigned i = 0; i < inputs.size(); ++i) { | for (unsigned i = 0; i < inputs.size(); ++i) { | ||||
| auto dnn_ten = inputs[i]->dnn_tensor(); | auto dnn_ten = inputs[i]->dnn_tensor(); | ||||
| inp_tensornds.push_back(dnn_ten); | inp_tensornds.push_back(dnn_ten); | ||||
| @@ -37,28 +37,27 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| TensorLayout oup_layout{inputs[0]->dtype()}; | TensorLayout oup_layout{inputs[0]->dtype()}; | ||||
| auto inp1_tensor = inputs[0]->dnn_tensor(); | auto inp1_tensor = inputs[0]->dnn_tensor(); | ||||
| auto inp2_tensor = inputs[1]->dnn_tensor(); | auto inp2_tensor = inputs[1]->dnn_tensor(); | ||||
| dnn_opr->deduce_layout(inp1_tensor.layout, inp2_tensor.layout, oup_layout); | |||||
| dnn_opr.op->deduce_layout(inp1_tensor.layout, inp2_tensor.layout, oup_layout); | |||||
| if (inputs[0]->layout().is_empty() || inputs[1]->layout().is_empty()) { | if (inputs[0]->layout().is_empty() || inputs[1]->layout().is_empty()) { | ||||
| auto fill_opr = opr::intl::create_megdnn_opr<megdnn::Fill>(comp_node); | |||||
| DnnOprCaller<megdnn::Fill> fill_opr(comp_node); | |||||
| DeviceTensorND out = | DeviceTensorND out = | ||||
| BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); | BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); | ||||
| fill_opr->param() = 0; | |||||
| fill_opr->exec(out.as_megdnn(), {}); | |||||
| fill_opr.op->param() = 0; | |||||
| fill_opr.op->exec(out.as_megdnn(), {}); | |||||
| return {Tensor::make(out)}; | return {Tensor::make(out)}; | ||||
| } | } | ||||
| auto wk_size = dnn_opr->get_workspace_in_bytes( | |||||
| auto sz = dnn_opr.op->get_workspace_in_bytes( | |||||
| inp_tensornds[0].layout, inp_tensornds[1].layout, output_descs[0].layout); | inp_tensornds[0].layout, inp_tensornds[1].layout, output_descs[0].layout); | ||||
| DeviceTensorND out_devtensor = | DeviceTensorND out_devtensor = | ||||
| BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); | BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); | ||||
| TensorLayout wk_layout{TensorShape{wk_size}, inputs[0]->dtype()}; | |||||
| DeviceTensorND workspace = | |||||
| BlobManager::inst()->alloc_workspace_with_defrag(comp_node, wk_layout); | |||||
| megdnn::Workspace dnn_wk(workspace.raw_ptr(), wk_size); | |||||
| dnn_opr->exec( | |||||
| TensorLayout w_layout({sz}, dtype::Byte()); | |||||
| auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
| dnn_opr.op->exec( | |||||
| inp_tensornds[0], inp_tensornds[1], out_devtensor.as_megdnn(), dnn_wk); | inp_tensornds[0], inp_tensornds[1], out_devtensor.as_megdnn(), dnn_wk); | ||||
| return {Tensor::make(out_devtensor)}; | return {Tensor::make(out_devtensor)}; | ||||
| @@ -106,9 +106,8 @@ void apply_on_device_tensornd( | |||||
| mgb_assert( | mgb_assert( | ||||
| inputs.size() == trait.arity, "%s expects %u inputs; got %zu actually", | inputs.size() == trait.arity, "%s expects %u inputs; got %zu actually", | ||||
| trait.name, trait.arity, inputs.size()); | trait.name, trait.arity, inputs.size()); | ||||
| auto&& dnn_opr = | |||||
| opr::intl::create_megdnn_opr<megdnn::Elemwise>(inputs[0].comp_node()); | |||||
| opr::Elemwise::perform(op_def.mode, (*outputs)[0], inputs, dnn_opr); | |||||
| DnnOprCaller<megdnn::Elemwise> dnn_opr(inputs[0].comp_node()); | |||||
| opr::Elemwise::perform(op_def.mode, (*outputs)[0], inputs, dnn_opr.op); | |||||
| } | } | ||||
| SmallVector<TensorPtr> apply_on_physical_tensor( | SmallVector<TensorPtr> apply_on_physical_tensor( | ||||
| @@ -139,16 +138,16 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| if (is_empty) { | if (is_empty) { | ||||
| return {Tensor::make(out)}; | return {Tensor::make(out)}; | ||||
| } | } | ||||
| auto&& dnn_opr = opr::intl::create_megdnn_opr<megdnn::Elemwise>(comp_node); | |||||
| DnnOprCaller<megdnn::Elemwise> dnn_opr(comp_node); | |||||
| dnn_opr->param() = op_def.param(); | |||||
| if (dnn_opr->param().mode == Mode::FUSE_MUL_ADD3 || | |||||
| dnn_opr->param().mode == Mode::FUSE_MUL_ADD4 || | |||||
| dnn_opr.op->param() = op_def.param(); | |||||
| if (dnn_opr.op->param().mode == Mode::FUSE_MUL_ADD3 || | |||||
| dnn_opr.op->param().mode == Mode::FUSE_MUL_ADD4 || | |||||
| (inp_tensornds.size() && | (inp_tensornds.size() && | ||||
| inp_tensornds[0].layout.dtype.category() == DTypeCategory::QUANTIZED)) { | inp_tensornds[0].layout.dtype.category() == DTypeCategory::QUANTIZED)) { | ||||
| opr::Elemwise::perform_dnn(comp_node, out, inp_tensornds, dnn_opr); | |||||
| opr::Elemwise::perform_dnn(comp_node, out, inp_tensornds, dnn_opr.op); | |||||
| } else { | } else { | ||||
| dnn_opr->exec(inp_tensornds, out.as_megdnn()); | |||||
| dnn_opr.op->exec(inp_tensornds, out.as_megdnn()); | |||||
| } | } | ||||
| return {Tensor::make(out)}; | return {Tensor::make(out)}; | ||||
| @@ -8,6 +8,7 @@ | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||||
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #include "../dnn_op_helper.h" | |||||
| #include "../op_trait.h" | #include "../op_trait.h" | ||||
| #include "megbrain/imperative/ops/autogen.h" | #include "megbrain/imperative/ops/autogen.h" | ||||
| @@ -34,8 +35,7 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| auto dest = outputs[size]; | auto dest = outputs[size]; | ||||
| auto cn = dest->comp_node(); | auto cn = dest->comp_node(); | ||||
| auto&& dnn_opr = opr::intl::create_megdnn_opr<megdnn::CheckNonFinite>(cn); | |||||
| size_t wk_size = 0; | |||||
| DnnOprCaller<megdnn::CheckNonFinite> dnn_opr(cn); | |||||
| SmallVector<megdnn::TensorND> srcs(size); | SmallVector<megdnn::TensorND> srcs(size); | ||||
| // copy an outputs to the dnn for inplace | // copy an outputs to the dnn for inplace | ||||
| for (size_t i = 0; i < size; ++i) { | for (size_t i = 0; i < size; ++i) { | ||||
| @@ -44,11 +44,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor( | |||||
| srcs[i] = outputs[i]->dev_tensor().as_megdnn(); | srcs[i] = outputs[i]->dev_tensor().as_megdnn(); | ||||
| } | } | ||||
| megdnn::CheckNonFinite::Param param({op.scale}); | megdnn::CheckNonFinite::Param param({op.scale}); | ||||
| dnn_opr->param() = param; | |||||
| wk_size = dnn_opr->get_workspace_in_bytes(srcs, dest->layout()); | |||||
| auto wk = Blob::make(cn, wk_size); | |||||
| megdnn::Workspace dnn_wk(wk->storage().get(), wk_size); | |||||
| dnn_opr->exec(srcs, dest->dev_tensor().as_megdnn(), dnn_wk); | |||||
| dnn_opr.op->param() = param; | |||||
| size_t sz = dnn_opr.op->get_workspace_in_bytes(srcs, dest->layout()); | |||||
| TensorLayout w_layout({sz}, dtype::Byte()); | |||||
| auto dnn_wk = dnn_opr.create_workspace(w_layout); | |||||
| dnn_opr.op->exec(srcs, dest->dev_tensor().as_megdnn(), dnn_wk); | |||||
| return outputs; | return outputs; | ||||
| } | } | ||||