GitOrigin-RevId: 4038fe23a4
tags/v1.0.0-rc1
| @@ -13,6 +13,7 @@ | |||||
| #include "src/common/conv_bias.h" | #include "src/common/conv_bias.h" | ||||
| #include "megdnn/oprs/nn.h" | #include "megdnn/oprs/nn.h" | ||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| #include "src/common/opr_delegate.h" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| @@ -445,13 +446,13 @@ void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args, | |||||
| //! Only used for naive implementation. DO NOT use the following function in | //! Only used for naive implementation. DO NOT use the following function in | ||||
| //! other backends. | //! other backends. | ||||
| void handle_z_inp_and_activation(Handle* handle, | |||||
| param::ConvBias::NonlineMode nonline_mode, | |||||
| const TensorND& conv_bias_tensor, | |||||
| const TensorND& z_tensor, | |||||
| const TensorND& dst_tensor, | |||||
| dt_byte* workspace_ptr) { | |||||
| void handle_z_inp_and_activation_naive( | |||||
| param::ConvBias::NonlineMode nonline_mode, | |||||
| const TensorND& conv_bias_tensor, const TensorND& z_tensor, | |||||
| const TensorND& dst_tensor, dt_byte* workspace_ptr) { | |||||
| auto res = dst_tensor, z_float = z_tensor; | auto res = dst_tensor, z_float = z_tensor; | ||||
| //! Create the naive inplace CPU handle (debug level 2). | |||||
| auto handle = inplace_cpu_handle(2); | |||||
| if (z_tensor.layout.ndim > 0 && | if (z_tensor.layout.ndim > 0 && | ||||
| z_tensor.layout.dtype.category() != DTypeCategory::FLOAT) { | z_tensor.layout.dtype.category() != DTypeCategory::FLOAT) { | ||||
| dt_byte *res_float_workspace_ptr = nullptr, | dt_byte *res_float_workspace_ptr = nullptr, | ||||
| @@ -18,16 +18,14 @@ | |||||
| namespace megdnn { | namespace megdnn { | ||||
| void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args, | void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args, | ||||
| const TensorND* conv_dst_tensor, | |||||
| const TensorND* dst_tensor, | |||||
| const TensorND* bias_tensor); | |||||
| const TensorND* conv_dst_tensor, | |||||
| const TensorND* dst_tensor, | |||||
| const TensorND* bias_tensor); | |||||
| void handle_z_inp_and_activation(Handle* handle, | |||||
| param::ConvBias::NonlineMode nonline_mode, | |||||
| const TensorND& conv_bias_tensor, | |||||
| const TensorND& z_tensor, | |||||
| const TensorND& dst_tensor, | |||||
| dt_byte* workspace_ptr); | |||||
| void handle_z_inp_and_activation_naive( | |||||
| param::ConvBias::NonlineMode nonline_mode, | |||||
| const TensorND& conv_bias_tensor, const TensorND& z_tensor, | |||||
| const TensorND& dst_tensor, dt_byte* workspace_ptr); | |||||
| } // namespace megdnn | } // namespace megdnn | ||||
| @@ -13,8 +13,8 @@ | |||||
| using namespace megdnn; | using namespace megdnn; | ||||
| const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle() { | |||||
| auto make = []() { | |||||
| const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle(int debug_level) { | |||||
| auto make = [](int deb_level) { | |||||
| megcoreDeviceHandle_t dev_handle; | megcoreDeviceHandle_t dev_handle; | ||||
| megcoreCreateDeviceHandle(&dev_handle, megcorePlatformCPU); | megcoreCreateDeviceHandle(&dev_handle, megcorePlatformCPU); | ||||
| megcoreComputingHandle_t comp_handle; | megcoreComputingHandle_t comp_handle; | ||||
| @@ -23,12 +23,20 @@ const std::shared_ptr<Handle>& megdnn::inplace_cpu_handle() { | |||||
| megcoreDestroyComputingHandle(comp_handle); | megcoreDestroyComputingHandle(comp_handle); | ||||
| megcoreDestroyDeviceHandle(dev_handle); | megcoreDestroyDeviceHandle(dev_handle); | ||||
| }; | }; | ||||
| std::shared_ptr<Handle> handle = Handle::make(comp_handle); | |||||
| std::shared_ptr<Handle> handle = Handle::make(comp_handle, deb_level); | |||||
| handle->set_destructor(destructor); | handle->set_destructor(destructor); | ||||
| return handle; | return handle; | ||||
| }; | }; | ||||
| static std::shared_ptr<Handle> handle = make(); | |||||
| return handle; | |||||
| if (debug_level == 0) { | |||||
| static std::shared_ptr<Handle> handle = make(0); | |||||
| return handle; | |||||
| } else if (debug_level == 1) { | |||||
| static std::shared_ptr<Handle> handle_fallback = make(1); | |||||
| return handle_fallback; | |||||
| } else { | |||||
| static std::shared_ptr<Handle> handle_naive = make(2); | |||||
| return handle_naive; | |||||
| } | |||||
| } | } | ||||
| // vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen | ||||
| @@ -24,7 +24,7 @@ namespace megdnn { | |||||
| * Usually used for calling other opr impls from some opr impl. You probably | * Usually used for calling other opr impls from some opr impl. You probably | ||||
| * want to use CpuOprDelegationStorage instead. | * want to use CpuOprDelegationStorage instead. | ||||
| */ | */ | ||||
| const std::shared_ptr<Handle>& inplace_cpu_handle(); | |||||
| const std::shared_ptr<Handle>& inplace_cpu_handle(int debug_level = 0); | |||||
| /*! | /*! | ||||
| * \brief storage for oprs on inplace CPU handle | * \brief storage for oprs on inplace CPU handle | ||||
| @@ -104,8 +104,9 @@ void BatchConvBiasForwardImpl::exec(_megdnn_tensor_in src, | |||||
| } | } | ||||
| #undef DISPATCH | #undef DISPATCH | ||||
| #undef DISPATCH_RAW | #undef DISPATCH_RAW | ||||
| handle_z_inp_and_activation(handle(), param().nonlineMode, sfb, z, dst, | |||||
| reinterpret_cast<dt_byte*>(ws.get(1))); | |||||
| MEGDNN_DISPATCH_CPU_KERN_OPR(handle_z_inp_and_activation_naive( | |||||
| param().nonlineMode, sfb, z, dst, | |||||
| reinterpret_cast<dt_byte*>(ws.get(1)))); | |||||
| } | } | ||||
| std::vector<BatchConvBiasForward::Algorithm*> | std::vector<BatchConvBiasForward::Algorithm*> | ||||
| @@ -137,8 +137,8 @@ void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | |||||
| } | } | ||||
| #undef DISPATCH | #undef DISPATCH | ||||
| #undef DISPATCH_RAW | #undef DISPATCH_RAW | ||||
| handle_z_inp_and_activation(handle(), param().nonlineMode, sfb, z, dst, | |||||
| workspace_ptr); | |||||
| MEGDNN_DISPATCH_CPU_KERN_OPR(handle_z_inp_and_activation_naive( | |||||
| param().nonlineMode, sfb, z, dst, workspace_ptr)); | |||||
| } | } | ||||
| MIDOUT_END(); | MIDOUT_END(); | ||||
| } | } | ||||
| @@ -11,6 +11,7 @@ | |||||
| #include "megbrain/opr/io.h" | #include "megbrain/opr/io.h" | ||||
| #include "megbrain/opr/basic_arith_wrapper.h" | #include "megbrain/opr/basic_arith_wrapper.h" | ||||
| #include "megbrain/opr/dnn/convolution.h" | |||||
| #include "megbrain/opr/utility.h" | #include "megbrain/opr/utility.h" | ||||
| #include "megbrain/opr/blas.h" | #include "megbrain/opr/blas.h" | ||||
| #include "megbrain/opr/tensor_manip.h" | #include "megbrain/opr/tensor_manip.h" | ||||
| @@ -22,6 +23,7 @@ | |||||
| #include "megbrain/graph/execution_mask.h" | #include "megbrain/graph/execution_mask.h" | ||||
| #include "megbrain/utils/timer.h" | #include "megbrain/utils/timer.h" | ||||
| #include "megbrain/comp_node_env.h" | #include "megbrain/comp_node_env.h" | ||||
| #include "megbrain/gopt/inference.h" | |||||
| #include "megbrain/test/helper.h" | #include "megbrain/test/helper.h" | ||||
| @@ -1814,4 +1816,29 @@ TEST(TestGraph, OperatorNodeConfigInstanceID) { | |||||
| } | } | ||||
| } | } | ||||
| //! Smoke test: builds a dense NCHW44 ConvBias graph on "cpu0", compiles it | |||||
| //! with comp_node_seq_record_level = 2, calls ComputingGraph::assert_destroy | |||||
| //! on the graph, then runs the compiled function once on regenerated input. | |||||
| //! No output values are checked; the test only requires execution to finish. | |||||
| TEST(TestGraph, NaiveRecord2NCHW44) { | |||||
| using ConvParam = megdnn::ConvBias::Param; | |||||
| auto comp_node = CompNode::load("cpu0"); | |||||
| //! dense convolution in NCHW44 layout | |||||
| ConvParam conv_param; | |||||
| conv_param.sparse = ConvParam::Sparse::DENSE; | |||||
| conv_param.format = ConvParam::Format::NCHW44; | |||||
| //! host tensors are generated in the order: input, filter, bias | |||||
| HostTensorGenerator<> rand_gen; | |||||
| auto host_src = rand_gen({1, 2, 12, 12, 4}, comp_node); | |||||
| auto host_filter = rand_gen({2, 2, 3, 3, 4, 4}, comp_node); | |||||
| auto host_bias = rand_gen({1, 2, 1, 1, 4}, comp_node); | |||||
| auto graph = ComputingGraph::make(); | |||||
| auto src = opr::Host2DeviceCopy::make(*graph, host_src); | |||||
| auto filter = opr::Host2DeviceCopy::make(*graph, host_filter); | |||||
| auto bias = opr::Host2DeviceCopy::make(*graph, host_bias); | |||||
| auto conv_out = | |||||
| opr::ConvBiasForward::make(src, filter, bias, conv_param, {}); | |||||
| graph->options().comp_node_seq_record_level = 2; | |||||
| graph->options().var_sanity_check_first_run = false; | |||||
| //! destination for the result copied back by the output callback | |||||
| HostTensorND host_out; | |||||
| auto compiled = graph->compile({make_callback_copy(conv_out, host_out)}); | |||||
| ComputingGraph::assert_destroy(graph); | |||||
| //! refresh the input contents (same shape) before the single execution | |||||
| host_src->copy_from_fixlayout(*rand_gen(host_src->shape(), comp_node)); | |||||
| compiled->execute().wait(); | |||||
| } | |||||
| // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | ||||