| @@ -13,6 +13,7 @@ | |||
| #include "./internal/megdnn_opr_wrapper.inl" | |||
| #include "megbrain/graph/grad_impl.h" | |||
| #include "megbrain/opr/imgproc.h" | |||
| #include "megbrain/opr/io.h" | |||
| #include "megbrain/opr/utility.h" | |||
| using namespace mgb; | |||
| @@ -486,6 +487,7 @@ struct MegDNNOprInitPostCtor<DctChannelSelectForward> { | |||
| } // namespace intl | |||
| } // namespace opr | |||
| } // namespace mgb | |||
| void DctChannelSelectForward::get_output_var_shape( | |||
| const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const { | |||
| auto mo = megdnn_opr(); | |||
| @@ -504,6 +506,7 @@ void DctChannelSelectForward::get_output_var_shape( | |||
| } | |||
| out_shape[0] = dst; | |||
| } | |||
| size_t DctChannelSelectForward::get_workspace_size_bytes( | |||
| const TensorShapeArray& input_shapes, | |||
| const TensorShapeArray& output_shapes) const { | |||
| @@ -513,6 +516,7 @@ size_t DctChannelSelectForward::get_workspace_size_bytes( | |||
| {input_shapes[0], input(0)->dtype(), input(0)->format()}, {}, {}, | |||
| {output_shapes[0], output(0)->dtype(), output(0)->format()}); | |||
| } | |||
| void DctChannelSelectForward::scn_do_execute() { | |||
| auto&& inp = input(); | |||
| auto mo = megdnn_opr(); | |||
| @@ -524,7 +528,6 @@ void DctChannelSelectForward::scn_do_execute() { | |||
| } else { | |||
| mgb_assert(inp.size() == 3, "no support input tensor num %zu", | |||
| inp.size()); | |||
| mo->exec(inp[0]->dev_tensor().as_megdnn(), | |||
| inp[1]->dev_tensor().as_megdnn(), | |||
| inp[2]->dev_tensor().as_megdnn(), | |||
| @@ -533,7 +536,70 @@ void DctChannelSelectForward::scn_do_execute() { | |||
| } | |||
| } | |||
| MEGDNN_OPR_INIT3(DctChannelSelectForward, "dct_channel_select") | |||
//! Validate a host-side channel-selection mask before the operator is built.
//! \param mask_offset per-input-channel prefix offsets into mask_val;
//!        element 0 must be 0 and the sequence must not decrease
//! \param mask_len number of elements in mask_offset; <= 0 skips all checks
//! \param mask_val selected DCT coefficient indices, each expected in
//!        [0, dct_block_size * dct_block_size)
//! \param mask_val_len number of elements in mask_val
//! \param param operator param; format and dct_block_size drive the checks
//! Fails via mgb_assert (throws/aborts) on the first malformed entry.
void DctChannelSelectForward::valid_mask(const int* mask_offset, int mask_len,
                                         const int* mask_val, int mask_val_len,
                                         const Param& param) {
    // Empty/absent mask: nothing to validate.
    if (mask_len <= 0)
        return;
    mgb_assert(mask_offset[0] == 0,
               "The first element of mask_offset must be zero, but got %d. For "
               "example mask offset [0, 15, 20] indicate there are 2 ic, and "
               "ic_0 will have (15 - 0) oc, ic_1 have (20 - 15) oc",
               mask_offset[0]);
    for (int i = 1; i < mask_len; ++i) {
        // NCHW4 packs channels in groups of 4, so every per-channel output
        // count boundary must be 4-aligned.
        if (param.format == Param::Format::NCHW4) {
            mgb_assert(mask_offset[i] % 4 == 0,
                       "Invalid mask offset %d at %d, it should be times of "
                       "4 when using nchw4 format",
                       mask_offset[i], i);
        }
        // NOTE(review): the check is >= (non-decreasing, equal neighbours
        // allowed, i.e. an ic may select zero oc) although the message says
        // "increasing" -- confirm which is intended.
        mgb_assert(mask_offset[i] >= mask_offset[i - 1],
                   "The offset of mask must be increasing, but %d(%d) is less "
                   "than %d(%d)",
                   mask_offset[i], i, mask_offset[i - 1], i - 1);
    }
    // Each mask value indexes a coefficient inside one dct_block_size^2 block.
    const int max_mask = param.dct_block_size * param.dct_block_size;
    for (int i = 0; i < mask_val_len; ++i) {
        mgb_assert(0 <= mask_val[i] && mask_val[i] < max_mask,
                   "Invalid mask_val, assert 0 <= mask_val[%d] < %d, aka 0 <= "
                   "%d < %d",
                   i, max_mask, mask_val[i], max_mask);
    }
}
//! Construct the operator with an input image and an optional channel mask.
//! The mask inputs must be ImmutableTensor oprs so their values can be read
//! and validated on the host at graph-construction time.
//! Statement order follows the operator-node construction protocol:
//! Super ctor -> init_megdnn_opr -> add_input -> validation -> PostCtor.
DctChannelSelectForward::DctChannelSelectForward(
        VarNode* src, VarNode* mask_offset, VarNode* mask_val,
        const Param& param, const OperatorNodeConfig& config)
        : Super(OperatorNodeBaseCtorParam{
                  src->owner_graph(), config, "dct_channel_select", {src}}) {
    init_megdnn_opr(*this, param);
    // NOTE(review): all three vars are registered unconditionally, yet the
    // null check below implies mask_offset may be null -- confirm callers
    // never pass nullptr here (DctChannelSelectForward::make always passes
    // real nodes).
    add_input({src, mask_offset, mask_val});
    if (mask_offset != nullptr) {
        mgb_assert(mask_val,
                   "mask_val should not be null when mask_offset is not null");
        // Read the mask contents on the host; cast_final_safe asserts the
        // producing oprs really are ImmutableTensor.
        auto host_offset = mask_offset->owner_opr()
                                   ->cast_final_safe<opr::ImmutableTensor>()
                                   .host_value();
        auto host_val = mask_val->owner_opr()
                                ->cast_final_safe<opr::ImmutableTensor>()
                                .host_value();
        // Abort construction early on a malformed mask.
        valid_mask(host_offset.ptr<int>(),
                   host_offset.layout().total_nr_elems(), host_val.ptr<int>(),
                   host_val.layout().total_nr_elems(), param);
    }
    intl::MegDNNOprInitPostCtor<DctChannelSelectForward>::apply(*this);
}
| SymbolVar DctChannelSelectForward::make(SymbolVar src, SymbolVar mask_offset, | |||
| SymbolVar mask_val, const Param& param, | |||
| const OperatorNodeConfig& config) { | |||
| intl::MegDNNOprInitInputsModifier<DctChannelSelectForward>::apply( | |||
| param, {&src, &mask_offset, &mask_val}); | |||
| return src.insert_single_output_opr<DctChannelSelectForward>( | |||
| src.node(), mask_offset.node(), mask_val.node(), param, config); | |||
| } | |||
| MEGDNN_OPR_INIT1(DctChannelSelectForward, "dct_channel_select") | |||
| // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | |||
| @@ -639,6 +639,9 @@ SymbolVar ImmutableTensor::make(ComputingGraph &graph, const DTypeScalar &val, | |||
//! Underlying constant tensor as stored on the computing device.
const DeviceTensorND& ImmutableTensor::value() const {
    return m_value.dev();
}
//! Host-readable view of the constant, obtained through static inference.
//! NOTE(review): the const_cast implies static_infer() is non-const while
//! m_value is held const here -- confirm static_infer() only fills an
//! internal cache so this is logically read-only.
const DeviceTensorND& ImmutableTensor::host_value() {
    return const_cast<Value*>(&m_value)->static_infer();
}
| SymbolVar ImmutableTensor::make_from_value( | |||
| ComputingGraph &graph, | |||
| @@ -286,6 +286,9 @@ size_t get_workspace_size_bytes( | |||
| const TensorShapeArray& input_shapes, | |||
| const TensorShapeArray& output_shapes) const override; | |||
| void scn_do_execute() override; | |||
//! validate a host-side channel mask (prefix offsets + coefficient
//! indices); fails via mgb_assert on malformed input
void valid_mask(const int* mask_offset, int mask_len, const int* mask_val,
                int mask_val_len, const Param& param);
| }; | |||
| using DctChannelSelect = DctChannelSelectForward; | |||
| @@ -378,6 +378,8 @@ MGB_DEFINE_OPR_CLASS(ImmutableTensor, intl::DeviceTensorHolder) // { | |||
| //! get underlying value on device | |||
| const DeviceTensorND& value() const; | |||
//! get the value readable from host, materialized via static inference
const DeviceTensorND& host_value();
| SymbolVar shallow_copy( | |||
| ComputingGraph &graph, const OperatorNodeConfig &config) const { | |||
| return make_from_value(graph, m_value, m_value_refkeep, config); | |||
| @@ -803,4 +803,54 @@ TEST(TestOprImgproc, DCT) { | |||
| MGB_MARK_USED_VAR(fwd3); | |||
| MGB_MARK_USED_VAR(gen_mask); | |||
| } | |||
| TEST(TestOprImgproc, DCT_BAD_MASK) { | |||
| HostTensorGenerator<dtype::Uint8> gen_u8; | |||
| HostTensorGenerator<dtype::Int32> gen_s32; | |||
| TensorShape src_shape({1, 2, 256, 256}), mask_offset_shape({3}), | |||
| mask_val_shape({8}); | |||
| opr::DctChannelSelectForward::Param param; | |||
| auto graph = ComputingGraph::make(); | |||
| auto src_tensor = gen_u8(src_shape); | |||
| auto mask_offset_tensor = gen_s32(mask_offset_shape); | |||
| auto mask_val_tensor = gen_s32(mask_val_shape); | |||
| auto mask_offset_ptr = mask_offset_tensor->ptr<int32_t>(); | |||
| auto mask_val_ptr = mask_val_tensor->ptr<int32_t>(); | |||
| mask_offset_ptr[0] = 1; | |||
| mask_val_ptr[0] = 64; | |||
| auto src_sym = opr::ImmutableTensor::make(*graph, *src_tensor); | |||
| auto mask_offset_sym = | |||
| opr::ImmutableTensor::make(*graph, *mask_offset_tensor); | |||
| auto mask_val_sym = opr::ImmutableTensor::make(*graph, *mask_val_tensor); | |||
| ASSERT_THROW(opr::DctChannelSelect::make(src_sym, mask_offset_sym, | |||
| mask_val_sym, param), | |||
| MegBrainError); | |||
| mask_offset_ptr[0] = 0; | |||
| mask_offset_ptr[1] = 2; | |||
| mask_offset_ptr[2] = 8; | |||
| mask_offset_sym = opr::ImmutableTensor::make(*graph, *mask_offset_tensor); | |||
| ASSERT_THROW(opr::DctChannelSelect::make(src_sym, mask_offset_sym, | |||
| mask_val_sym, param), | |||
| MegBrainError); | |||
| mask_val_ptr[0] = 0; | |||
| mask_val_ptr[1] = 1; | |||
| mask_val_ptr[2] = 2; | |||
| mask_val_ptr[3] = 3; | |||
| mask_val_ptr[4] = 4; | |||
| mask_val_ptr[5] = 5; | |||
| mask_val_ptr[6] = 6; | |||
| mask_val_ptr[7] = 7; | |||
| mask_val_sym = opr::ImmutableTensor::make(*graph, *mask_val_tensor); | |||
| opr::DctChannelSelect::make(src_sym, mask_offset_sym, mask_val_sym, param); | |||
| param.format = opr::DctChannelSelect::Param::Format::NCHW4; | |||
| ASSERT_THROW(opr::DctChannelSelect::make(src_sym, mask_offset_sym, | |||
| mask_val_sym, param), | |||
| MegBrainError); | |||
| } | |||
| // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | |||
| @@ -150,6 +150,36 @@ TEST(TestOprIO, ImmutableTensor) { | |||
| } | |||
| TEST(TestOprIO, ImmutableTensorHostvalue) { | |||
| HostTensorGenerator<> gen; | |||
| TensorShape shape({2, 3}); | |||
| auto host_x = gen(shape); | |||
| auto graph = ComputingGraph::make(); | |||
| auto x = opr::ImmutableTensor::make(*graph, *host_x); | |||
| auto y = x.node()->owner_opr() | |||
| ->cast_final_safe<opr::ImmutableTensor>() | |||
| .host_value(); | |||
| for (size_t i = 0; i < shape.total_nr_elems(); ++i) { | |||
| ASSERT_EQ(host_x->ptr<float>()[i], y.ptr<float>()[i]); | |||
| } | |||
| } | |||
| TEST(TestOprIO, ImmutableTensorHostvalueGPU) { | |||
| REQUIRE_GPU(1); | |||
| auto gpu_cn = CompNode::load("gpu0"); | |||
| HostTensorGenerator<> gen; | |||
| TensorShape shape({2, 3}); | |||
| auto host_x = gen(shape); | |||
| auto graph = ComputingGraph::make(); | |||
| auto x = opr::ImmutableTensor::make(*graph, *host_x, {gpu_cn}); | |||
| auto y = x.node()->owner_opr() | |||
| ->cast_final_safe<opr::ImmutableTensor>() | |||
| .host_value(); | |||
| for (size_t i = 0; i < shape.total_nr_elems(); ++i) { | |||
| ASSERT_EQ(host_x->ptr<float>()[i], y.ptr<float>()[i]); | |||
| } | |||
| } | |||
| TEST(TestOprIO, ImmutableTensorLarge) { | |||
| HostTensorGenerator<> gen; | |||
| auto host_x = gen({1025}); | |||