feat(megdnn): add Norm operator (forward only)
GitOrigin-RevId: 989474168d
tags/v1.11.0
| @@ -1475,6 +1475,35 @@ protected: | |||||
| using LAMB = LAMBUpdate; | using LAMB = LAMBUpdate; | ||||
| class NormBase : public OperatorBase { | |||||
| DEF_OPR_PARAM(Norm); // bind the Norm param struct generated from the Python pdef | |||||
| DEF_OPR_IMPL(NormBase, OperatorBase, 1, 1); // constructor and static members | |||||
| public: | |||||
| virtual void deduce_layout(const TensorLayout& src, TensorLayout& dst) = 0; | |||||
| virtual size_t get_workspace_in_bytes( | |||||
| const TensorLayout& src, const TensorLayout& dst) = 0; | |||||
| protected: | |||||
| void check_exec( | |||||
| const TensorLayout& src, const TensorLayout& dst, | |||||
| size_t workspace_in_bytes); | |||||
| }; | |||||
| class NormForward : public NormBase { | |||||
| DEF_OPR_IMPL(NormForward, NormBase, 1, 1); | |||||
| using Mode = Param::Mode; | |||||
| public: | |||||
| virtual void exec( | |||||
| _megdnn_tensor_in src, _megdnn_tensor_out dst, | |||||
| _megdnn_workspace workspace) = 0; | |||||
| virtual void deduce_layout(const TensorLayout& src, TensorLayout& dst); | |||||
| virtual size_t get_workspace_in_bytes( | |||||
| const TensorLayout& src, const TensorLayout& dst) = 0; | |||||
| }; | |||||
| using Norm = NormForward; | |||||
| } // namespace megdnn | } // namespace megdnn | ||||
| #include "megdnn/internal/opr_header_epilogue.h" | #include "megdnn/internal/opr_header_epilogue.h" | ||||
| @@ -1277,3 +1277,11 @@ PADDING_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'), | |||||
| add_fields('bool', Doc('bias_correction', 'whether correct bias'), 'true'). | add_fields('bool', Doc('bias_correction', 'whether correct bias'), 'true'). | ||||
| add_fields('bool', Doc('always_adapt', 'apply adaptive lr to 0.0'), 'false') | add_fields('bool', Doc('always_adapt', 'apply adaptive lr to 0.0'), 'false') | ||||
| ) | ) | ||||
| (pdef("Norm"). | |||||
| add_enum('Mode', | |||||
| Doc('P_NORM=0', 'calculate p-norm; parameter p is ignored in the other modes'), | |||||
| Doc('INF_NORM=1', 'infinity norm'), | |||||
| Doc('NEG_INF_NORM=2', 'negative infinity norm'), name_field="mode"). | |||||
| add_fields('float32', Doc('p', 'the order of norm'), '2'). | |||||
| add_fields('int32', Doc('dim', 'the dim along which the norm is performed'), '-1'), | |||||
| ) | |||||
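For reference, with mode P_NORM the opr computes (sum_i |x_i|^p)^(1/p) along the chosen dim. A scalar sketch of that formula, mirroring the NormOp functor added later in this diff (the function below is illustrative, not megdnn API):

    #include <cmath>
    #include <cstddef>

    // reference p-norm of n contiguous floats
    float p_norm_ref(const float* x, std::size_t n, float p) {
        float acc = 0.f;                            // matches NormOp::INIT
        for (std::size_t i = 0; i < n; ++i)
            acc += std::pow(std::fabs(x[i]), p);    // NormOp::read, folded by apply()
        return std::pow(acc, 1.f / p);              // NormOp::write
    }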
| @@ -212,7 +212,8 @@ private: | |||||
| cb(LAMBUpdate) \ | cb(LAMBUpdate) \ | ||||
| cb(LSTMBackward) \ | cb(LSTMBackward) \ | ||||
| cb(SoftmaxForward) \ | cb(SoftmaxForward) \ | ||||
| cb(SoftmaxBackward) | |||||
| cb(SoftmaxBackward) \ | |||||
| cb(NormForward) | |||||
| // clang-format on | // clang-format on | ||||
| /*! | /*! | ||||
| @@ -0,0 +1,43 @@ | |||||
| #include "megdnn/oprs.h" | |||||
| #include "src/common/utils.h" | |||||
| namespace megdnn { | |||||
| void NormForward::deduce_layout(const TensorLayout& src, TensorLayout& dst) { | |||||
| megdnn_assert( | |||||
| param().dim > -1 && param().dim < static_cast<dt_int32>(src.ndim), | |||||
| "dim params must be passed and cannot be -1."); | |||||
| SmallVector<size_t> shapeList; | |||||
| for (size_t i = 0; i < src.ndim; ++i) { | |||||
| if (static_cast<dt_int32>(i) != param().dim) { | |||||
| shapeList.append(1, static_cast<size_t>(src.shape[i])); | |||||
| } else { | |||||
| shapeList.append(1, static_cast<size_t>(1)); | |||||
| } | |||||
| } | |||||
| dst = TensorLayout{TensorShape(shapeList), src.dtype}; | |||||
| return; | |||||
| } | |||||
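A concrete example of the deduced shape, matching the testcases later in this diff; note the reduced axis is kept with extent 1 rather than squeezed:

    // src = {1, 2, 3, 4}, param().dim = 3  ->  dst = {1, 2, 3, 1}
    // src = {1, 2, 3, 4}, param().dim = 1  ->  dst = {1, 1, 3, 4}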
| void NormBase::check_exec( | |||||
| const TensorLayout& src, const TensorLayout& dst, size_t workspace_in_bytes) { | |||||
| megdnn_assert_eq_dtype(src, dst); | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| megdnn_assert( | |||||
| src.dtype.enumv() == DTypeEnum::Float16 || | |||||
| src.dtype.enumv() == DTypeEnum::Float32, | |||||
| "Float16 or Float32 is only supported."); | |||||
| #else | |||||
| megdnn_assert( | |||||
| src.dtype.enumv() == DTypeEnum::Float32, "only Float32 is supported."); | |||||
| #endif | |||||
| TensorLayout dst_expected; | |||||
| deduce_layout(src, dst_expected); | |||||
| megdnn_assert_eq_layout(dst_expected, dst); | |||||
| auto required_workspace_in_bytes = get_workspace_in_bytes(src, dst); | |||||
| megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | |||||
| } | |||||
| } // namespace megdnn | |||||
| @@ -16,6 +16,7 @@ struct OprTrait {}; | |||||
| static const bool can_deduce_layout = CanDeduceLayout; \ | static const bool can_deduce_layout = CanDeduceLayout; \ | ||||
| } | } | ||||
| DEF(Norm, 2, true, true); | |||||
| DEF(Padding, 2, false, true); | DEF(Padding, 2, false, true); | ||||
| DEF(PaddingBackward, 2, false, false); | DEF(PaddingBackward, 2, false, false); | ||||
| DEF(ConvolutionForward, 3, true, true); | DEF(ConvolutionForward, 3, true, true); | ||||
| @@ -47,6 +47,7 @@ | |||||
| #include "src/cuda/matrix_mul/opr_impl.h" | #include "src/cuda/matrix_mul/opr_impl.h" | ||||
| #include "src/cuda/max_tensor_diff/opr_impl.h" | #include "src/cuda/max_tensor_diff/opr_impl.h" | ||||
| #include "src/cuda/mesh_indexing/opr_impl.h" | #include "src/cuda/mesh_indexing/opr_impl.h" | ||||
| #include "src/cuda/norm/opr_impl.h" | |||||
| #include "src/cuda/padding/opr_impl.h" | #include "src/cuda/padding/opr_impl.h" | ||||
| #include "src/cuda/param_pack/opr_impl.h" | #include "src/cuda/param_pack/opr_impl.h" | ||||
| #include "src/cuda/pooling/opr_impl.h" | #include "src/cuda/pooling/opr_impl.h" | ||||
| @@ -216,6 +217,7 @@ MEGDNN_SPECIALIZE_CREATE_OPERATOR(DropoutForward); | |||||
| MEGDNN_SPECIALIZE_CREATE_OPERATOR(DropoutBackward); | MEGDNN_SPECIALIZE_CREATE_OPERATOR(DropoutBackward); | ||||
| MEGDNN_SPECIALIZE_CREATE_OPERATOR(SoftmaxForward); | MEGDNN_SPECIALIZE_CREATE_OPERATOR(SoftmaxForward); | ||||
| MEGDNN_SPECIALIZE_CREATE_OPERATOR(SoftmaxBackward); | MEGDNN_SPECIALIZE_CREATE_OPERATOR(SoftmaxBackward); | ||||
| MEGDNN_SPECIALIZE_CREATE_OPERATOR(NormForward); | |||||
| template <typename Opr> | template <typename Opr> | ||||
| std::unique_ptr<Opr> HandleImpl::create_operator() { | std::unique_ptr<Opr> HandleImpl::create_operator() { | ||||
| @@ -0,0 +1,28 @@ | |||||
| #include "helper.h" | |||||
| #include "megdnn/dtype.h" | |||||
| #include "src/cuda/reduce_helper.cuh" | |||||
| namespace megdnn { | |||||
| namespace cuda { | |||||
| using namespace device_reduce; | |||||
| #define COMMA , | |||||
| INST_REDUCE(NormOp<dt_float32 COMMA dt_float32 COMMA dt_float32>, false); | |||||
| INST_REDUCE(NormOp<dt_float16 COMMA dt_float16 COMMA dt_float16>, false); | |||||
| INST_REDUCE(NormZeroOp<dt_float32 COMMA dt_float32 COMMA dt_float32>, false); | |||||
| INST_REDUCE(NormZeroOp<dt_float16 COMMA dt_float16 COMMA dt_float16>, false); | |||||
| INST_REDUCE(NormOneOp<dt_float32 COMMA dt_float32 COMMA dt_float32>, false); | |||||
| INST_REDUCE(NormOneOp<dt_float16 COMMA dt_float16 COMMA dt_float16>, false); | |||||
| INST_REDUCE(NormTwoOp<dt_float32 COMMA dt_float32 COMMA dt_float32>, false); | |||||
| INST_REDUCE(NormTwoOp<dt_float16 COMMA dt_float16 COMMA dt_float16>, false); | |||||
| #undef COMMA | |||||
| } // namespace cuda | |||||
| } // namespace megdnn | |||||
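The COMMA indirection above is needed because the preprocessor splits macro arguments on top-level commas, so `INST_REDUCE(NormOp<a, b, c>, false)` would otherwise be parsed as four arguments. A stand-alone illustration of the trick (the MAKE_VAR macro is made up for this example):

    #include <utility>
    #define COMMA ,
    #define MAKE_VAR(T, name) T name;
    // MAKE_VAR(std::pair<int, int>, p)    // error: macro receives 3 arguments
    MAKE_VAR(std::pair<int COMMA int>, p)  // ok: expands to std::pair<int , int> p;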
| @@ -0,0 +1,226 @@ | |||||
| #pragma once | |||||
| #include "megdnn/dtype.h" | |||||
| #if MEGDNN_CC_HOST | |||||
| #include "megdnn/basic_types.h" | |||||
| #endif | |||||
| namespace megdnn { | |||||
| namespace device_reduce { | |||||
| template <typename src_ctype, typename dst_ctype, typename wtype_> | |||||
| struct NormOp; | |||||
| template <> | |||||
| struct NormOp<dt_float32, dt_float32, dt_float32> { | |||||
| typedef dt_float32 wtype; | |||||
| typedef dt_float32 src_ctype; | |||||
| typedef dt_float32 dst_ctype; | |||||
| typedef wtype p_type; | |||||
| const wtype INIT; | |||||
| src_ctype* src; | |||||
| dst_ctype* dst; | |||||
| const size_t B; | |||||
| const p_type p; | |||||
| MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { | |||||
| return powf(fabsf(src[idx]), p); | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { | |||||
| dst[idx] = powf(val, 1.f / p); | |||||
| } | |||||
| static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { | |||||
| return lhs + rhs; | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE NormOp(src_ctype* src, dst_ctype* dst, size_t B, p_type p) | |||||
| : INIT(wtype(0)), src(src), dst(dst), B(B), p(static_cast<wtype>(p)) {} | |||||
| }; | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| template <> | |||||
| struct NormOp<dt_float16, dt_float16, dt_float16> { | |||||
| typedef dt_float16 wtype; | |||||
| typedef dt_float16 src_ctype; | |||||
| typedef dt_float16 dst_ctype; | |||||
| const wtype INIT; | |||||
| src_ctype* src; | |||||
| dst_ctype* dst; | |||||
| const size_t B; | |||||
| const wtype p; | |||||
| // the half_float API dispatches to both host and device. | |||||
| MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { | |||||
| return half_float::detail::pow(half_float::detail::abs(src[idx]), p); | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { | |||||
| dst[idx] = half_float::detail::pow(val, static_cast<wtype>(1.f) / p); | |||||
| } | |||||
| static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { | |||||
| return lhs + rhs; | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE | |||||
| NormOp(src_ctype* src, dst_ctype* dst, size_t B, dt_float32 p) | |||||
| : INIT(wtype(0)), src(src), dst(dst), B(B), p(static_cast<wtype>(p)) {} | |||||
| }; | |||||
| #endif | |||||
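How run_reduce consumes one of these functors, reduced to its serial essence: read() maps an element into the accumulation domain, apply() folds partials starting from INIT, and write() applies the final transform. A host-side sketch for a single slice of length B (A = C = 1 assumed; this is not the actual kernel):

    #include <cstddef>
    #include <cstdint>

    template <typename Op>
    void reduce_slice_ref(Op op, std::size_t B) {
        auto acc = op.INIT;  // 0 for all the norm ops
        for (std::size_t i = 0; i < B; ++i)
            // |x|^p for NormOp, x*x for NormTwoOp, ...
            acc = Op::apply(acc, op.read(static_cast<std::uint32_t>(i)));
        op.write(0, acc);    // pow(acc, 1/p), sqrtf(acc), ...
    }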
| // TODO: a proper 0-norm impl requires understanding ReduceOp first | |||||
| template <typename src_ctype, typename dst_ctype, typename wtype_> | |||||
| struct NormZeroOp; | |||||
| template <> | |||||
| struct NormZeroOp<dt_float32, dt_float32, dt_float32> { | |||||
| typedef dt_float32 wtype; | |||||
| typedef dt_float32 src_ctype; | |||||
| typedef dt_float32 dst_ctype; | |||||
| const wtype INIT; | |||||
| src_ctype* src; | |||||
| dst_ctype* dst; | |||||
| const size_t B; | |||||
| const wtype epsilon = 0.00001f; | |||||
| MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { | |||||
| return fabsf(src[idx] - 0.0f) <= epsilon ? 0.0f : 1.0f; | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; } | |||||
| static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { | |||||
| return lhs + rhs; | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE NormZeroOp(src_ctype* src, dst_ctype* dst, size_t B) | |||||
| : INIT(wtype(0)), src(src), dst(dst), B(B) {} | |||||
| }; | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| template <> | |||||
| struct NormZeroOp<dt_float16, dt_float16, dt_float16> { | |||||
| typedef dt_float16 wtype; | |||||
| typedef dt_float16 src_ctype; | |||||
| typedef dt_float16 dst_ctype; | |||||
| const wtype INIT; | |||||
| src_ctype* src; | |||||
| dst_ctype* dst; | |||||
| const size_t B; | |||||
| const wtype epsilon = half_float::half(0.00001f); | |||||
| MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { | |||||
| return half_float::detail::fabs(src[idx] - half_float::half()) <= epsilon | |||||
| ? half_float::half(0.0f) | |||||
| : half_float::half(1.0f); | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; } | |||||
| static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { | |||||
| return lhs + rhs; | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE NormZeroOp(src_ctype* src, dst_ctype* dst, size_t B) | |||||
| : INIT(wtype(0)), src(src), dst(dst), B(B) {} | |||||
| }; | |||||
| #endif | |||||
| template <typename src_ctype, typename dst_ctype, typename wtype_> | |||||
| struct NormOneOp; | |||||
| template <> | |||||
| struct NormOneOp<dt_float32, dt_float32, dt_float32> { | |||||
| typedef dt_float32 wtype; | |||||
| typedef dt_float32 src_ctype; | |||||
| typedef dt_float32 dst_ctype; | |||||
| const wtype INIT; | |||||
| src_ctype* src; | |||||
| dst_ctype* dst; | |||||
| const size_t B; | |||||
| MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { return fabsf(src[idx]); } | |||||
| MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; } | |||||
| static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { | |||||
| return lhs + rhs; | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE NormOneOp(src_ctype* src, dst_ctype* dst, size_t B) | |||||
| : INIT(wtype(0)), src(src), dst(dst), B(B) {} | |||||
| }; | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| template <> | |||||
| struct NormOneOp<dt_float16, dt_float16, dt_float16> { | |||||
| typedef dt_float16 wtype; | |||||
| typedef dt_float16 src_ctype; | |||||
| typedef dt_float16 dst_ctype; | |||||
| const wtype INIT; | |||||
| src_ctype* src; | |||||
| dst_ctype* dst; | |||||
| const size_t B; | |||||
| MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { | |||||
| return half_float::detail::abs(src[idx]); | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; } | |||||
| static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { | |||||
| return lhs + rhs; | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE NormOneOp(src_ctype* src, dst_ctype* dst, size_t B) | |||||
| : INIT(wtype(0)), src(src), dst(dst), B(B) {} | |||||
| }; | |||||
| #endif | |||||
| template <typename src_ctype, typename dst_ctype, typename wtype_> | |||||
| struct NormTwoOp; | |||||
| template <> | |||||
| struct NormTwoOp<dt_float32, dt_float32, dt_float32> { | |||||
| typedef dt_float32 wtype; | |||||
| typedef dt_float32 src_ctype; | |||||
| typedef dt_float32 dst_ctype; | |||||
| const wtype INIT; | |||||
| src_ctype* src; | |||||
| dst_ctype* dst; | |||||
| const size_t B; | |||||
| MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { return src[idx] * src[idx]; } | |||||
| MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { | |||||
| dst[idx] = sqrtf(val); | |||||
| } | |||||
| static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { | |||||
| return lhs + rhs; | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE NormTwoOp(src_ctype* src, dst_ctype* dst, size_t B) | |||||
| : INIT(wtype(0)), src(src), dst(dst), B(B) {} | |||||
| }; | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| template <> | |||||
| struct NormTwoOp<dt_float16, dt_float16, dt_float16> { | |||||
| typedef dt_float16 wtype; | |||||
| typedef dt_float16 src_ctype; | |||||
| typedef dt_float16 dst_ctype; | |||||
| const wtype INIT; | |||||
| src_ctype* src; | |||||
| dst_ctype* dst; | |||||
| const size_t B; | |||||
| MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { return src[idx] * src[idx]; } | |||||
| MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { | |||||
| dst[idx] = half_float::detail::sqrt(val); | |||||
| } | |||||
| static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { | |||||
| return lhs + rhs; | |||||
| } | |||||
| MEGDNN_HOST MEGDNN_DEVICE NormTwoOp(src_ctype* src, dst_ctype* dst, size_t B) | |||||
| : INIT(wtype(0)), src(src), dst(dst), B(B) {} | |||||
| }; | |||||
| #endif | |||||
| } // namespace device_reduce | |||||
| } // namespace megdnn | |||||
| @@ -0,0 +1,180 @@ | |||||
| #include "src/cuda/norm/opr_impl.h" | |||||
| #include "helper.h" | |||||
| #include "src/common/reduce_helper_device.h" | |||||
| #include "src/common/utils.h" | |||||
| #include "src/cuda/handle.h" | |||||
| #include "src/cuda/reduce_helper.cuh" | |||||
| #include "src/cuda/utils.h" | |||||
| namespace megdnn { | |||||
| namespace cuda { | |||||
| using namespace device_reduce; | |||||
| using Mode = Norm::Mode; | |||||
| template <> | |||||
| void NormForwardImpl::dispatch_mode<Mode::NEG_INF_NORM>( | |||||
| _megdnn_tensor_inout src, _megdnn_tensor_inout dst, _megdnn_workspace workspace, | |||||
| size_t A, size_t B, size_t C, cudaStream_t stream) { | |||||
| #define CASE(dt) \ | |||||
| case DTypeTrait<dt>::enumv: { \ | |||||
| using ctype = DTypeTrait<dt>::ctype; \ | |||||
| auto reduceOp = \ | |||||
| MinOp<ctype, ctype, ctype>(src.ptr<ctype>(), dst.ptr<ctype>(), B); \ | |||||
| run_reduce<MinOp<ctype, ctype, ctype>, false>( \ | |||||
| workspace.ptr<ctype>(), A, B, C, stream, reduceOp); \ | |||||
| break; \ | |||||
| }; | |||||
| switch (src.layout.dtype.enumv()) { | |||||
| CASE(::megdnn::dtype::Float32) | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| CASE(::megdnn::dtype::Float16) | |||||
| #endif | |||||
| default: | |||||
| megdnn_assert_internal(false); | |||||
| } | |||||
| #undef CASE | |||||
| } | |||||
| template <> | |||||
| void NormForwardImpl::dispatch_mode<Mode::INF_NORM>( | |||||
| _megdnn_tensor_inout src, _megdnn_tensor_inout dst, _megdnn_workspace workspace, | |||||
| size_t A, size_t B, size_t C, cudaStream_t stream) { | |||||
| #define CASE(dt) \ | |||||
| case DTypeTrait<dt>::enumv: { \ | |||||
| using ctype = DTypeTrait<dt>::ctype; \ | |||||
| auto reduceOp = \ | |||||
| MaxOp<ctype, ctype, ctype>(src.ptr<ctype>(), dst.ptr<ctype>(), B); \ | |||||
| run_reduce<MaxOp<ctype, ctype, ctype>, false>( \ | |||||
| workspace.ptr<ctype>(), A, B, C, stream, reduceOp); \ | |||||
| break; \ | |||||
| }; | |||||
| switch (src.layout.dtype.enumv()) { | |||||
| CASE(::megdnn::dtype::Float32) | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| CASE(::megdnn::dtype::Float16) | |||||
| #endif | |||||
| default: | |||||
| megdnn_assert_internal(false); | |||||
| } | |||||
| #undef CASE | |||||
| } | |||||
| template <> | |||||
| void NormForwardImpl::dispatch_mode<Mode::P_NORM>( | |||||
| _megdnn_tensor_inout src, _megdnn_tensor_inout dst, _megdnn_workspace workspace, | |||||
| size_t A, size_t B, size_t C, cudaStream_t stream) { | |||||
| typedef dt_float32 p_type; | |||||
| #define CASE(dt) \ | |||||
| case DTypeTrait<dt>::enumv: { \ | |||||
| using ctype = DTypeTrait<dt>::ctype; \ | |||||
| p_type epsilon = 0.000001f; \ | |||||
| if (fabs(param().p - 0.0f) < epsilon) { \ | |||||
| run_reduce<NormZeroOp<ctype, ctype, ctype>, false>( \ | |||||
| workspace.ptr<ctype>(), A, B, C, stream, \ | |||||
| NormZeroOp<ctype, ctype, ctype>( \ | |||||
| src.ptr<ctype>(), dst.ptr<ctype>(), B)); \ | |||||
| } else if (fabs(param().p - 1.0f) < epsilon) { \ | |||||
| run_reduce<NormOneOp<ctype, ctype, ctype>, false>( \ | |||||
| workspace.ptr<ctype>(), A, B, C, stream, \ | |||||
| NormOneOp<ctype, ctype, ctype>( \ | |||||
| src.ptr<ctype>(), dst.ptr<ctype>(), B)); \ | |||||
| } else if (fabs(param().p - 2.0f) < epsilon) { \ | |||||
| run_reduce<NormTwoOp<ctype, ctype, ctype>, false>( \ | |||||
| workspace.ptr<ctype>(), A, B, C, stream, \ | |||||
| NormTwoOp<ctype, ctype, ctype>( \ | |||||
| src.ptr<ctype>(), dst.ptr<ctype>(), B)); \ | |||||
| } else { \ | |||||
| run_reduce<NormOp<ctype, ctype, ctype>, false>( \ | |||||
| workspace.ptr<ctype>(), A, B, C, stream, \ | |||||
| NormOp<ctype, ctype, ctype>( \ | |||||
| src.ptr<ctype>(), dst.ptr<ctype>(), B, param().p)); \ | |||||
| } \ | |||||
| break; \ | |||||
| }; | |||||
| switch (src.layout.dtype.enumv()) { | |||||
| CASE(::megdnn::dtype::Float32) | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| CASE(::megdnn::dtype::Float16) | |||||
| #endif | |||||
| default: | |||||
| megdnn_assert_internal(false); | |||||
| } | |||||
| #undef CASE | |||||
| } | |||||
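Why P_NORM branches on p before instancing the reduce: the common orders get cheaper kernels that avoid powf entirely, and only the general case pays for it.

    // p ~ 0 : count of non-zero elements       (NormZeroOp)
    // p ~ 1 : sum of |x|                       (NormOneOp)
    // p ~ 2 : sqrt(sum of x*x)                 (NormTwoOp)
    // else  : powf(sum of powf(|x|, p), 1/p)   (NormOp)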
| } // namespace cuda | |||||
| namespace cuda { | |||||
| void NormForwardImpl::exec( | |||||
| _megdnn_tensor_in src, _megdnn_tensor_out dst, _megdnn_workspace workspace) { | |||||
| check_exec(src.layout, dst.layout, workspace.size); | |||||
| size_t A, B, C; | |||||
| reduce::get_ABC(src.layout, A, B, C, param().dim); | |||||
| auto stream = cuda_stream(this->handle()); | |||||
| #define CASE(mode) \ | |||||
| case mode: { \ | |||||
| dispatch_mode<mode>(src, dst, workspace, A, B, C, stream); \ | |||||
| break; \ | |||||
| }; | |||||
| switch (param().mode) { | |||||
| CASE(Mode::P_NORM) | |||||
| CASE(Mode::INF_NORM) | |||||
| CASE(Mode::NEG_INF_NORM) | |||||
| default: | |||||
| megdnn_assert_internal(false); | |||||
| } | |||||
| #undef CASE | |||||
| return; | |||||
| } | |||||
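The (A, B, C) decomposition above is the usual megdnn reduce layout split (inferred from its use here, not restated from get_ABC itself): B is the extent of the reduced axis param().dim, A is the product of the extents before it, and C the product of the extents after it.

    // src = {1, 2, 3, 4}, dim = 1  ->  A = 1, B = 2, C = 12
    // dst then holds A * C = 12 elements, one per (a, c) slice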
| size_t NormForwardImpl::get_workspace_in_bytes( | |||||
| const TensorLayout& src, const TensorLayout& dst) { | |||||
| using namespace device_reduce; | |||||
| size_t A, B, C; | |||||
| reduce::get_ABC(src, A, B, C, param().dim); | |||||
| #define cb(dt, op) \ | |||||
| case DTypeTrait<dt>::enumv: { \ | |||||
| using ctype = DTypeTrait<dt>::ctype; \ | |||||
| return get_reduce_workspace_in_bytes<op<ctype, ctype, ctype>>(A, B, C); \ | |||||
| break; \ | |||||
| }; | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| #define CASE(mode, op) \ | |||||
| case mode: { \ | |||||
| switch (src.dtype.enumv()) { \ | |||||
| cb(::megdnn::dtype::Float32, op) cb(::megdnn::dtype::Float16, op) default \ | |||||
| : megdnn_assert_internal(false); \ | |||||
| } \ | |||||
| }; | |||||
| #else | |||||
| #define CASE(mode, op) \ | |||||
| case mode: { \ | |||||
| switch (src.dtype.enumv()) { \ | |||||
| cb(::megdnn::dtype::Float32, op) default : megdnn_assert_internal(false); \ | |||||
| } \ | |||||
| }; | |||||
| #endif | |||||
| // XXX: the 0/1 norms are dispatched to different Ops, but their | |||||
| // workspace size is the same as NormOp's | |||||
| switch (param().mode) { | |||||
| CASE(Mode::INF_NORM, MaxOp) | |||||
| CASE(Mode::NEG_INF_NORM, MinOp) | |||||
| CASE(Mode::P_NORM, NormOp) | |||||
| default: | |||||
| megdnn_assert_internal(false); | |||||
| } | |||||
| #undef CASE | |||||
| #undef cb | |||||
| } | |||||
| } // namespace cuda | |||||
| } // namespace megdnn | |||||
| @@ -0,0 +1,25 @@ | |||||
| #pragma once | |||||
| #include "megdnn/oprs.h" | |||||
| #include "src/cuda/utils.h" | |||||
| namespace megdnn { | |||||
| namespace cuda { | |||||
| class NormForwardImpl : public NormForward { | |||||
| using Norm::Norm; | |||||
| public: | |||||
| void exec( | |||||
| _megdnn_tensor_in src, _megdnn_tensor_out dst, | |||||
| _megdnn_workspace workspace) override; | |||||
| size_t get_workspace_in_bytes( | |||||
| const TensorLayout& src, const TensorLayout& dst) override; | |||||
| protected: | |||||
| template <Mode mode> | |||||
| void dispatch_mode( | |||||
| _megdnn_tensor_inout src, _megdnn_tensor_inout dst, | |||||
| _megdnn_workspace workspace, size_t A, size_t B, size_t C, | |||||
| cudaStream_t stream); | |||||
| }; | |||||
| } // namespace cuda | |||||
| } // namespace megdnn | |||||
| @@ -51,6 +51,7 @@ | |||||
| #include "src/naive/matrix_mul/opr_impl.h" | #include "src/naive/matrix_mul/opr_impl.h" | ||||
| #include "src/naive/max_tensor_diff/opr_impl.h" | #include "src/naive/max_tensor_diff/opr_impl.h" | ||||
| #include "src/naive/mesh_indexing/opr_impl.h" | #include "src/naive/mesh_indexing/opr_impl.h" | ||||
| #include "src/naive/norm/opr_impl.h" | |||||
| #include "src/naive/padding/opr_impl.h" | #include "src/naive/padding/opr_impl.h" | ||||
| #include "src/naive/param_pack/opr_impl.h" | #include "src/naive/param_pack/opr_impl.h" | ||||
| #include "src/naive/pooling/opr_impl.h" | #include "src/naive/pooling/opr_impl.h" | ||||
| @@ -0,0 +1,152 @@ | |||||
| #pragma once | |||||
| #include <algorithm> | |||||
| #include <numeric> | |||||
| #include "megdnn/basic_types.h" | |||||
| #include "megdnn/dtype.h" | |||||
| #include "src/common/utils.h" | |||||
| using namespace megdnn; | |||||
| /* anonymous namespace */ | |||||
| namespace { | |||||
| using Mode = Reduce::Mode; | |||||
| /* Reduce Trait */ | |||||
| template <Mode mode, typename ctype> | |||||
| struct Trait; | |||||
| template <typename ctype> | |||||
| struct Trait<Mode::SUM, ctype> { | |||||
| static const ctype INIT; | |||||
| static ctype apply(ctype x, ctype y) { return x + y; } | |||||
| static ctype visit(ctype x) { return x; } | |||||
| static ctype write(ctype x, size_t) { return x; } | |||||
| }; | |||||
| template <typename ctype> | |||||
| const ctype Trait<Mode::SUM, ctype>::INIT = ctype(0); | |||||
| template <typename ctype> | |||||
| struct Trait<Mode::MEAN, ctype> { | |||||
| static const ctype INIT; | |||||
| static ctype apply(ctype x, ctype y) { return x + y; } | |||||
| static ctype visit(ctype x) { return x; } | |||||
| static ctype write(ctype x, size_t B) { return x / (ctype)B; } | |||||
| }; | |||||
| template <typename ctype> | |||||
| const ctype Trait<Mode::MEAN, ctype>::INIT = ctype(0); | |||||
| template <typename ctype> | |||||
| struct Trait<Mode::SUM_SQR, ctype> { | |||||
| static const ctype INIT; | |||||
| static ctype apply(ctype x, ctype y) { return x + y; } | |||||
| static ctype visit(ctype x) { return x * x; } | |||||
| static ctype write(ctype x, size_t) { return x; } | |||||
| }; | |||||
| template <typename ctype> | |||||
| const ctype Trait<Mode::SUM_SQR, ctype>::INIT = ctype(0); | |||||
| template <typename ctype> | |||||
| struct Trait<Mode::PRODUCT, ctype> { | |||||
| static const ctype INIT; | |||||
| static ctype apply(ctype x, ctype y) { return x * y; } | |||||
| static ctype visit(ctype x) { return x; } | |||||
| static ctype write(ctype x, size_t) { return x; } | |||||
| }; | |||||
| template <typename ctype> | |||||
| const ctype Trait<Mode::PRODUCT, ctype>::INIT = ctype(1); | |||||
| template <typename ctype> | |||||
| struct Trait<Mode::MIN, ctype> { | |||||
| static ctype apply(ctype x, ctype y) { return x < y ? x : y; } | |||||
| static ctype visit(ctype x) { return x; } | |||||
| static ctype write(ctype x, size_t) { return x; } | |||||
| }; | |||||
| template <> | |||||
| struct Trait<Mode::MIN, dt_float32> { | |||||
| using ctype = dt_float32; | |||||
| static ctype apply(ctype x, ctype y) { return (std::isnan(x) || x < y) ? x : y; } | |||||
| static ctype visit(ctype x) { return x; } | |||||
| static ctype write(ctype x, size_t) { return x; } | |||||
| }; | |||||
| template <typename ctype> | |||||
| struct Trait<Mode::MAX, ctype> { | |||||
| static ctype apply(ctype x, ctype y) { return x > y ? x : y; } | |||||
| static ctype visit(ctype x) { return x; } | |||||
| static ctype write(ctype x, size_t) { return x; } | |||||
| }; | |||||
| template <> | |||||
| struct Trait<Mode::MAX, dt_float32> { | |||||
| using ctype = dt_float32; | |||||
| static ctype apply(ctype x, ctype y) { return (std::isnan(x) || x > y) ? x : y; } | |||||
| static ctype visit(ctype x) { return x; } | |||||
| static ctype write(ctype x, size_t) { return x; } | |||||
| }; | |||||
| /* NormOp */ | |||||
| template <typename ctype> | |||||
| struct NormOp; | |||||
| template <> | |||||
| struct NormOp<dt_float32> { | |||||
| typedef dt_float32 ctype; | |||||
| static const ctype INIT; | |||||
| static ctype apply(ctype x, ctype y) { return x + y; } | |||||
| static ctype visit(ctype x, dt_float32 p) { return powf(fabs(x), p); } | |||||
| static ctype write(ctype x, size_t, dt_float32 p) { return powf(x, 1.f / p); } | |||||
| }; | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| template <> | |||||
| struct NormOp<dt_float16> { | |||||
| typedef dt_float16 ctype; | |||||
| static const ctype INIT; | |||||
| static ctype apply(ctype x, ctype y) { return x + y; } | |||||
| static ctype visit(ctype x, dt_float32 p) { | |||||
| return half_float::pow(half_float::abs(x), half_float::half(p)); | |||||
| } | |||||
| static ctype write(ctype x, size_t, dt_float32 p) { | |||||
| return half_float::pow(x, half_float::half(1.f / p)); | |||||
| } | |||||
| }; | |||||
| #endif | |||||
| template <typename ctype> | |||||
| struct NormZeroOp; | |||||
| template <> | |||||
| struct NormZeroOp<dt_float32> { | |||||
| typedef dt_float32 ctype; | |||||
| static const ctype INIT; | |||||
| static ctype apply(ctype x, ctype y) { return x + y; } | |||||
| // use fabs so that negative values are counted as non-zero | |||||
| static ctype visit(ctype x) { return fabs(x) < 0.00001f ? 0.f : 1.f; } | |||||
| static ctype write(ctype x, size_t) { return x; } | |||||
| }; | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| template <> | |||||
| struct NormZeroOp<dt_float16> { | |||||
| typedef dt_float16 ctype; | |||||
| static const ctype INIT; | |||||
| static ctype apply(ctype x, ctype y) { return x + y; } | |||||
| static ctype visit(ctype x) { | |||||
| // use abs so that negative values are counted as non-zero | |||||
| return half_float::abs(x) < half_float::half(0.00001f) | |||||
| ? half_float::half(0.f) | |||||
| : half_float::half(1.f); | |||||
| } | |||||
| static ctype write(ctype x, size_t) { return x; } | |||||
| }; | |||||
| #endif | |||||
| } // namespace | |||||
| @@ -0,0 +1,197 @@ | |||||
| #include "src/naive/norm/opr_impl.h" | |||||
| #include "helper.h" | |||||
| #include "src/common/utils.h" | |||||
| #include "src/naive/handle.h" | |||||
| namespace megdnn { | |||||
| namespace naive { | |||||
| using Mode = Norm::Mode; | |||||
| template <> | |||||
| void NormForwardImpl::dispatch_mode<Mode::NEG_INF_NORM>( | |||||
| _megdnn_tensor_in src, _megdnn_tensor_out dst, size_t A, size_t B, size_t C) { | |||||
| #define CASE(dt) \ | |||||
| case DTypeTrait<dt>::enumv: { \ | |||||
| using ctype = DTypeTrait<dt>::ctype; \ | |||||
| const ctype* __restrict sptr = src.ptr<ctype>(); \ | |||||
| ctype* __restrict dptr = dst.ptr<ctype>(); \ | |||||
| std::function<ctype(size_t, size_t, size_t, size_t)> func; \ | |||||
| func = [&](size_t a, size_t c, size_t bl, size_t br) -> ctype { \ | |||||
| if (bl + 1 < br) { \ | |||||
| size_t mid = bl + (br - bl) / 2; \ | |||||
| return Trait<ReduceForward::Mode::MIN, ctype>::apply( \ | |||||
| func(a, c, bl, mid), func(a, c, mid, br)); \ | |||||
| } else { \ | |||||
| return Trait<ReduceForward::Mode::MIN, ctype>::visit( \ | |||||
| sptr[a * B * C + bl * C + c]); \ | |||||
| } \ | |||||
| }; \ | |||||
| for (size_t a = 0; a < A; ++a) \ | |||||
| for (size_t c = 0; c < C; ++c) { \ | |||||
| dptr[a * C + c] = Trait<ReduceForward::Mode::MIN, ctype>::write( \ | |||||
| func(a, c, 0, B), B); \ | |||||
| } \ | |||||
| break; \ | |||||
| }; | |||||
| switch (src.layout.dtype.enumv()) { | |||||
| CASE(::megdnn::dtype::Float32) | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| CASE(::megdnn::dtype::Float16) | |||||
| #endif | |||||
| default: | |||||
| megdnn_assert_internal(false); | |||||
| } | |||||
| #undef CASE | |||||
| } | |||||
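The recursive `func` lambda above is a pairwise (tree) reduction: it halves [bl, br) and combines the two sub-results instead of folding left to right, which keeps floating-point error growth at O(log B) rather than O(B). The same pattern in isolation (stand-alone sketch, not taken from this diff):

    #include <cstddef>

    // pairwise sum over [lo, hi); assumes lo < hi
    float pairwise_sum(const float* x, std::size_t lo, std::size_t hi) {
        if (lo + 1 == hi)
            return x[lo];                      // single-element leaf
        std::size_t mid = lo + (hi - lo) / 2;  // same split as `func` above
        return pairwise_sum(x, lo, mid) + pairwise_sum(x, mid, hi);
    }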
| template <> | |||||
| void NormForwardImpl::dispatch_mode<Mode::INF_NORM>( | |||||
| _megdnn_tensor_in src, _megdnn_tensor_out dst, size_t A, size_t B, size_t C) { | |||||
| #define CASE(dt) \ | |||||
| case DTypeTrait<dt>::enumv: { \ | |||||
| using ctype = DTypeTrait<dt>::ctype; \ | |||||
| const ctype* __restrict sptr = src.ptr<ctype>(); \ | |||||
| ctype* __restrict dptr = dst.ptr<ctype>(); \ | |||||
| std::function<ctype(size_t, size_t, size_t, size_t)> func; \ | |||||
| func = [&](size_t a, size_t c, size_t bl, size_t br) -> ctype { \ | |||||
| if (bl + 1 < br) { \ | |||||
| size_t mid = bl + (br - bl) / 2; \ | |||||
| return Trait<ReduceForward::Mode::MAX, ctype>::apply( \ | |||||
| func(a, c, bl, mid), func(a, c, mid, br)); \ | |||||
| } else { \ | |||||
| return Trait<ReduceForward::Mode::MAX, ctype>::visit( \ | |||||
| sptr[a * B * C + bl * C + c]); \ | |||||
| } \ | |||||
| }; \ | |||||
| for (size_t a = 0; a < A; ++a) \ | |||||
| for (size_t c = 0; c < C; ++c) { \ | |||||
| dptr[a * C + c] = Trait<ReduceForward::Mode::MAX, ctype>::write( \ | |||||
| func(a, c, 0, B), B); \ | |||||
| } \ | |||||
| break; \ | |||||
| }; | |||||
| switch (src.layout.dtype.enumv()) { | |||||
| CASE(::megdnn::dtype::Float32) | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| CASE(::megdnn::dtype::Float16) | |||||
| #endif | |||||
| default: | |||||
| megdnn_assert_internal(false); | |||||
| } | |||||
| #undef CASE | |||||
| } | |||||
| template <> | |||||
| void NormForwardImpl::dispatch_mode<Mode::P_NORM>( | |||||
| _megdnn_tensor_in src, _megdnn_tensor_out dst, size_t A, size_t B, size_t C) { | |||||
| #define CASE(dt) \ | |||||
| case DTypeTrait<dt>::enumv: { \ | |||||
| using ctype = DTypeTrait<dt>::ctype; \ | |||||
| const ctype* __restrict sptr = src.ptr<ctype>(); \ | |||||
| ctype* __restrict dptr = dst.ptr<ctype>(); \ | |||||
| std::function<ctype(size_t, size_t, size_t, size_t)> func; \ | |||||
| if (fabs(param().p - 0.f) < 0.00001f) { \ | |||||
| func = [&](size_t a, size_t c, size_t bl, size_t br) -> ctype { \ | |||||
| if (bl + 1 < br) { \ | |||||
| size_t mid = bl + (br - bl) / 2; \ | |||||
| return NormZeroOp<ctype>::apply( \ | |||||
| func(a, c, bl, mid), func(a, c, mid, br)); \ | |||||
| } else { \ | |||||
| return NormZeroOp<ctype>::visit(sptr[a * B * C + bl * C + c]); \ | |||||
| } \ | |||||
| }; \ | |||||
| for (size_t a = 0; a < A; ++a) { \ | |||||
| for (size_t c = 0; c < C; ++c) { \ | |||||
| dptr[a * C + c] = NormZeroOp<ctype>::write(func(a, c, 0, B), B); \ | |||||
| } \ | |||||
| } \ | |||||
| } else { \ | |||||
| func = [&](size_t a, size_t c, size_t bl, size_t br) -> ctype { \ | |||||
| if (bl + 1 < br) { \ | |||||
| size_t mid = bl + (br - bl) / 2; \ | |||||
| return NormOp<ctype>::apply( \ | |||||
| func(a, c, bl, mid), func(a, c, mid, br)); \ | |||||
| } else { \ | |||||
| return NormOp<ctype>::visit( \ | |||||
| sptr[a * B * C + bl * C + c], param().p); \ | |||||
| } \ | |||||
| }; \ | |||||
| for (size_t a = 0; a < A; ++a) { \ | |||||
| for (size_t c = 0; c < C; ++c) { \ | |||||
| dptr[a * C + c] = \ | |||||
| NormOp<ctype>::write(func(a, c, 0, B), B, param().p); \ | |||||
| } \ | |||||
| } \ | |||||
| } \ | |||||
| break; \ | |||||
| }; | |||||
| switch (src.layout.dtype.enumv()) { | |||||
| CASE(::megdnn::dtype::Float32) | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| CASE(::megdnn::dtype::Float16) | |||||
| #endif | |||||
| default: | |||||
| megdnn_assert_internal(false); | |||||
| } | |||||
| #undef CASE | |||||
| } | |||||
| void NormForwardImpl::exec( | |||||
| _megdnn_tensor_in src, _megdnn_tensor_out dst, _megdnn_workspace workspace) { | |||||
| check_exec(src.layout, dst.layout, workspace.size); | |||||
| using namespace reduce; | |||||
| size_t A, B, C; | |||||
| reduce::get_ABC(src.layout, A, B, C, param().dim); | |||||
| auto make_tensor = [&](DType comp_dtype, _megdnn_tensor_inout tensor, | |||||
| dt_byte*& workspace_ptr) { | |||||
| if (comp_dtype == tensor.layout.dtype) | |||||
| return tensor; | |||||
| auto layout = TensorLayout(tensor.layout, comp_dtype); | |||||
| TensorND new_tensor{workspace_ptr, layout}; | |||||
| workspace_ptr += layout.span().dist_byte(); | |||||
| return new_tensor; | |||||
| }; | |||||
| auto typecvt = handle()->create_operator<TypeCvt>(); | |||||
| auto copy_to = [&typecvt](const TensorND& from, const TensorND& to) { | |||||
| if (from.raw_ptr() != to.raw_ptr()) | |||||
| typecvt->exec(from, to); | |||||
| }; | |||||
| auto workspace_ptr = workspace.ptr<dt_byte>(); | |||||
| auto new_src = make_tensor(src.layout.dtype, src, workspace_ptr); | |||||
| auto new_dst = make_tensor(dst.layout.dtype, dst, workspace_ptr); | |||||
| #define CASE(mode) \ | |||||
| case mode: { \ | |||||
| copy_to(src, new_src); \ | |||||
| ::megdnn::naive::HandleImpl* handlePtr = static_cast<HandleImpl*>(handle()); \ | |||||
| MEGDNN_DISPATCH_CPU_KERN( \ | |||||
| handlePtr, dispatch_mode<mode>(new_src, new_dst, A, B, C)); \ | |||||
| copy_to(new_dst, dst); \ | |||||
| break; \ | |||||
| }; | |||||
| switch (param().mode) { | |||||
| CASE(Mode::P_NORM) | |||||
| CASE(Mode::INF_NORM) | |||||
| CASE(Mode::NEG_INF_NORM) | |||||
| default: | |||||
| megdnn_assert_internal(false); | |||||
| } | |||||
| #undef CASE | |||||
| } | |||||
| size_t NormForwardImpl::get_workspace_in_bytes( | |||||
| const TensorLayout& src, const TensorLayout& dst) { | |||||
| MEGDNN_MARK_USED_VAR(src); | |||||
| MEGDNN_MARK_USED_VAR(dst); | |||||
| return 0; | |||||
| } | |||||
| } // namespace naive | |||||
| } // namespace megdnn | |||||
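One non-obvious consistency point in this file: get_workspace_in_bytes can return 0 because exec only carves tensors out of the workspace when comp_dtype differs from the tensor's own dtype, and here make_tensor is always called with the tensor's own dtype.

    // make_tensor(src.layout.dtype, src, ...) always hits the early return,
    // so workspace_ptr is never advanced and copy_to degenerates to a no-op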
| @@ -0,0 +1,23 @@ | |||||
| #pragma once | |||||
| #include "megdnn/oprs.h" | |||||
| #include "src/common/reduce_helper.h" | |||||
| #include "src/naive/reduce/opr_impl.h" | |||||
| namespace megdnn { | |||||
| namespace naive { | |||||
| class NormForwardImpl : public Norm { | |||||
| public: | |||||
| using Norm::Norm; | |||||
| void exec( | |||||
| _megdnn_tensor_in src, _megdnn_tensor_out dst, | |||||
| _megdnn_workspace workspace) override; | |||||
| size_t get_workspace_in_bytes( | |||||
| const TensorLayout& src, const TensorLayout& dst) override; | |||||
| protected: | |||||
| template <Mode mode> | |||||
| void dispatch_mode( | |||||
| _megdnn_tensor_in src, _megdnn_tensor_out dst, size_t, size_t, size_t); | |||||
| }; | |||||
| } // namespace naive | |||||
| } // namespace megdnn | |||||
| @@ -0,0 +1,19 @@ | |||||
| #pragma once | |||||
| #include <iostream> | |||||
| #include "megdnn/basic_types.h" | |||||
| #include "megdnn/opr_param_defs.h" | |||||
| namespace megdnn { | |||||
| namespace test { | |||||
| namespace norm { | |||||
| struct TestArg { | |||||
| param::Norm param; | |||||
| TensorShape src; | |||||
| TestArg(param::Norm param, TensorShape src) : param(param), src(src) {} | |||||
| }; | |||||
| } // namespace norm | |||||
| } // namespace test | |||||
| } // namespace megdnn | |||||
| @@ -0,0 +1,291 @@ | |||||
| #include "test/common/norm.h" | |||||
| #include "megdnn/dtype.h" | |||||
| #include "megdnn/oprs.h" | |||||
| #include "test/common/checker.h" | |||||
| // #include "test/naive/fixture.h" | |||||
| // #include "test/common/benchmarker.h" | |||||
| #include <iostream> | |||||
| #include "test/cuda/benchmark.h" | |||||
| #include "test/cuda/fixture.h" | |||||
| #include "test/cuda/utils.h" | |||||
| namespace megdnn { | |||||
| namespace test { | |||||
| // CORRECT | |||||
| // L2, fp32, dim | |||||
| TEST_F(CUDA, L2NORM_FP32_DIM0) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| param.p = 2; | |||||
| param.dim = 0; | |||||
| checker.set_param(param); | |||||
| checker.exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| }); | |||||
| } | |||||
| TEST_F(CUDA, L2NORM_FP32_DIM1) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| param.p = 2; | |||||
| param.dim = 1; | |||||
| checker.set_param(param); | |||||
| checker.exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 1, 3, 4}, dtype::Float32(), | |||||
| {12.000, 13.0384, 14.1421, 15.2971, 16.4924, 17.7200, | |||||
| 18.9737, 20.2485, 21.5407, 22.8473, 24.1661, 25.4951}), | |||||
| }); | |||||
| } | |||||
| TEST_F(CUDA, L2NORM_FP32_DIM3) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| param.p = 2; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 2, 3, 1}, dtype::Float32(), | |||||
| {3.7417, 11.2250, 19.1311, 27.0924, 35.0714, 43.0581})}); | |||||
| } | |||||
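Spot-checking the expected values above by hand for dim = 3:

    // first slice along dim 3: {0, 1, 2, 3}
    //   sqrt(0 + 1 + 4 + 9)   = sqrt(14)  ~= 3.7417
    // second slice: {4, 5, 6, 7}
    //   sqrt(16 + 25 + 36 + 49) = sqrt(126) ~= 11.2250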
| // TODO: support dim = -1, or add a test covering the assert | |||||
| // l2, fp16 | |||||
| TEST_F(CUDA, L2NORM_FP16_DIM3) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| param.p = 2; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float16(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 2, 3, 1}, dtype::Float16(), | |||||
| {3.7422, 11.2266, 19.1250, 27.0938, 35.0625, 43.0625})}); | |||||
| } | |||||
| // l1, fp32,fp16 | |||||
| TEST_F(CUDA, L1NORM_FP32_DIM3) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| param.p = 1; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 2, 3, 1}, dtype::Float32(), {6, 22, 38, 54, 70, 86}), | |||||
| }); | |||||
| } | |||||
| TEST_F(CUDA, L1NORM_FP16_DIM3) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| param.p = 1; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float16(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 2, 3, 1}, dtype::Float16(), {6, 22, 38, 54, 70, 86}), | |||||
| }); | |||||
| } | |||||
| // l0, fp32,fp16 | |||||
| TEST_F(CUDA, L0NORM_FP32_DIM3) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| param.p = 0; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 4, 4, 4, 4, 4}), | |||||
| }); | |||||
| } | |||||
| TEST_F(CUDA, L0NORM_FP16_DIM3) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| param.p = 0; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float16(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 4, 4, 4, 4, 4}), | |||||
| }); | |||||
| } | |||||
| // inf | |||||
| TEST_F(CUDA, INF_NORM_FP32_DIM3) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| using Mode = Norm::Param::Mode; | |||||
| param.dim = 3; | |||||
| param.mode = Mode::INF_NORM; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 7, 11, 15, 19, 23}), | |||||
| }); | |||||
| } | |||||
| TEST_F(CUDA, INF_NORM_FP16_DIM3) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| using Mode = Norm::Param::Mode; | |||||
| param.dim = 3; | |||||
| param.mode = Mode::INF_NORM; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float16(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 7, 11, 15, 19, 23}), | |||||
| }); | |||||
| } | |||||
| // -inf | |||||
| TEST_F(CUDA, NEG_INF_NORM_FP32_DIM3) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| param.mode = Norm::Param::Mode::NEG_INF_NORM; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float32(), {0, 4, 8, 12, 16, 20}), | |||||
| }); | |||||
| } | |||||
| TEST_F(CUDA, NEG_INF_NORM_FP16_DIM3) { | |||||
| Checker<Norm> checker(handle_cuda()); | |||||
| Norm::Param param; | |||||
| param.mode = Norm::Param::Mode::NEG_INF_NORM; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float16(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float16(), {0, 4, 8, 12, 16, 20}), | |||||
| }); | |||||
| } | |||||
| // PERF | |||||
| TEST_F(CUDA, L2NORM_SPEED_FP32) { | |||||
| auto benchmarker = Benchmarker<Norm>(handle_cuda()); | |||||
| benchmarker.set_dtype(0, dtype::Float32()); | |||||
| benchmarker.set_dtype(1, dtype::Float32()); | |||||
| Norm::Param param; | |||||
| param.mode = Norm::Param::Mode::P_NORM; | |||||
| param.dim = 0; | |||||
| param.p = 2; | |||||
| SmallVector<TensorShape> shapes{{4194304}, {}}; | |||||
| NormalRNG rng(0, 1); | |||||
| float eachTime; | |||||
| float totalTime = 0.f; | |||||
| #define ITER 10 | |||||
| for (auto i = 0; i < ITER; i++) { | |||||
| eachTime = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes); | |||||
| // printf("PNORM_SPEED_FP32 cuda time: %.6fms\n", eachTime); | |||||
| totalTime += eachTime; | |||||
| } | |||||
| totalTime /= ITER; | |||||
| printf("PNORM_SPEED_FP32 AVG TIME: %.6fms\n", totalTime); | |||||
| #undef ITER | |||||
| } | |||||
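A rough way to read the printed time (back-of-envelope assumption, not part of the test): the reduction streams the 4194304-float input once, about 16 MiB, so the effective bandwidth can be compared against the device's peak.

    // bytes_read = 4194304 * sizeof(float) = 16 MiB
    // GB/s      ~= bytes_read / (avg_time_ms * 1e6) ~= 16.8 / avg_time_ms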
| TEST_F(CUDA, INFNORM_SPEED_FP32) { | |||||
| auto benchmarker = Benchmarker<Norm>(handle_cuda()); | |||||
| benchmarker.set_dtype(0, dtype::Float32()); | |||||
| benchmarker.set_dtype(1, dtype::Float32()); | |||||
| Norm::Param param; | |||||
| param.mode = Norm::Param::Mode::INF_NORM; | |||||
| param.dim = 0; | |||||
| SmallVector<TensorShape> shapes{{4194304}, {}}; | |||||
| NormalRNG rng(0, 1); | |||||
| float time_fp32 = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes); | |||||
| printf("INF_SPEED_FP32 cuda time: float=%.6fms\n", time_fp32); | |||||
| } | |||||
| TEST_F(CUDA, NEG_INFNORM_SPEED_FP32) { | |||||
| auto benchmarker = Benchmarker<Norm>(handle_cuda()); | |||||
| benchmarker.set_dtype(0, dtype::Float32()); | |||||
| benchmarker.set_dtype(1, dtype::Float32()); | |||||
| Norm::Param param; | |||||
| param.mode = Norm::Param::Mode::NEG_INF_NORM; | |||||
| param.dim = 0; | |||||
| SmallVector<TensorShape> shapes{{4194304}, {}}; | |||||
| NormalRNG rng(0, 1); | |||||
| float time_fp32 = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes); | |||||
| printf("NEG_INF_SPEED_FP32 cuda time: float=%.6fms\n", time_fp32); | |||||
| } | |||||
| } // namespace test | |||||
| } // namespace megdnn | |||||
| @@ -0,0 +1,237 @@ | |||||
| #include "test/common/norm.h" | |||||
| #include "megdnn/dtype.h" | |||||
| #include "megdnn/oprs.h" | |||||
| #include "test/common/benchmarker.h" | |||||
| #include "test/common/checker.h" | |||||
| #include "test/naive/fixture.h" | |||||
| namespace megdnn { | |||||
| namespace test { | |||||
| TEST_F(NAIVE, L2NORM_FP32_DIM0) { | |||||
| Checker<Norm> checker(handle(), false); | |||||
| Norm::Param param; | |||||
| param.p = 2; | |||||
| param.dim = 0; | |||||
| checker.set_param(param); | |||||
| checker.exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| }); | |||||
| } | |||||
| TEST_F(NAIVE, L2NORM_FP32_DIM1) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| param.p = 2; | |||||
| param.dim = 1; | |||||
| checker.set_param(param); | |||||
| checker.exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 1, 3, 4}, dtype::Float32(), | |||||
| {12.000, 13.0384, 14.1421, 15.2971, 16.4924, 17.7200, | |||||
| 18.9737, 20.2485, 21.5407, 22.8473, 24.1661, 25.4951}), | |||||
| }); | |||||
| } | |||||
| TEST_F(NAIVE, L2NORM_FP32_DIM3) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| param.p = 2; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 2, 3, 1}, dtype::Float32(), | |||||
| {3.7417, 11.2250, 19.1311, 27.0924, 35.0714, 43.0581})}); | |||||
| } | |||||
| // l2, fp16 | |||||
| TEST_F(NAIVE, L2NORM_FP16_DIM3) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| param.p = 2; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float16(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 2, 3, 1}, dtype::Float16(), | |||||
| {3.7422, 11.2266, 19.1250, 27.0938, 35.0625, 43.0625})}); | |||||
| } | |||||
| // l1, fp32,fp16 | |||||
| TEST_F(NAIVE, L1NORM_FP32_DIM3) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| param.p = 1; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 2, 3, 1}, dtype::Float32(), {6, 22, 38, 54, 70, 86}), | |||||
| }); | |||||
| } | |||||
| TEST_F(NAIVE, L1NORM_FP16_DIM3) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| param.p = 1; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float16(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue( | |||||
| {1, 2, 3, 1}, dtype::Float16(), {6, 22, 38, 54, 70, 86}), | |||||
| }); | |||||
| } | |||||
| // l0, fp32,fp16 | |||||
| TEST_F(NAIVE, L0NORM_FP32_DIM3) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| param.p = 0; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 4, 4, 4, 4, 4}), | |||||
| }); | |||||
| } | |||||
| TEST_F(NAIVE, L0NORM_FP16_DIM3) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| param.p = 0; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float16(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 4, 4, 4, 4, 4}), | |||||
| }); | |||||
| } | |||||
| // inf | |||||
| TEST_F(NAIVE, INF_NORM_FP32_DIM3) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| using Mode = Norm::Param::Mode; | |||||
| param.dim = 3; | |||||
| param.mode = Mode::INF_NORM; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 7, 11, 15, 19, 23}), | |||||
| }); | |||||
| } | |||||
| TEST_F(NAIVE, INF_NORM_FP16_DIM3) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| using Mode = Norm::Param::Mode; | |||||
| param.dim = 3; | |||||
| param.mode = Mode::INF_NORM; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float16(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 7, 11, 15, 19, 23}), | |||||
| }); | |||||
| } | |||||
| // -inf | |||||
| TEST_F(NAIVE, NEG_INF_NORM_FP32_DIM3) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| param.mode = Norm::Param::Mode::NEG_INF_NORM; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float32(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float32(), {0, 4, 8, 12, 16, 20}), | |||||
| }); | |||||
| } | |||||
| TEST_F(NAIVE, NEG_INF_NORM_FP16_DIM3) { | |||||
| Checker<Norm> checker(handle()); | |||||
| Norm::Param param; | |||||
| param.mode = Norm::Param::Mode::NEG_INF_NORM; | |||||
| param.dim = 3; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{ | |||||
| TensorValue( | |||||
| {1, 2, 3, 4}, dtype::Float16(), | |||||
| {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, | |||||
| 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}), | |||||
| {}}, | |||||
| Testcase{ | |||||
| {}, | |||||
| TensorValue({1, 2, 3, 1}, dtype::Float16(), {0, 4, 8, 12, 16, 20}), | |||||
| }); | |||||
| } | |||||
| } // namespace test | |||||
| } // namespace megdnn | |||||