diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md index a64c651b8..9541c18d2 100644 --- a/docs/developer-guide/operators.md +++ b/docs/developer-guide/operators.md @@ -1147,7 +1147,7 @@ x3 = pooling3d(x2, kernel, stride) | 18 | out_h | int | out_w | | | 21 | kernel_d | int | kernel_w | | | 22 | stride_d | int | stride_w | | -| 23 | pad_front | int | pad_top | | +| 23 | pad_front | int | pad_left | | | 28 | out_d | int | out_w | | Pooling type: diff --git a/src/layer/gelu.cpp b/src/layer/gelu.cpp index 8cc6e484f..45ed5a3e7 100644 --- a/src/layer/gelu.cpp +++ b/src/layer/gelu.cpp @@ -26,7 +26,7 @@ GELU::GELU() int GELU::load_param(const ParamDict& pd) { - fast_gelu = pd.get(0, 0) != 0; + fast_gelu = pd.get(0, 0); return 0; } diff --git a/src/layer/gelu.h b/src/layer/gelu.h index 612ef1a43..a4fb37a6f 100644 --- a/src/layer/gelu.h +++ b/src/layer/gelu.h @@ -29,7 +29,7 @@ public: virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; public: - bool fast_gelu; + int fast_gelu; }; } // namespace ncnn diff --git a/src/layer/pooling3d.cpp b/src/layer/pooling3d.cpp index 509490669..b2cf3c2d2 100644 --- a/src/layer/pooling3d.cpp +++ b/src/layer/pooling3d.cpp @@ -37,7 +37,7 @@ int Pooling3D::load_param(const ParamDict& pd) pad_right = pd.get(14, pad_left); pad_top = pd.get(13, pad_left); pad_bottom = pd.get(15, pad_top); - pad_front = pd.get(23, pad_top); + pad_front = pd.get(23, pad_left); pad_behind = pd.get(16, pad_front); global_pooling = pd.get(4, 0); pad_mode = pd.get(5, 0); diff --git a/tools/modelwriter.h b/tools/modelwriter.h index cf5236a65..a74ec3a9f 100644 --- a/tools/modelwriter.h +++ b/tools/modelwriter.h @@ -36,7 +36,10 @@ #include "layer/concat.h" #include "layer/convolution.h" #include "layer/convolution1d.h" +#include "layer/convolution3d.h" #include "layer/convolutiondepthwise.h" +#include "layer/convolutiondepthwise1d.h" +#include "layer/convolutiondepthwise3d.h" #include "layer/crop.h" #include "layer/deconvolution.h" #include "layer/deconvolutiondepthwise.h" @@ -48,6 +51,7 @@ #include "layer/exp.h" #include "layer/expanddims.h" #include "layer/flatten.h" +#include "layer/gelu.h" #include "layer/gemm.h" #include "layer/groupnorm.h" #include "layer/gru.h" @@ -57,17 +61,21 @@ #include "layer/input.h" #include "layer/instancenorm.h" #include "layer/interp.h" +#include "layer/layernorm.h" #include "layer/log.h" #include "layer/lrn.h" #include "layer/lstm.h" +#include "layer/matmul.h" #include "layer/memorydata.h" #include "layer/mvn.h" +#include "layer/multiheadattention.h" #include "layer/normalize.h" #include "layer/padding.h" #include "layer/permute.h" #include "layer/pixelshuffle.h" #include "layer/pooling.h" #include "layer/pooling1d.h" +#include "layer/pooling3d.h" #include "layer/power.h" #include "layer/prelu.h" #include "layer/priorbox.h" @@ -921,6 +929,63 @@ int ModelWriter::save(const char* parampath, const char* binpath) mac += (uint64_t)op->kernel_w * outw * outh * inh; } } + else if (layer->type == "Convolution3D") + { + ncnn::Convolution3D* op = (ncnn::Convolution3D*)layer; + ncnn::Convolution3D* op_default = (ncnn::Convolution3D*)layer_default; + + fprintf_param_value(" 0=%d", num_output) + fprintf_param_value(" 1=%d", kernel_w) + { + if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); + if (op->kernel_d != op->kernel_w) fprintf(pp, " 21=%d", op->kernel_d); + } + fprintf_param_value(" 2=%d", dilation_w) + { + if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); + if (op->dilation_d != op->dilation_w) fprintf(pp, " 22=%d", op->dilation_d); + } + fprintf_param_value(" 3=%d", stride_w) + { + if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); + if (op->stride_d != op->stride_w) fprintf(pp, " 23=%d", op->stride_d); + } + fprintf_param_value(" 4=%d", pad_left) + { + if (op->pad_top != op->pad_left) fprintf(pp, " 14=%d", op->pad_top); + if (op->pad_front != op->pad_left) fprintf(pp, " 24=%d", op->pad_front); + } + { + if (op->pad_right != op->pad_left) fprintf(pp, " 15=%d", op->pad_right); + } + { + if (op->pad_bottom != op->pad_top) fprintf(pp, " 16=%d", op->pad_bottom); + } + { + if (op->pad_behind != op->pad_front) fprintf(pp, " 17=%d", op->pad_behind); + } + fprintf_param_value(" 18=%e", pad_value) + fprintf_param_value(" 5=%d", bias_term) + fprintf_param_value(" 6=%d", weight_data_size) + fprintf_param_value(" 9=%d", activation_type) + { + if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); + } + + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); + + if (shape_ready) + { + int inc = blobs[layer->bottoms[0]].shape.c; + int outw = blobs[layer->tops[0]].shape.w; + int outh = blobs[layer->tops[0]].shape.h; + int outd = blobs[layer->tops[0]].shape.d; + int outc = blobs[layer->tops[0]].shape.c; + + mac += (uint64_t)op->kernel_d * op->kernel_h * op->kernel_w * outw * outh * outd * outc * inc; + } + } else if (layer->type == "ConvolutionDepthWise") { ncnn::ConvolutionDepthWise* op = (ncnn::ConvolutionDepthWise*)layer; @@ -996,6 +1061,98 @@ int ModelWriter::save(const char* parampath, const char* binpath) mac += (uint64_t)op->kernel_h * op->kernel_w * outw * outh * (outc / op->group) * (inc / op->group) * op->group; } } + else if (layer->type == "ConvolutionDepthWise1D") + { + ncnn::ConvolutionDepthWise1D* op = (ncnn::ConvolutionDepthWise1D*)layer; + ncnn::ConvolutionDepthWise1D* op_default = (ncnn::ConvolutionDepthWise1D*)layer_default; + + fprintf_param_value(" 0=%d", num_output) + fprintf_param_value(" 1=%d", kernel_w) + fprintf_param_value(" 2=%d", dilation_w) + fprintf_param_value(" 3=%d", stride_w) + fprintf_param_value(" 4=%d", pad_left) + { + if (op->pad_right != op->pad_left) fprintf(pp, " 15=%d", op->pad_right); + } + fprintf_param_value(" 18=%e", pad_value) + fprintf_param_value(" 5=%d", bias_term) + fprintf_param_value(" 6=%d", weight_data_size) + fprintf_param_value(" 7=%d", group) + fprintf_param_value(" 9=%d", activation_type) + { + if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); + } + + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); + + if (shape_ready) + { + int inh = blobs[layer->bottoms[0]].shape.h; + int outw = blobs[layer->tops[0]].shape.w; + int outh = blobs[layer->tops[0]].shape.h; + + mac += (uint64_t)op->kernel_w * outw * (outh / op->group) * (inh / op->group) * op->group; + } + } + else if (layer->type == "ConvolutionDepthWise3D") + { + ncnn::ConvolutionDepthWise3D* op = (ncnn::ConvolutionDepthWise3D*)layer; + ncnn::ConvolutionDepthWise3D* op_default = (ncnn::ConvolutionDepthWise3D*)layer_default; + + fprintf_param_value(" 0=%d", num_output) + fprintf_param_value(" 1=%d", kernel_w) + { + if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); + if (op->kernel_d != op->kernel_w) fprintf(pp, " 21=%d", op->kernel_d); + } + fprintf_param_value(" 2=%d", dilation_w) + { + if (op->dilation_h != op->dilation_w) fprintf(pp, " 12=%d", op->dilation_h); + if (op->dilation_d != op->dilation_w) fprintf(pp, " 22=%d", op->dilation_d); + } + fprintf_param_value(" 3=%d", stride_w) + { + if (op->stride_h != op->stride_w) fprintf(pp, " 13=%d", op->stride_h); + if (op->stride_d != op->stride_w) fprintf(pp, " 23=%d", op->stride_d); + } + fprintf_param_value(" 4=%d", pad_left) + { + if (op->pad_top != op->pad_left) fprintf(pp, " 14=%d", op->pad_top); + if (op->pad_front != op->pad_left) fprintf(pp, " 24=%d", op->pad_front); + } + { + if (op->pad_right != op->pad_left) fprintf(pp, " 15=%d", op->pad_right); + } + { + if (op->pad_bottom != op->pad_top) fprintf(pp, " 16=%d", op->pad_bottom); + } + { + if (op->pad_behind != op->pad_front) fprintf(pp, " 17=%d", op->pad_behind); + } + fprintf_param_value(" 18=%e", pad_value) + fprintf_param_value(" 5=%d", bias_term) + fprintf_param_value(" 6=%d", weight_data_size) + fprintf_param_value(" 7=%d", group) + fprintf_param_value(" 9=%d", activation_type) + { + if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); + } + + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); + + if (shape_ready) + { + int inc = blobs[layer->bottoms[0]].shape.c; + int outw = blobs[layer->tops[0]].shape.w; + int outh = blobs[layer->tops[0]].shape.h; + int outd = blobs[layer->tops[0]].shape.d; + int outc = blobs[layer->tops[0]].shape.c; + + mac += (uint64_t)op->kernel_d * op->kernel_h * op->kernel_w * outw * outh * outd * (outc / op->group) * (inc / op->group) * op->group; + } + } else if (layer->type == "Crop") { ncnn::Crop* op = (ncnn::Crop*)layer; @@ -1206,6 +1363,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) if (!op->axes.empty()) fprintf_param_int_array(0, op->axes, pp); } } + else if (layer->type == "GELU") + { + ncnn::GELU* op = (ncnn::GELU*)layer; + ncnn::GELU* op_default = (ncnn::GELU*)layer_default; + + fprintf_param_value(" 0=%d", fast_gelu) + } else if (layer->type == "Gemm") { ncnn::Gemm* op = (ncnn::Gemm*)layer; @@ -1328,6 +1492,18 @@ int ModelWriter::save(const char* parampath, const char* binpath) fprintf_param_value(" 5=%d", dynamic_target_size) fprintf_param_value(" 6=%d", align_corner) } + else if (layer->type == "LayerNorm") + { + ncnn::LayerNorm* op = (ncnn::LayerNorm*)layer; + ncnn::LayerNorm* op_default = (ncnn::LayerNorm*)layer_default; + + fprintf_param_value(" 0=%d", affine_size) + fprintf_param_value(" 1=%e", eps) + fprintf_param_value(" 2=%d", affine) + + fwrite_weight_data(op->gamma_data, bp); + fwrite_weight_data(op->beta_data, bp); + } else if (layer->type == "Log") { ncnn::Log* op = (ncnn::Log*)layer; @@ -1361,6 +1537,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) fwrite_weight_tag_data(op->bias_c_data, bp); fwrite_weight_tag_data(op->weight_hc_data, bp); } + else if (layer->type == "MatMul") + { + ncnn::MatMul* op = (ncnn::MatMul*)layer; + ncnn::MatMul* op_default = (ncnn::MatMul*)layer_default; + + fprintf_param_value(" 0=%d", transB) + } else if (layer->type == "MemoryData") { ncnn::MemoryData* op = (ncnn::MemoryData*)layer; @@ -1371,6 +1554,24 @@ int ModelWriter::save(const char* parampath, const char* binpath) fprintf_param_value(" 2=%d", c) fwrite_weight_data(op->data, bp); } + else if (layer->type == "MultiHeadAttention") + { + ncnn::MultiHeadAttention* op = (ncnn::MultiHeadAttention*)layer; + ncnn::MultiHeadAttention* op_default = (ncnn::MultiHeadAttention*)layer_default; + + fprintf_param_value(" 0=%d", embed_dim) + fprintf_param_value(" 1=%d", num_head) + fprintf_param_value(" 2=%d", weight_data_size) + + fwrite_weight_tag_data(op->q_weight_data, bp); + fwrite_weight_data(op->q_bias_data, bp); + fwrite_weight_tag_data(op->k_weight_data, bp); + fwrite_weight_data(op->k_bias_data, bp); + fwrite_weight_tag_data(op->v_weight_data, bp); + fwrite_weight_data(op->v_bias_data, bp); + fwrite_weight_tag_data(op->out_weight_data, bp); + fwrite_weight_data(op->out_bias_data, bp); + } else if (layer->type == "MVN") { ncnn::MVN* op = (ncnn::MVN*)layer; @@ -1477,6 +1678,46 @@ int ModelWriter::save(const char* parampath, const char* binpath) fprintf_param_value(" 7=%d", adaptive_pooling) fprintf_param_value(" 8=%d", out_w) } + else if (layer->type == "Pooling3D") + { + ncnn::Pooling3D* op = (ncnn::Pooling3D*)layer; + ncnn::Pooling3D* op_default = (ncnn::Pooling3D*)layer_default; + + fprintf_param_value(" 0=%d", pooling_type) + fprintf_param_value(" 1=%d", kernel_w) + { + if (op->kernel_h != op->kernel_w) fprintf(pp, " 11=%d", op->kernel_h); + if (op->kernel_d != op->kernel_w) fprintf(pp, " 21=%d", op->kernel_d); + } + fprintf_param_value(" 2=%d", stride_w) + { + if (op->stride_h != op->stride_w) fprintf(pp, " 12=%d", op->stride_h); + if (op->stride_d != op->stride_w) fprintf(pp, " 22=%d", op->stride_d); + } + fprintf_param_value(" 3=%d", pad_left) + { + if (op->pad_top != op->pad_left) fprintf(pp, " 13=%d", op->pad_top); + if (op->pad_front != op->pad_left) fprintf(pp, " 23=%d", op->pad_front); + } + { + if (op->pad_right != op->pad_left) fprintf(pp, " 14=%d", op->pad_right); + } + { + if (op->pad_bottom != op->pad_top) fprintf(pp, " 15=%d", op->pad_bottom); + } + { + if (op->pad_behind != op->pad_front) fprintf(pp, " 16=%d", op->pad_behind); + } + fprintf_param_value(" 4=%d", global_pooling) + fprintf_param_value(" 5=%d", pad_mode) + fprintf_param_value(" 6=%d", avgpool_count_include_pad) + fprintf_param_value(" 7=%d", adaptive_pooling) + fprintf_param_value(" 8=%d", out_w) + { + if (op->out_h != op->out_w) fprintf(pp, " 18=%d", op->out_h); + if (op->out_d != op->out_w) fprintf(pp, " 28=%d", op->out_d); + } + } else if (layer->type == "Power") { ncnn::Power* op = (ncnn::Power*)layer; @@ -1564,6 +1805,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) if (!op->axes.empty()) fprintf_param_int_array(3, op->axes, pp); } fprintf_param_value(" 4=%d", keepdims) + + // HACK + if (!op->axes.empty()) + { + int fixbug0 = 1; + fprintf(pp, " 5=%d", fixbug0); + } } else if (layer->type == "ReLU") {