pnnx fp16 option for ncnn and onnx weight type (#4350)

3 years ago · ec1b07c9fe
--- a/tools/pnnx/README.md
+++ b/tools/pnnx/README.md
@@ -93,6 +93,7 @@ Usage: pnnx [model.pt] [(key=value)...]
  ncnnparam=model.ncnn.param
  ncnnbin=model.ncnn.bin
  ncnnpy=model_ncnn.py
  fp16=1
  optlevel=2
  device=cpu/gpu
  inputshape=[1,3,224,224],...
@@ -119,6 +120,8 @@ Parameters:

 `ncnnpy` (default="*_ncnn.py"): pyncnn script for inference

 `fp16` (default=1): save ncnn weights and the onnx model in fp16 data type

 `optlevel` (default=2): graph optimization level 

 | Option | Optimization level              |
--- a/tools/pnnx/src/CMakeLists.txt
+++ b/tools/pnnx/src/CMakeLists.txt
@@ -343,7 +343,6 @@ set(pnnx_pass_ncnn_SRCS
    pass_ncnn/insert_split.cpp
    pass_ncnn/chain_multi_output.cpp
    pass_ncnn/solve_batch_index.cpp
    pass_ncnn/convert_to_fp16_model.cpp

    pass_ncnn/eliminate_noop.cpp
    pass_ncnn/eliminate_tail_reshape_permute.cpp
--- a/tools/pnnx/src/main.cpp
+++ b/tools/pnnx/src/main.cpp
@@ -168,6 +168,7 @@ static void show_usage()
    fprintf(stderr, "  ncnnparam=model.ncnn.param\n");
    fprintf(stderr, "  ncnnbin=model.ncnn.bin\n");
    fprintf(stderr, "  ncnnpy=model_ncnn.py\n");
    fprintf(stderr, "  fp16=1\n");
    fprintf(stderr, "  optlevel=2\n");
    fprintf(stderr, "  device=cpu/gpu\n");
    fprintf(stderr, "  inputshape=[1,3,224,224],...\n");
@@ -210,6 +211,7 @@ int main(int argc, char** argv)
    std::string ncnnparampath = ptbase + ".ncnn.param";
    std::string ncnnbinpath = ptbase + ".ncnn.bin";
    std::string ncnnpypath = ptbase + "_ncnn.py";
    int fp16 = 1;
    int optlevel = 2;
    std::string device = "cpu";
    std::vector<std::vector<int64_t> > input_shapes;
@@ -250,6 +252,8 @@ int main(int argc, char** argv)
            ncnnbinpath = std::string(value);
        if (strcmp(key, "ncnnpy") == 0)
            ncnnpypath = std::string(value);
        if (strcmp(key, "fp16") == 0)
            fp16 = atoi(value);
        if (strcmp(key, "optlevel") == 0)
            optlevel = atoi(value);
        if (strcmp(key, "device") == 0)
@@ -273,6 +277,7 @@ int main(int argc, char** argv)
        fprintf(stderr, "ncnnparam = %s\n", ncnnparampath.c_str());
        fprintf(stderr, "ncnnbin = %s\n", ncnnbinpath.c_str());
        fprintf(stderr, "ncnnpy = %s\n", ncnnpypath.c_str());
        fprintf(stderr, "fp16 = %d\n", fp16);
        fprintf(stderr, "optlevel = %d\n", optlevel);
        fprintf(stderr, "device = %s\n", device.c_str());
        fprintf(stderr, "inputshape = ");
@@ -415,7 +420,7 @@ int main(int argc, char** argv)
    pnnx_graph.python(pnnxpypath, pnnxbinpath);

 #if BUILD_PNNX2ONNX
    pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str());
    pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str(), fp16);
 #else
    fprintf(stderr, "pnnx build without onnx-zero support, skip saving onnx\n");
 #endif
@@ -426,7 +431,7 @@ int main(int argc, char** argv)

        pnnx::pass_ncnn(pnnx_graph);

        pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath);
        pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath, fp16);
    }

    //     pnnx::Graph pnnx_graph2;
--- a/tools/pnnx/src/pass_ncnn.cpp
+++ b/tools/pnnx/src/pass_ncnn.cpp
@@ -31,7 +31,6 @@
 #include "pass_ncnn/insert_split.h"
 #include "pass_ncnn/chain_multi_output.h"
 #include "pass_ncnn/solve_batch_index.h"
 #include "pass_ncnn/convert_to_fp16_model.h"

 #include "pass_ncnn/eliminate_noop.h"
 #include "pass_ncnn/eliminate_tail_reshape_permute.h"
@@ -134,8 +133,6 @@ void pass_ncnn(Graph& g)
    ncnn::convert_input(g);

    ncnn::eliminate_output(g);

    ncnn::convert_to_fp16_model(g);
 }

 } // namespace pnnx
--- a/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.cpp
+++ b/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.cpp
@@ -1,133 +0,0 @@
 // Tencent is pleased to support the open source community by making ncnn available.
 //
 // Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
 //
 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // https://opensource.org/licenses/BSD-3-Clause
 //
 // Unless required by applicable law or agreed to in writing, software distributed
 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.

 #include "convert_to_fp16_model.h"

 namespace pnnx {

 namespace ncnn {

 static unsigned short float32_to_float16(float value)
 {
    // Convert one fp32 value to its fp16 bit pattern.
    // The mantissa is truncated (no rounding) and anything that would need
    // an fp16 denormal is flushed to signed zero.

    // view the fp32 bit pattern, layout is sign:1 exponent:8 mantissa:23
    union
    {
        unsigned int u;
        float f;
    } bits;

    bits.f = value;

    const unsigned short sign_bit = static_cast<unsigned short>(bits.u >> 31);
    const unsigned short exp32 = static_cast<unsigned short>((bits.u >> 23) & 0xFF);
    const unsigned int frac32 = bits.u & 0x7FFFFF;

    // fp16 layout is sign:1 exponent:5 mantissa:10
    const unsigned short signed_zero = static_cast<unsigned short>(sign_bit << 15);
    const unsigned short signed_inf = static_cast<unsigned short>(signed_zero | (0x1F << 10));

    if (exp32 == 0)
    {
        // fp32 zero or denormal, always underflows in fp16
        return signed_zero;
    }

    if (exp32 == 0xFF)
    {
        // infinity keeps an empty mantissa, every NaN becomes mantissa 0x200
        return frac32 ? static_cast<unsigned short>(signed_inf | 0x200) : signed_inf;
    }

    // normalized fp32, move the exponent bias from 127 to 15
    const int exp16 = static_cast<int>(exp32) - 127 + 15;

    if (exp16 >= 31)
    {
        // magnitude too large for fp16, becomes infinity
        return signed_inf;
    }

    if (exp16 <= 0)
    {
        // normal fp32 that is too small for a normal fp16
        return signed_zero;
    }

    // normal fp16, keep the top 10 mantissa bits
    return static_cast<unsigned short>(signed_zero | (exp16 << 10) | (frac32 >> 13));
 }

 static size_t alignSize(size_t sz, int n)
 {
    // Round sz up to the next multiple of n.
    // The mask trick only works when n is a power of two.
    const size_t mask = static_cast<size_t>(-n);
    const size_t bumped = sz + n - 1;
    return bumped & mask;
 }

 void convert_to_fp16_model(Graph& graph)
 {
    // Walk every operator attribute in order. An attribute of type 0 holding
    // four zero bytes is treated as the fp32 type tag: it is rewritten to the
    // fp16 tag and the attribute that follows it is converted from fp32 to fp16.
    for (Operator* op : graph.ops)
    {
        // true right after a tag has been rewritten, until the next attribute is converted
        bool payload_pending = false;
        for (auto& entry : op->attrs)
        {
            Attribute& attr = entry.second;

            if (!payload_pending)
            {
                const bool is_fp32_tag = attr.type == 0 && attr.data == std::vector<char> {0, 0, 0, 0};
                if (is_fp32_tag)
                {
                    // fp16 tag 0x01306B47, stored lowest byte first
                    attr.data[0] = 0x47;
                    attr.data[1] = 0x6B;
                    attr.data[2] = 0x30;
                    attr.data[3] = 0x01;

                    payload_pending = true;
                }

                continue;
            }

            // fp32 -> fp16, output buffer is rounded up to a multiple of 4 bytes
            const float* src = (const float*)attr.data.data();
            int count = attr.data.size() / 4;
            std::vector<char> halves(alignSize(count * 2, 4));
            unsigned short* dst = (unsigned short*)halves.data();
            for (int k = 0; k < count; k++)
            {
                dst[k] = float32_to_float16(src[k]);
            }

            if (count % 2 == 1)
            {
                // odd element count leaves one spare half slot,
                // fill it with a fixed value for model hash consistency
                dst[count] = 0x2283;
            }

            attr.type = 3;
            attr.data = halves;

            payload_pending = false;
        }
    }
 }

 } // namespace ncnn

 } // namespace pnnx
--- a/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.h
+++ b/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.h
@@ -1,25 +0,0 @@
 // Tencent is pleased to support the open source community by making ncnn available.
 //
 // Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
 //
 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // https://opensource.org/licenses/BSD-3-Clause
 //
 // Unless required by applicable law or agreed to in writing, software distributed
 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.

 #include "pass_ncnn.h"

 namespace pnnx {

 namespace ncnn {

 void convert_to_fp16_model(Graph& graph);

 } // namespace ncnn

 } // namespace pnnx
--- a/tools/pnnx/src/save_ncnn.cpp
+++ b/tools/pnnx/src/save_ncnn.cpp
@@ -61,7 +61,66 @@ static bool string_is_positive_integer(const std::string& t)
    return true;
 }

 int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath)
 static unsigned short float32_to_float16(float value)
 {
    // Convert one fp32 value to its fp16 bit pattern.
    // The mantissa is truncated (no rounding) and anything that would need
    // an fp16 denormal is flushed to signed zero.

    // view the fp32 bit pattern, layout is sign:1 exponent:8 mantissa:23
    union
    {
        unsigned int u;
        float f;
    } bits;

    bits.f = value;

    const unsigned short sign_bit = static_cast<unsigned short>(bits.u >> 31);
    const unsigned short exp32 = static_cast<unsigned short>((bits.u >> 23) & 0xFF);
    const unsigned int frac32 = bits.u & 0x7FFFFF;

    // fp16 layout is sign:1 exponent:5 mantissa:10
    const unsigned short signed_zero = static_cast<unsigned short>(sign_bit << 15);
    const unsigned short signed_inf = static_cast<unsigned short>(signed_zero | (0x1F << 10));

    if (exp32 == 0)
    {
        // fp32 zero or denormal, always underflows in fp16
        return signed_zero;
    }

    if (exp32 == 0xFF)
    {
        // infinity keeps an empty mantissa, every NaN becomes mantissa 0x200
        return frac32 ? static_cast<unsigned short>(signed_inf | 0x200) : signed_inf;
    }

    // normalized fp32, move the exponent bias from 127 to 15
    const int exp16 = static_cast<int>(exp32) - 127 + 15;

    if (exp16 >= 31)
    {
        // magnitude too large for fp16, becomes infinity
        return signed_inf;
    }

    if (exp16 <= 0)
    {
        // normal fp32 that is too small for a normal fp16
        return signed_zero;
    }

    // normal fp16, keep the top 10 mantissa bits
    return static_cast<unsigned short>(signed_zero | (exp16 << 10) | (frac32 >> 13));
 }

 static size_t alignSize(size_t sz, int n)
 {
    // Round sz up to the next multiple of n.
    // The mask trick only works when n is a power of two.
    const size_t mask = static_cast<size_t>(-n);
    const size_t bumped = sz + n - 1;
    return bumped & mask;
 }

 int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16)
 {
    FILE* paramfp = fopen(parampath.c_str(), "wb");
    if (!paramfp)
@@ -196,12 +255,48 @@ int save_ncnn(const Graph& g, const std::string& parampath, const std::string& b
            }
        }

        bool is_type_flag_fp32 = false;
        for (const auto& it : op->attrs)
        {
            //             fprintf(paramfp, " @%s=", it.first.c_str());

            const Attribute& attr = it.second;

            if (fp16 && is_type_flag_fp32)
            {
                // fp32 -> fp16
                const float* p = (const float*)attr.data.data();
                int len = attr.data.size() / 4;
                std::vector<char> data_fp16(alignSize(len * 2, 4));
                unsigned short* p_fp16 = (unsigned short*)data_fp16.data();
                for (int i = 0; i < len; i++)
                {
                    p_fp16[i] = float32_to_float16(p[i]);
                }

                // pad size to 4bytes
                if (len % 2 == 1)
                {
                    // pad with fixed value for model hash consistency
                    p_fp16[len] = 0x2283;
                }

                fwrite(data_fp16.data(), data_fp16.size(), 1, binfp);

                is_type_flag_fp32 = false;
                continue;
            }

            if (fp16 && attr.type == 0 && attr.data == std::vector<char> {0, 0, 0, 0})
            {
                // write fp16 flag
                unsigned int fp16_flag = 0x01306B47;
                fwrite((const char*)&fp16_flag, sizeof(fp16_flag), 1, binfp);

                is_type_flag_fp32 = true;
                continue;
            }

            fwrite(attr.data.data(), attr.data.size(), 1, binfp);
        }

--- a/tools/pnnx/src/save_ncnn.h
+++ b/tools/pnnx/src/save_ncnn.h
@@ -19,7 +19,7 @@

 namespace pnnx {

 int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath);
 int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16);

 } // namespace pnnx

--- a/tools/pnnx/src/save_onnx.cpp
+++ b/tools/pnnx/src/save_onnx.cpp
@@ -33,7 +33,61 @@ extern const Attribute& get_operator_attr(const Operator* op, const char* key);
 extern const char* get_param_s(const Parameter& p);
 extern std::vector<const char*> get_param_as(const Parameter& p);

 int save_onnx(const Graph& g, const char* onnxpath)
 static unsigned short float32_to_float16(float value)
 {
    // Convert one fp32 value to its fp16 bit pattern.
    // The mantissa is truncated (no rounding) and anything that would need
    // an fp16 denormal is flushed to signed zero.

    // view the fp32 bit pattern, layout is sign:1 exponent:8 mantissa:23
    union
    {
        unsigned int u;
        float f;
    } bits;

    bits.f = value;

    const unsigned short sign_bit = static_cast<unsigned short>(bits.u >> 31);
    const unsigned short exp32 = static_cast<unsigned short>((bits.u >> 23) & 0xFF);
    const unsigned int frac32 = bits.u & 0x7FFFFF;

    // fp16 layout is sign:1 exponent:5 mantissa:10
    const unsigned short signed_zero = static_cast<unsigned short>(sign_bit << 15);
    const unsigned short signed_inf = static_cast<unsigned short>(signed_zero | (0x1F << 10));

    if (exp32 == 0)
    {
        // fp32 zero or denormal, always underflows in fp16
        return signed_zero;
    }

    if (exp32 == 0xFF)
    {
        // infinity keeps an empty mantissa, every NaN becomes mantissa 0x200
        return frac32 ? static_cast<unsigned short>(signed_inf | 0x200) : signed_inf;
    }

    // normalized fp32, move the exponent bias from 127 to 15
    const int exp16 = static_cast<int>(exp32) - 127 + 15;

    if (exp16 >= 31)
    {
        // magnitude too large for fp16, becomes infinity
        return signed_inf;
    }

    if (exp16 <= 0)
    {
        // normal fp32 that is too small for a normal fp16
        return signed_zero;
    }

    // normal fp16, keep the top 10 mantissa bits
    return static_cast<unsigned short>(signed_zero | (exp16 << 10) | (frac32 >> 13));
 }

 int save_onnx(const Graph& g, const char* onnxpath, int fp16)
 {
    onnx::ModelProto model;

@@ -52,10 +106,10 @@ int save_onnx(const Graph& g, const char* onnxpath)
        switch (x->type)
        {
        case 1: // f32
            tpt->set_elem_type(1);
            tpt->set_elem_type(fp16 ? 10 : 1);
            break;
        case 2: // f64
            tpt->set_elem_type(11);
            tpt->set_elem_type(fp16 ? 10 : 11);
            break;
        case 3: // f16
            tpt->set_elem_type(10);
@@ -120,16 +174,12 @@ int save_onnx(const Graph& g, const char* onnxpath)
        }

        std::vector<const char*> params_keys = get_operator_params_keys(op);

        // for (const auto& it : op->params)
        for (const char* param_name : params_keys)
        {
            // const Parameter& param = it.second;
            const Parameter& param = get_operator_param(op, param_name);

            onnx::AttributeProto* ap = np->add_attribute();

            // ap->set_name(get_param_name(it));
            ap->set_name(param_name);

            if (param.type == 0)
@@ -180,8 +230,6 @@ int save_onnx(const Graph& g, const char* onnxpath)
        }

        std::vector<const char*> attrs_keys = get_operator_attrs_keys(op);

        // for (const auto& it : op->attrs)
        for (const char* attr_name : attrs_keys)
        {
            onnx::TensorProto* tp = gp->add_initializer();
@@ -190,7 +238,6 @@ int save_onnx(const Graph& g, const char* onnxpath)

            np->add_input(std::string(get_operator_name(op)) + "." + attr_name);

            // const Attribute& attr = it.second;
            const Attribute& attr = get_operator_attr(op, attr_name);
            for (auto s : attr.shape)
            {
@@ -200,10 +247,10 @@ int save_onnx(const Graph& g, const char* onnxpath)
            switch (attr.type)
            {
            case 1: // f32
                tp->set_data_type(1);
                tp->set_data_type(fp16 ? 10 : 1);
                break;
            case 2: // f64
                tp->set_data_type(11);
                tp->set_data_type(fp16 ? 10 : 11);
                break;
            case 3: // f16
                tp->set_data_type(10);
@@ -241,18 +288,36 @@ int save_onnx(const Graph& g, const char* onnxpath)
            }

            std::string* d = tp->mutable_raw_data();
            d->resize(attr.data.size());
            memcpy((void*)d->data(), attr.data.data(), attr.data.size());
            // Fill raw_data with the attribute payload. When fp16 is enabled,
            // f32 (type 1) and f64 (type 2) payloads are narrowed to fp16;
            // every other case is copied through unchanged.
            if (fp16 && attr.type == 1)
            {
                // fp32 to fp16, 4 bytes in and 2 bytes out per element
                const float* p = (const float*)attr.data.data();
                int len = attr.data.size() / sizeof(float);
                d->resize(len * sizeof(unsigned short));
                unsigned short* p_fp16 = (unsigned short*)d->data();
                for (int i = 0; i < len; i++)
                {
                    p_fp16[i] = float32_to_float16(p[i]);
                }
            }
            else if (fp16 && attr.type == 2)
            {
                // fp64 to fp16, 8 bytes in and 2 bytes out per element
                // the element count must be derived from sizeof(double), dividing by 4
                // would read past the source and write past the destination buffer
                const double* p = (const double*)attr.data.data();
                int len = attr.data.size() / sizeof(double);
                d->resize(len * sizeof(unsigned short));
                unsigned short* p_fp16 = (unsigned short*)d->data();
                for (int i = 0; i < len; i++)
                {
                    p_fp16[i] = float32_to_float16((float)p[i]);
                }
            }
            else
            {
                // keep the payload bytes as they are
                d->resize(attr.data.size());
                memcpy((void*)d->data(), attr.data.data(), attr.data.size());
            }
        }

        //         if (op->inputnames.size() == op->inputs.size())
        //         {
        //             for (size_t i = 0; i < op->inputs.size(); i++)
        //             {
        //                 const Operand* oprand = op->inputs[i];
        //                 fprintf(paramfp, " $%s=%s", op->inputnames[i].c_str(), oprand->name.c_str());
        //             }
        //         }
    }

    std::fstream output(onnxpath, std::ios::out | std::ios::trunc | std::ios::binary);
--- a/tools/pnnx/src/save_onnx.h
+++ b/tools/pnnx/src/save_onnx.h
@@ -19,7 +19,7 @@

 namespace pnnx {

 int save_onnx(const Graph& g, const char* onnxpath);
 int save_onnx(const Graph& g, const char* onnxpath, int fp16);

 } // namespace pnnx