diff --git a/tools/pnnx/README.md b/tools/pnnx/README.md index 7c30b68ba..882280d1f 100644 --- a/tools/pnnx/README.md +++ b/tools/pnnx/README.md @@ -93,6 +93,7 @@ Usage: pnnx [model.pt] [(key=value)...] ncnnparam=model.ncnn.param ncnnbin=model.ncnn.bin ncnnpy=model_ncnn.py + fp16=1 optlevel=2 device=cpu/gpu inputshape=[1,3,224,224],... @@ -119,6 +120,8 @@ Parameters: `ncnnpy` (default="*_ncnn.py"): pyncnn script for inference +`fp16` (default=1): save ncnn weight and onnx in fp16 data type + `optlevel` (default=2): graph optimization level | Option | Optimization level | diff --git a/tools/pnnx/src/CMakeLists.txt b/tools/pnnx/src/CMakeLists.txt index 38e0516c3..f2a9b6a9f 100644 --- a/tools/pnnx/src/CMakeLists.txt +++ b/tools/pnnx/src/CMakeLists.txt @@ -343,7 +343,6 @@ set(pnnx_pass_ncnn_SRCS pass_ncnn/insert_split.cpp pass_ncnn/chain_multi_output.cpp pass_ncnn/solve_batch_index.cpp - pass_ncnn/convert_to_fp16_model.cpp pass_ncnn/eliminate_noop.cpp pass_ncnn/eliminate_tail_reshape_permute.cpp diff --git a/tools/pnnx/src/main.cpp b/tools/pnnx/src/main.cpp index 89c81d8d6..98066c4c5 100644 --- a/tools/pnnx/src/main.cpp +++ b/tools/pnnx/src/main.cpp @@ -168,6 +168,7 @@ static void show_usage() fprintf(stderr, " ncnnparam=model.ncnn.param\n"); fprintf(stderr, " ncnnbin=model.ncnn.bin\n"); fprintf(stderr, " ncnnpy=model_ncnn.py\n"); + fprintf(stderr, " fp16=1\n"); fprintf(stderr, " optlevel=2\n"); fprintf(stderr, " device=cpu/gpu\n"); fprintf(stderr, " inputshape=[1,3,224,224],...\n"); @@ -210,6 +211,7 @@ int main(int argc, char** argv) std::string ncnnparampath = ptbase + ".ncnn.param"; std::string ncnnbinpath = ptbase + ".ncnn.bin"; std::string ncnnpypath = ptbase + "_ncnn.py"; + int fp16 = 1; int optlevel = 2; std::string device = "cpu"; std::vector > input_shapes; @@ -250,6 +252,8 @@ int main(int argc, char** argv) ncnnbinpath = std::string(value); if (strcmp(key, "ncnnpy") == 0) ncnnpypath = std::string(value); + if (strcmp(key, "fp16") == 0) + fp16 = atoi(value); 
if (strcmp(key, "optlevel") == 0) optlevel = atoi(value); if (strcmp(key, "device") == 0) @@ -273,6 +277,7 @@ int main(int argc, char** argv) fprintf(stderr, "ncnnparam = %s\n", ncnnparampath.c_str()); fprintf(stderr, "ncnnbin = %s\n", ncnnbinpath.c_str()); fprintf(stderr, "ncnnpy = %s\n", ncnnpypath.c_str()); + fprintf(stderr, "fp16 = %d\n", fp16); fprintf(stderr, "optlevel = %d\n", optlevel); fprintf(stderr, "device = %s\n", device.c_str()); fprintf(stderr, "inputshape = "); @@ -415,7 +420,7 @@ int main(int argc, char** argv) pnnx_graph.python(pnnxpypath, pnnxbinpath); #if BUILD_PNNX2ONNX - pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str()); + pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str(), fp16); #else fprintf(stderr, "pnnx build without onnx-zero support, skip saving onnx\n"); #endif @@ -426,7 +431,7 @@ int main(int argc, char** argv) pnnx::pass_ncnn(pnnx_graph); - pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath); + pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath, fp16); } // pnnx::Graph pnnx_graph2; diff --git a/tools/pnnx/src/pass_ncnn.cpp b/tools/pnnx/src/pass_ncnn.cpp index c4a8d46af..14dedf0e8 100644 --- a/tools/pnnx/src/pass_ncnn.cpp +++ b/tools/pnnx/src/pass_ncnn.cpp @@ -31,7 +31,6 @@ #include "pass_ncnn/insert_split.h" #include "pass_ncnn/chain_multi_output.h" #include "pass_ncnn/solve_batch_index.h" -#include "pass_ncnn/convert_to_fp16_model.h" #include "pass_ncnn/eliminate_noop.h" #include "pass_ncnn/eliminate_tail_reshape_permute.h" @@ -134,8 +133,6 @@ void pass_ncnn(Graph& g) ncnn::convert_input(g); ncnn::eliminate_output(g); - - ncnn::convert_to_fp16_model(g); } } // namespace pnnx diff --git a/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.cpp b/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.cpp deleted file mode 100644 index ee004b2b5..000000000 --- a/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.cpp +++ /dev/null @@ -1,133 +0,0 @@ -// Tencent is pleased to support the open source community by making ncnn 
available. -// -// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except -// in compliance with the License. You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software distributed -// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -#include "convert_to_fp16_model.h" - -namespace pnnx { - -namespace ncnn { - -static unsigned short float32_to_float16(float value) -{ - // 1 : 8 : 23 - union - { - unsigned int u; - float f; - } tmp; - - tmp.f = value; - - // 1 : 8 : 23 - unsigned short sign = (tmp.u & 0x80000000) >> 31; - unsigned short exponent = (tmp.u & 0x7F800000) >> 23; - unsigned int significand = tmp.u & 0x7FFFFF; - - // NCNN_LOGE("%d %d %d", sign, exponent, significand); - - // 1 : 5 : 10 - unsigned short fp16; - if (exponent == 0) - { - // zero or denormal, always underflow - fp16 = (sign << 15) | (0x00 << 10) | 0x00; - } - else if (exponent == 0xFF) - { - // infinity or NaN - fp16 = (sign << 15) | (0x1F << 10) | (significand ? 
0x200 : 0x00); - } - else - { - // normalized - short newexp = exponent + (-127 + 15); - if (newexp >= 31) - { - // overflow, return infinity - fp16 = (sign << 15) | (0x1F << 10) | 0x00; - } - else if (newexp <= 0) - { - // Some normal fp32 cannot be expressed as normal fp16 - fp16 = (sign << 15) | (0x00 << 10) | 0x00; - } - else - { - // normal fp16 - fp16 = (sign << 15) | (newexp << 10) | (significand >> 13); - } - } - - return fp16; -} - -static size_t alignSize(size_t sz, int n) -{ - return (sz + n - 1) & -n; -} - -void convert_to_fp16_model(Graph& graph) -{ - for (Operator* op : graph.ops) - { - bool is_type_flag_fp32 = false; - for (auto& it : op->attrs) - { - Attribute& attr = it.second; - - if (is_type_flag_fp32) - { - // fp32 -> fp16 - const float* p = (const float*)attr.data.data(); - int len = attr.data.size() / 4; - std::vector data_fp16(alignSize(len * 2, 4)); - unsigned short* p_fp16 = (unsigned short*)data_fp16.data(); - for (int i = 0; i < len; i++) - { - p_fp16[i] = float32_to_float16(p[i]); - } - - // pad size to 4bytes - if (len % 2 == 1) - { - // pad with fixed value for model hash consistency - p_fp16[len] = 0x2283; - } - - attr.type = 3; - attr.data = data_fp16; - - is_type_flag_fp32 = false; - continue; - } - - if (attr.type == 0 && attr.data == std::vector {0, 0, 0, 0}) - { - // write fp16 flag - // unsigned int fp16_flag = 0x01306B47; - attr.data[0] = 0x47; - attr.data[1] = 0x6B; - attr.data[2] = 0x30; - attr.data[3] = 0x01; - - is_type_flag_fp32 = true; - continue; - } - } - } -} - -} // namespace ncnn - -} // namespace pnnx diff --git a/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.h b/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.h deleted file mode 100644 index 3f609d30c..000000000 --- a/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.h +++ /dev/null @@ -1,25 +0,0 @@ -// Tencent is pleased to support the open source community by making ncnn available. -// -// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. 
-// -// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except -// in compliance with the License. You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software distributed -// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -#include "pass_ncnn.h" - -namespace pnnx { - -namespace ncnn { - -void convert_to_fp16_model(Graph& graph); - -} // namespace ncnn - -} // namespace pnnx diff --git a/tools/pnnx/src/save_ncnn.cpp b/tools/pnnx/src/save_ncnn.cpp index b0710e9db..6a4407879 100644 --- a/tools/pnnx/src/save_ncnn.cpp +++ b/tools/pnnx/src/save_ncnn.cpp @@ -61,7 +61,66 @@ static bool string_is_positive_integer(const std::string& t) return true; } -int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath) +static unsigned short float32_to_float16(float value) +{ + // 1 : 8 : 23 + union + { + unsigned int u; + float f; + } tmp; + + tmp.f = value; + + // 1 : 8 : 23 + unsigned short sign = (tmp.u & 0x80000000) >> 31; + unsigned short exponent = (tmp.u & 0x7F800000) >> 23; + unsigned int significand = tmp.u & 0x7FFFFF; + + // NCNN_LOGE("%d %d %d", sign, exponent, significand); + + // 1 : 5 : 10 + unsigned short fp16; + if (exponent == 0) + { + // zero or denormal, always underflow + fp16 = (sign << 15) | (0x00 << 10) | 0x00; + } + else if (exponent == 0xFF) + { + // infinity or NaN + fp16 = (sign << 15) | (0x1F << 10) | (significand ? 
0x200 : 0x00); + } + else + { + // normalized + short newexp = exponent + (-127 + 15); + if (newexp >= 31) + { + // overflow, return infinity + fp16 = (sign << 15) | (0x1F << 10) | 0x00; + } + else if (newexp <= 0) + { + // Some normal fp32 cannot be expressed as normal fp16 + fp16 = (sign << 15) | (0x00 << 10) | 0x00; + } + else + { + // normal fp16 + fp16 = (sign << 15) | (newexp << 10) | (significand >> 13); + } + } + + return fp16; +} + +static size_t alignSize(size_t sz, int n) +{ + return (sz + n - 1) & -n; +} + +int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16) { FILE* paramfp = fopen(parampath.c_str(), "wb"); if (!paramfp) @@ -196,12 +255,48 @@ int save_ncnn(const Graph& g, const std::string& parampath, const std::string& b } } + bool is_type_flag_fp32 = false; for (const auto& it : op->attrs) { // fprintf(paramfp, " @%s=", it.first.c_str()); const Attribute& attr = it.second; + if (fp16 && is_type_flag_fp32) + { + // fp32 -> fp16 + const float* p = (const float*)attr.data.data(); + int len = attr.data.size() / 4; + std::vector data_fp16(alignSize(len * 2, 4)); + unsigned short* p_fp16 = (unsigned short*)data_fp16.data(); + for (int i = 0; i < len; i++) + { + p_fp16[i] = float32_to_float16(p[i]); + } + + // pad size to 4bytes + if (len % 2 == 1) + { + // pad with fixed value for model hash consistency + p_fp16[len] = 0x2283; + } + + fwrite(data_fp16.data(), data_fp16.size(), 1, binfp); + + is_type_flag_fp32 = false; + continue; + } + + if (fp16 && attr.type == 0 && attr.data == std::vector {0, 0, 0, 0}) + { + // write fp16 flag + unsigned int fp16_flag = 0x01306B47; + fwrite((const char*)&fp16_flag, sizeof(fp16_flag), 1, binfp); + + is_type_flag_fp32 = true; + continue; + } + fwrite(attr.data.data(), attr.data.size(), 1, binfp); } diff --git a/tools/pnnx/src/save_ncnn.h b/tools/pnnx/src/save_ncnn.h index c49f506d3..458c14700 100644 --- a/tools/pnnx/src/save_ncnn.h +++ 
b/tools/pnnx/src/save_ncnn.h @@ -19,7 +19,7 @@ namespace pnnx { -int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath); +int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16); } // namespace pnnx diff --git a/tools/pnnx/src/save_onnx.cpp b/tools/pnnx/src/save_onnx.cpp index 86c64b904..55bb10cf7 100644 --- a/tools/pnnx/src/save_onnx.cpp +++ b/tools/pnnx/src/save_onnx.cpp @@ -33,7 +33,61 @@ extern const Attribute& get_operator_attr(const Operator* op, const char* key); extern const char* get_param_s(const Parameter& p); extern std::vector get_param_as(const Parameter& p); -int save_onnx(const Graph& g, const char* onnxpath) +static unsigned short float32_to_float16(float value) +{ + // 1 : 8 : 23 + union + { + unsigned int u; + float f; + } tmp; + + tmp.f = value; + + // 1 : 8 : 23 + unsigned short sign = (tmp.u & 0x80000000) >> 31; + unsigned short exponent = (tmp.u & 0x7F800000) >> 23; + unsigned int significand = tmp.u & 0x7FFFFF; + + // NCNN_LOGE("%d %d %d", sign, exponent, significand); + + // 1 : 5 : 10 + unsigned short fp16; + if (exponent == 0) + { + // zero or denormal, always underflow + fp16 = (sign << 15) | (0x00 << 10) | 0x00; + } + else if (exponent == 0xFF) + { + // infinity or NaN + fp16 = (sign << 15) | (0x1F << 10) | (significand ? 
0x200 : 0x00); + } + else + { + // normalized + short newexp = exponent + (-127 + 15); + if (newexp >= 31) + { + // overflow, return infinity + fp16 = (sign << 15) | (0x1F << 10) | 0x00; + } + else if (newexp <= 0) + { + // Some normal fp32 cannot be expressed as normal fp16 + fp16 = (sign << 15) | (0x00 << 10) | 0x00; + } + else + { + // normal fp16 + fp16 = (sign << 15) | (newexp << 10) | (significand >> 13); + } + } + + return fp16; +} + +int save_onnx(const Graph& g, const char* onnxpath, int fp16) { onnx::ModelProto model; @@ -52,10 +106,10 @@ int save_onnx(const Graph& g, const char* onnxpath) switch (x->type) { case 1: // f32 - tpt->set_elem_type(1); + tpt->set_elem_type(fp16 ? 10 : 1); break; case 2: // f64 - tpt->set_elem_type(11); + tpt->set_elem_type(fp16 ? 10 : 11); break; case 3: // f16 tpt->set_elem_type(10); @@ -120,16 +174,12 @@ int save_onnx(const Graph& g, const char* onnxpath) } std::vector params_keys = get_operator_params_keys(op); - - // for (const auto& it : op->params) for (const char* param_name : params_keys) { - // const Parameter& param = it.second; const Parameter& param = get_operator_param(op, param_name); onnx::AttributeProto* ap = np->add_attribute(); - // ap->set_name(get_param_name(it)); ap->set_name(param_name); if (param.type == 0) @@ -180,8 +230,6 @@ int save_onnx(const Graph& g, const char* onnxpath) } std::vector attrs_keys = get_operator_attrs_keys(op); - - // for (const auto& it : op->attrs) for (const char* attr_name : attrs_keys) { onnx::TensorProto* tp = gp->add_initializer(); @@ -190,7 +238,6 @@ int save_onnx(const Graph& g, const char* onnxpath) np->add_input(std::string(get_operator_name(op)) + "." + attr_name); - // const Attribute& attr = it.second; const Attribute& attr = get_operator_attr(op, attr_name); for (auto s : attr.shape) { @@ -200,10 +247,10 @@ int save_onnx(const Graph& g, const char* onnxpath) switch (attr.type) { case 1: // f32 - tp->set_data_type(1); + tp->set_data_type(fp16 ? 
10 : 1); break; case 2: // f64 - tp->set_data_type(11); + tp->set_data_type(fp16 ? 10 : 11); break; case 3: // f16 tp->set_data_type(10); @@ -241,18 +288,36 @@ int save_onnx(const Graph& g, const char* onnxpath) } std::string* d = tp->mutable_raw_data(); - d->resize(attr.data.size()); - memcpy((void*)d->data(), attr.data.data(), attr.data.size()); + if (fp16 && attr.type == 1) + { + // fp32 to fp16 + const float* p = (const float*)attr.data.data(); + int len = attr.data.size() / 4; + d->resize(len * 2); + unsigned short* p_fp16 = (unsigned short*)d->data(); + for (int i = 0; i < len; i++) + { + p_fp16[i] = float32_to_float16(p[i]); + } + } + else if (fp16 && attr.type == 2) + { + // fp64 to fp16: element count is bytes / sizeof(double); each output half takes 2 bytes + const double* p = (const double*)attr.data.data(); + int len = attr.data.size() / sizeof(double); + d->resize(len * 2); + unsigned short* p_fp16 = (unsigned short*)d->data(); + for (int i = 0; i < len; i++) + { + p_fp16[i] = float32_to_float16((float)p[i]); + } + } + else + { + d->resize(attr.data.size()); + memcpy((void*)d->data(), attr.data.data(), attr.data.size()); + } } - - // if (op->inputnames.size() == op->inputs.size()) - // { - // for (size_t i = 0; i < op->inputs.size(); i++) - // { - // const Operand* oprand = op->inputs[i]; - // fprintf(paramfp, " $%s=%s", op->inputnames[i].c_str(), oprand->name.c_str()); - // } - // } } std::fstream output(onnxpath, std::ios::out | std::ios::trunc | std::ios::binary); diff --git a/tools/pnnx/src/save_onnx.h b/tools/pnnx/src/save_onnx.h index 236a9911e..9a4099872 100644 --- a/tools/pnnx/src/save_onnx.h +++ b/tools/pnnx/src/save_onnx.h @@ -19,7 +19,7 @@ namespace pnnx { -int save_onnx(const Graph& g, const char* onnxpath); +int save_onnx(const Graph& g, const char* onnxpath, int fp16); } // namespace pnnx