| @@ -93,6 +93,7 @@ Usage: pnnx [model.pt] [(key=value)...] | |||
| ncnnparam=model.ncnn.param | |||
| ncnnbin=model.ncnn.bin | |||
| ncnnpy=model_ncnn.py | |||
| fp16=1 | |||
| optlevel=2 | |||
| device=cpu/gpu | |||
| inputshape=[1,3,224,224],... | |||
| @@ -119,6 +120,8 @@ Parameters: | |||
| `ncnnpy` (default="*_ncnn.py"): pyncnn script for inference | |||
| `fp16` (default=1): save ncnn weight and onnx in fp16 data type | |||
| `optlevel` (default=2): graph optimization level | |||
| | Option | Optimization level | | |||
| @@ -343,7 +343,6 @@ set(pnnx_pass_ncnn_SRCS | |||
| pass_ncnn/insert_split.cpp | |||
| pass_ncnn/chain_multi_output.cpp | |||
| pass_ncnn/solve_batch_index.cpp | |||
| pass_ncnn/convert_to_fp16_model.cpp | |||
| pass_ncnn/eliminate_noop.cpp | |||
| pass_ncnn/eliminate_tail_reshape_permute.cpp | |||
| @@ -168,6 +168,7 @@ static void show_usage() | |||
| fprintf(stderr, " ncnnparam=model.ncnn.param\n"); | |||
| fprintf(stderr, " ncnnbin=model.ncnn.bin\n"); | |||
| fprintf(stderr, " ncnnpy=model_ncnn.py\n"); | |||
| fprintf(stderr, " fp16=1\n"); | |||
| fprintf(stderr, " optlevel=2\n"); | |||
| fprintf(stderr, " device=cpu/gpu\n"); | |||
| fprintf(stderr, " inputshape=[1,3,224,224],...\n"); | |||
| @@ -210,6 +211,7 @@ int main(int argc, char** argv) | |||
| std::string ncnnparampath = ptbase + ".ncnn.param"; | |||
| std::string ncnnbinpath = ptbase + ".ncnn.bin"; | |||
| std::string ncnnpypath = ptbase + "_ncnn.py"; | |||
| int fp16 = 1; | |||
| int optlevel = 2; | |||
| std::string device = "cpu"; | |||
| std::vector<std::vector<int64_t> > input_shapes; | |||
| @@ -250,6 +252,8 @@ int main(int argc, char** argv) | |||
| ncnnbinpath = std::string(value); | |||
| if (strcmp(key, "ncnnpy") == 0) | |||
| ncnnpypath = std::string(value); | |||
| if (strcmp(key, "fp16") == 0) | |||
| fp16 = atoi(value); | |||
| if (strcmp(key, "optlevel") == 0) | |||
| optlevel = atoi(value); | |||
| if (strcmp(key, "device") == 0) | |||
| @@ -273,6 +277,7 @@ int main(int argc, char** argv) | |||
| fprintf(stderr, "ncnnparam = %s\n", ncnnparampath.c_str()); | |||
| fprintf(stderr, "ncnnbin = %s\n", ncnnbinpath.c_str()); | |||
| fprintf(stderr, "ncnnpy = %s\n", ncnnpypath.c_str()); | |||
| fprintf(stderr, "fp16 = %d\n", fp16); | |||
| fprintf(stderr, "optlevel = %d\n", optlevel); | |||
| fprintf(stderr, "device = %s\n", device.c_str()); | |||
| fprintf(stderr, "inputshape = "); | |||
| @@ -415,7 +420,7 @@ int main(int argc, char** argv) | |||
| pnnx_graph.python(pnnxpypath, pnnxbinpath); | |||
| #if BUILD_PNNX2ONNX | |||
| pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str()); | |||
| pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str(), fp16); | |||
| #else | |||
| fprintf(stderr, "pnnx build without onnx-zero support, skip saving onnx\n"); | |||
| #endif | |||
| @@ -426,7 +431,7 @@ int main(int argc, char** argv) | |||
| pnnx::pass_ncnn(pnnx_graph); | |||
| pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath); | |||
| pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath, fp16); | |||
| } | |||
| // pnnx::Graph pnnx_graph2; | |||
| @@ -31,7 +31,6 @@ | |||
| #include "pass_ncnn/insert_split.h" | |||
| #include "pass_ncnn/chain_multi_output.h" | |||
| #include "pass_ncnn/solve_batch_index.h" | |||
| #include "pass_ncnn/convert_to_fp16_model.h" | |||
| #include "pass_ncnn/eliminate_noop.h" | |||
| #include "pass_ncnn/eliminate_tail_reshape_permute.h" | |||
| @@ -134,8 +133,6 @@ void pass_ncnn(Graph& g) | |||
| ncnn::convert_input(g); | |||
| ncnn::eliminate_output(g); | |||
| ncnn::convert_to_fp16_model(g); | |||
| } | |||
| } // namespace pnnx | |||
| @@ -1,133 +0,0 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| #include "convert_to_fp16_model.h" | |||
| namespace pnnx { | |||
| namespace ncnn { | |||
// Converts an IEEE-754 binary32 value into the bit pattern of a binary16 value.
//
// The conversion truncates the mantissa (no rounding) and flushes everything
// that is not a normal fp16 number (fp32 zero/denormal, or a magnitude too
// small for a normal fp16) to a signed zero. Magnitudes too large for fp16
// become signed infinity; NaN stays NaN.
static unsigned short float32_to_float16(float value)
{
    // view the float as raw bits, layout 1 : 8 : 23
    union
    {
        unsigned int u;
        float f;
    } bits;
    bits.f = value;

    const unsigned short sign_bit = (bits.u & 0x80000000) >> 31;
    const unsigned short exp32 = (bits.u & 0x7F800000) >> 23;
    const unsigned int mantissa32 = bits.u & 0x7FFFFF;

    // target layout 1 : 5 : 10
    const unsigned short signed_zero = sign_bit << 15;
    const unsigned short signed_inf = (sign_bit << 15) | (0x1F << 10);

    if (exp32 == 0)
    {
        // fp32 zero or denormal always underflows
        return signed_zero;
    }

    if (exp32 == 0xFF)
    {
        // infinity keeps an empty mantissa, NaN gets the quiet bit
        return signed_inf | (mantissa32 ? 0x200 : 0x00);
    }

    // normal fp32: move the exponent from bias 127 to bias 15
    const short exp16 = exp32 + (-127 + 15);

    if (exp16 >= 31)
    {
        // too large for fp16, saturate to infinity
        return signed_inf;
    }

    if (exp16 <= 0)
    {
        // too small to be a normal fp16, flush to zero
        return signed_zero;
    }

    // normal fp16, keep the top 10 mantissa bits
    return signed_zero | (exp16 << 10) | (mantissa32 >> 13);
}
// Rounds sz up to the next multiple of n.
// The bit mask only works when n is a power of two.
static size_t alignSize(size_t sz, int n)
{
    const size_t bumped = sz + n - 1;
    const size_t mask = static_cast<size_t>(-n);
    return bumped & mask;
}
| void convert_to_fp16_model(Graph& graph) | |||
| { | |||
| for (Operator* op : graph.ops) | |||
| { | |||
| bool is_type_flag_fp32 = false; | |||
| for (auto& it : op->attrs) | |||
| { | |||
| Attribute& attr = it.second; | |||
| if (is_type_flag_fp32) | |||
| { | |||
| // fp32 -> fp16 | |||
| const float* p = (const float*)attr.data.data(); | |||
| int len = attr.data.size() / 4; | |||
| std::vector<char> data_fp16(alignSize(len * 2, 4)); | |||
| unsigned short* p_fp16 = (unsigned short*)data_fp16.data(); | |||
| for (int i = 0; i < len; i++) | |||
| { | |||
| p_fp16[i] = float32_to_float16(p[i]); | |||
| } | |||
| // pad size to 4bytes | |||
| if (len % 2 == 1) | |||
| { | |||
| // pad with fixed value for model hash consistency | |||
| p_fp16[len] = 0x2283; | |||
| } | |||
| attr.type = 3; | |||
| attr.data = data_fp16; | |||
| is_type_flag_fp32 = false; | |||
| continue; | |||
| } | |||
| if (attr.type == 0 && attr.data == std::vector<char> {0, 0, 0, 0}) | |||
| { | |||
| // write fp16 flag | |||
| // unsigned int fp16_flag = 0x01306B47; | |||
| attr.data[0] = 0x47; | |||
| attr.data[1] = 0x6B; | |||
| attr.data[2] = 0x30; | |||
| attr.data[3] = 0x01; | |||
| is_type_flag_fp32 = true; | |||
| continue; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } // namespace ncnn | |||
| } // namespace pnnx | |||
| @@ -1,25 +0,0 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| #include "pass_ncnn.h" | |||
| namespace pnnx { | |||
| namespace ncnn { | |||
| void convert_to_fp16_model(Graph& graph); | |||
| } // namespace ncnn | |||
| } // namespace pnnx | |||
| @@ -61,7 +61,66 @@ static bool string_is_positive_integer(const std::string& t) | |||
| return true; | |||
| } | |||
| int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath) | |||
// Converts an IEEE-754 binary32 value into the bit pattern of a binary16 value.
//
// The conversion truncates the mantissa (no rounding) and flushes everything
// that is not a normal fp16 number (fp32 zero/denormal, or a magnitude too
// small for a normal fp16) to a signed zero. Magnitudes too large for fp16
// become signed infinity; NaN stays NaN.
static unsigned short float32_to_float16(float value)
{
    // view the float as raw bits, layout 1 : 8 : 23
    union
    {
        unsigned int u;
        float f;
    } bits;
    bits.f = value;

    const unsigned short sign_bit = (bits.u & 0x80000000) >> 31;
    const unsigned short exp32 = (bits.u & 0x7F800000) >> 23;
    const unsigned int mantissa32 = bits.u & 0x7FFFFF;

    // target layout 1 : 5 : 10
    const unsigned short signed_zero = sign_bit << 15;
    const unsigned short signed_inf = (sign_bit << 15) | (0x1F << 10);

    if (exp32 == 0)
    {
        // fp32 zero or denormal always underflows
        return signed_zero;
    }

    if (exp32 == 0xFF)
    {
        // infinity keeps an empty mantissa, NaN gets the quiet bit
        return signed_inf | (mantissa32 ? 0x200 : 0x00);
    }

    // normal fp32: move the exponent from bias 127 to bias 15
    const short exp16 = exp32 + (-127 + 15);

    if (exp16 >= 31)
    {
        // too large for fp16, saturate to infinity
        return signed_inf;
    }

    if (exp16 <= 0)
    {
        // too small to be a normal fp16, flush to zero
        return signed_zero;
    }

    // normal fp16, keep the top 10 mantissa bits
    return signed_zero | (exp16 << 10) | (mantissa32 >> 13);
}
// Rounds sz up to the next multiple of n.
// The bit mask only works when n is a power of two.
static size_t alignSize(size_t sz, int n)
{
    const size_t bumped = sz + n - 1;
    const size_t mask = static_cast<size_t>(-n);
    return bumped & mask;
}
| int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16) | |||
| { | |||
| FILE* paramfp = fopen(parampath.c_str(), "wb"); | |||
| if (!paramfp) | |||
| @@ -196,12 +255,48 @@ int save_ncnn(const Graph& g, const std::string& parampath, const std::string& b | |||
| } | |||
| } | |||
| bool is_type_flag_fp32 = false; | |||
| for (const auto& it : op->attrs) | |||
| { | |||
| // fprintf(paramfp, " @%s=", it.first.c_str()); | |||
| const Attribute& attr = it.second; | |||
| if (fp16 && is_type_flag_fp32) | |||
| { | |||
| // fp32 -> fp16 | |||
| const float* p = (const float*)attr.data.data(); | |||
| int len = attr.data.size() / 4; | |||
| std::vector<char> data_fp16(alignSize(len * 2, 4)); | |||
| unsigned short* p_fp16 = (unsigned short*)data_fp16.data(); | |||
| for (int i = 0; i < len; i++) | |||
| { | |||
| p_fp16[i] = float32_to_float16(p[i]); | |||
| } | |||
| // pad size to 4bytes | |||
| if (len % 2 == 1) | |||
| { | |||
| // pad with fixed value for model hash consistency | |||
| p_fp16[len] = 0x2283; | |||
| } | |||
| fwrite(data_fp16.data(), data_fp16.size(), 1, binfp); | |||
| is_type_flag_fp32 = false; | |||
| continue; | |||
| } | |||
| if (fp16 && attr.type == 0 && attr.data == std::vector<char> {0, 0, 0, 0}) | |||
| { | |||
| // write fp16 flag | |||
| unsigned int fp16_flag = 0x01306B47; | |||
| fwrite((const char*)&fp16_flag, sizeof(fp16_flag), 1, binfp); | |||
| is_type_flag_fp32 = true; | |||
| continue; | |||
| } | |||
| fwrite(attr.data.data(), attr.data.size(), 1, binfp); | |||
| } | |||
| @@ -19,7 +19,7 @@ | |||
| namespace pnnx { | |||
| int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath); | |||
| int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16); | |||
| } // namespace pnnx | |||
| @@ -33,7 +33,61 @@ extern const Attribute& get_operator_attr(const Operator* op, const char* key); | |||
| extern const char* get_param_s(const Parameter& p); | |||
| extern std::vector<const char*> get_param_as(const Parameter& p); | |||
| int save_onnx(const Graph& g, const char* onnxpath) | |||
// Converts an IEEE-754 binary32 value into the bit pattern of a binary16 value.
//
// The conversion truncates the mantissa (no rounding) and flushes everything
// that is not a normal fp16 number (fp32 zero/denormal, or a magnitude too
// small for a normal fp16) to a signed zero. Magnitudes too large for fp16
// become signed infinity; NaN stays NaN.
static unsigned short float32_to_float16(float value)
{
    // view the float as raw bits, layout 1 : 8 : 23
    union
    {
        unsigned int u;
        float f;
    } bits;
    bits.f = value;

    const unsigned short sign_bit = (bits.u & 0x80000000) >> 31;
    const unsigned short exp32 = (bits.u & 0x7F800000) >> 23;
    const unsigned int mantissa32 = bits.u & 0x7FFFFF;

    // target layout 1 : 5 : 10
    const unsigned short signed_zero = sign_bit << 15;
    const unsigned short signed_inf = (sign_bit << 15) | (0x1F << 10);

    if (exp32 == 0)
    {
        // fp32 zero or denormal always underflows
        return signed_zero;
    }

    if (exp32 == 0xFF)
    {
        // infinity keeps an empty mantissa, NaN gets the quiet bit
        return signed_inf | (mantissa32 ? 0x200 : 0x00);
    }

    // normal fp32: move the exponent from bias 127 to bias 15
    const short exp16 = exp32 + (-127 + 15);

    if (exp16 >= 31)
    {
        // too large for fp16, saturate to infinity
        return signed_inf;
    }

    if (exp16 <= 0)
    {
        // too small to be a normal fp16, flush to zero
        return signed_zero;
    }

    // normal fp16, keep the top 10 mantissa bits
    return signed_zero | (exp16 << 10) | (mantissa32 >> 13);
}
| int save_onnx(const Graph& g, const char* onnxpath, int fp16) | |||
| { | |||
| onnx::ModelProto model; | |||
| @@ -52,10 +106,10 @@ int save_onnx(const Graph& g, const char* onnxpath) | |||
| switch (x->type) | |||
| { | |||
| case 1: // f32 | |||
| tpt->set_elem_type(1); | |||
| tpt->set_elem_type(fp16 ? 10 : 1); | |||
| break; | |||
| case 2: // f64 | |||
| tpt->set_elem_type(11); | |||
| tpt->set_elem_type(fp16 ? 10 : 11); | |||
| break; | |||
| case 3: // f16 | |||
| tpt->set_elem_type(10); | |||
| @@ -120,16 +174,12 @@ int save_onnx(const Graph& g, const char* onnxpath) | |||
| } | |||
| std::vector<const char*> params_keys = get_operator_params_keys(op); | |||
| // for (const auto& it : op->params) | |||
| for (const char* param_name : params_keys) | |||
| { | |||
| // const Parameter& param = it.second; | |||
| const Parameter& param = get_operator_param(op, param_name); | |||
| onnx::AttributeProto* ap = np->add_attribute(); | |||
| // ap->set_name(get_param_name(it)); | |||
| ap->set_name(param_name); | |||
| if (param.type == 0) | |||
| @@ -180,8 +230,6 @@ int save_onnx(const Graph& g, const char* onnxpath) | |||
| } | |||
| std::vector<const char*> attrs_keys = get_operator_attrs_keys(op); | |||
| // for (const auto& it : op->attrs) | |||
| for (const char* attr_name : attrs_keys) | |||
| { | |||
| onnx::TensorProto* tp = gp->add_initializer(); | |||
| @@ -190,7 +238,6 @@ int save_onnx(const Graph& g, const char* onnxpath) | |||
| np->add_input(std::string(get_operator_name(op)) + "." + attr_name); | |||
| // const Attribute& attr = it.second; | |||
| const Attribute& attr = get_operator_attr(op, attr_name); | |||
| for (auto s : attr.shape) | |||
| { | |||
| @@ -200,10 +247,10 @@ int save_onnx(const Graph& g, const char* onnxpath) | |||
| switch (attr.type) | |||
| { | |||
| case 1: // f32 | |||
| tp->set_data_type(1); | |||
| tp->set_data_type(fp16 ? 10 : 1); | |||
| break; | |||
| case 2: // f64 | |||
| tp->set_data_type(11); | |||
| tp->set_data_type(fp16 ? 10 : 11); | |||
| break; | |||
| case 3: // f16 | |||
| tp->set_data_type(10); | |||
| @@ -241,18 +288,36 @@ int save_onnx(const Graph& g, const char* onnxpath) | |||
| } | |||
| std::string* d = tp->mutable_raw_data(); | |||
| d->resize(attr.data.size()); | |||
| memcpy((void*)d->data(), attr.data.data(), attr.data.size()); | |||
| if (fp16 && attr.type == 1) | |||
| { | |||
| // fp32 to fp16 | |||
| const float* p = (const float*)attr.data.data(); | |||
| int len = attr.data.size() / 4; | |||
| d->resize(len * 2); | |||
| unsigned short* p_fp16 = (unsigned short*)d->data(); | |||
| for (int i = 0; i < len; i++) | |||
| { | |||
| p_fp16[i] = float32_to_float16(p[i]); | |||
| } | |||
| } | |||
| else if (fp16 && attr.type == 2) | |||
| { | |||
| // fp64 to fp16 | |||
| const double* p = (const double*)attr.data.data(); | |||
| int len = attr.data.size() / 4; | |||
| d->resize(len); | |||
| unsigned short* p_fp16 = (unsigned short*)d->data(); | |||
| for (int i = 0; i < len; i++) | |||
| { | |||
| p_fp16[i] = float32_to_float16((float)p[i]); | |||
| } | |||
| } | |||
| else | |||
| { | |||
| d->resize(attr.data.size()); | |||
| memcpy((void*)d->data(), attr.data.data(), attr.data.size()); | |||
| } | |||
| } | |||
| // if (op->inputnames.size() == op->inputs.size()) | |||
| // { | |||
| // for (size_t i = 0; i < op->inputs.size(); i++) | |||
| // { | |||
| // const Operand* oprand = op->inputs[i]; | |||
| // fprintf(paramfp, " $%s=%s", op->inputnames[i].c_str(), oprand->name.c_str()); | |||
| // } | |||
| // } | |||
| } | |||
| std::fstream output(onnxpath, std::ios::out | std::ios::trunc | std::ios::binary); | |||
| @@ -19,7 +19,7 @@ | |||
| namespace pnnx { | |||
| int save_onnx(const Graph& g, const char* onnxpath); | |||
| int save_onnx(const Graph& g, const char* onnxpath, int fp16); | |||
| } // namespace pnnx | |||