Browse Source

pnnx fp16 option for ncnn and onnx weight type (#4350)

tags/20221128
nihui GitHub 3 years ago
parent
commit
ec1b07c9fe
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 196 additions and 190 deletions
  1. +3
    -0
      tools/pnnx/README.md
  2. +0
    -1
      tools/pnnx/src/CMakeLists.txt
  3. +7
    -2
      tools/pnnx/src/main.cpp
  4. +0
    -3
      tools/pnnx/src/pass_ncnn.cpp
  5. +0
    -133
      tools/pnnx/src/pass_ncnn/convert_to_fp16_model.cpp
  6. +0
    -25
      tools/pnnx/src/pass_ncnn/convert_to_fp16_model.h
  7. +96
    -1
      tools/pnnx/src/save_ncnn.cpp
  8. +1
    -1
      tools/pnnx/src/save_ncnn.h
  9. +88
    -23
      tools/pnnx/src/save_onnx.cpp
  10. +1
    -1
      tools/pnnx/src/save_onnx.h

+ 3
- 0
tools/pnnx/README.md View File

@@ -93,6 +93,7 @@ Usage: pnnx [model.pt] [(key=value)...]
ncnnparam=model.ncnn.param
ncnnbin=model.ncnn.bin
ncnnpy=model_ncnn.py
fp16=1
optlevel=2
device=cpu/gpu
inputshape=[1,3,224,224],...
@@ -119,6 +120,8 @@ Parameters:

`ncnnpy` (default="*_ncnn.py"): pyncnn script for inference

`fp16` (default=1): save ncnn weights and the onnx model in the fp16 data type (set `fp16=0` to keep the original data type)

`optlevel` (default=2): graph optimization level

| Option | Optimization level |


+ 0
- 1
tools/pnnx/src/CMakeLists.txt View File

@@ -343,7 +343,6 @@ set(pnnx_pass_ncnn_SRCS
pass_ncnn/insert_split.cpp
pass_ncnn/chain_multi_output.cpp
pass_ncnn/solve_batch_index.cpp
pass_ncnn/convert_to_fp16_model.cpp

pass_ncnn/eliminate_noop.cpp
pass_ncnn/eliminate_tail_reshape_permute.cpp


+ 7
- 2
tools/pnnx/src/main.cpp View File

@@ -168,6 +168,7 @@ static void show_usage()
fprintf(stderr, " ncnnparam=model.ncnn.param\n");
fprintf(stderr, " ncnnbin=model.ncnn.bin\n");
fprintf(stderr, " ncnnpy=model_ncnn.py\n");
fprintf(stderr, " fp16=1\n");
fprintf(stderr, " optlevel=2\n");
fprintf(stderr, " device=cpu/gpu\n");
fprintf(stderr, " inputshape=[1,3,224,224],...\n");
@@ -210,6 +211,7 @@ int main(int argc, char** argv)
std::string ncnnparampath = ptbase + ".ncnn.param";
std::string ncnnbinpath = ptbase + ".ncnn.bin";
std::string ncnnpypath = ptbase + "_ncnn.py";
int fp16 = 1;
int optlevel = 2;
std::string device = "cpu";
std::vector<std::vector<int64_t> > input_shapes;
@@ -250,6 +252,8 @@ int main(int argc, char** argv)
ncnnbinpath = std::string(value);
if (strcmp(key, "ncnnpy") == 0)
ncnnpypath = std::string(value);
if (strcmp(key, "fp16") == 0)
fp16 = atoi(value);
if (strcmp(key, "optlevel") == 0)
optlevel = atoi(value);
if (strcmp(key, "device") == 0)
@@ -273,6 +277,7 @@ int main(int argc, char** argv)
fprintf(stderr, "ncnnparam = %s\n", ncnnparampath.c_str());
fprintf(stderr, "ncnnbin = %s\n", ncnnbinpath.c_str());
fprintf(stderr, "ncnnpy = %s\n", ncnnpypath.c_str());
fprintf(stderr, "fp16 = %d\n", fp16);
fprintf(stderr, "optlevel = %d\n", optlevel);
fprintf(stderr, "device = %s\n", device.c_str());
fprintf(stderr, "inputshape = ");
@@ -415,7 +420,7 @@ int main(int argc, char** argv)
pnnx_graph.python(pnnxpypath, pnnxbinpath);

#if BUILD_PNNX2ONNX
pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str());
pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str(), fp16);
#else
fprintf(stderr, "pnnx build without onnx-zero support, skip saving onnx\n");
#endif
@@ -426,7 +431,7 @@ int main(int argc, char** argv)

pnnx::pass_ncnn(pnnx_graph);

pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath);
pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath, fp16);
}

// pnnx::Graph pnnx_graph2;


+ 0
- 3
tools/pnnx/src/pass_ncnn.cpp View File

@@ -31,7 +31,6 @@
#include "pass_ncnn/insert_split.h"
#include "pass_ncnn/chain_multi_output.h"
#include "pass_ncnn/solve_batch_index.h"
#include "pass_ncnn/convert_to_fp16_model.h"

#include "pass_ncnn/eliminate_noop.h"
#include "pass_ncnn/eliminate_tail_reshape_permute.h"
@@ -134,8 +133,6 @@ void pass_ncnn(Graph& g)
ncnn::convert_input(g);

ncnn::eliminate_output(g);

ncnn::convert_to_fp16_model(g);
}

} // namespace pnnx

+ 0
- 133
tools/pnnx/src/pass_ncnn/convert_to_fp16_model.cpp View File

@@ -1,133 +0,0 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "convert_to_fp16_model.h"

namespace pnnx {

namespace ncnn {

// Convert a 32-bit float to the bit pattern of a 16-bit half float.
//
// Layouts (sign : exponent : mantissa): fp32 is 1 : 8 : 23, fp16 is 1 : 5 : 10.
// The mantissa is truncated rather than rounded, and every value too small
// for a normal fp16 (including fp32 zero/denormals) becomes signed zero.
static unsigned short float32_to_float16(float value)
{
    // view the float as its raw 32-bit pattern
    union
    {
        unsigned int u;
        float f;
    } bits;

    bits.f = value;

    const unsigned short sign_bit = (bits.u >> 31) & 0x1;
    const unsigned short exp32 = (bits.u >> 23) & 0xFF;
    const unsigned int mantissa = bits.u & 0x7FFFFF;

    // the two results shared by several cases below
    const unsigned short signed_zero = sign_bit << 15;
    const unsigned short signed_inf = signed_zero | (0x1F << 10);

    if (exp32 == 0)
    {
        // zero or fp32 denormal, always underflows
        return signed_zero;
    }

    if (exp32 == 0xFF)
    {
        // infinity stays infinity, any NaN payload collapses to a single mantissa bit
        return mantissa ? (unsigned short)(signed_inf | 0x200) : signed_inf;
    }

    // move the exponent from the fp32 bias (127) to the fp16 bias (15)
    const int exp16 = exp32 + (-127 + 15);

    if (exp16 >= 31)
    {
        // too large for fp16, saturate to infinity
        return signed_inf;
    }

    if (exp16 <= 0)
    {
        // a normal fp32 that has no normal fp16 representation
        return signed_zero;
    }

    // normal fp16: keep the top 10 mantissa bits
    return (unsigned short)(signed_zero | (exp16 << 10) | (mantissa >> 13));
}

// Round sz up to the next multiple of n.
// The bit-mask arithmetic only yields a multiple of n when n is a power of two.
static size_t alignSize(size_t sz, int n)
{
    const size_t keep_mask = -n; // clears the low bits when n is a power of two
    const size_t bumped = sz + n - 1;
    return bumped & keep_mask;
}

// Rewrite fp32 weights in every operator of the graph as fp16, in place.
//
// Attributes are visited in the iteration order of op->attrs. A type-0
// attribute whose payload is four zero bytes is treated as a weight-type flag:
// it is overwritten with the fp16 flag, and the attribute that follows it is
// converted from 32-bit floats to half floats.
void convert_to_fp16_model(Graph& graph)
{
    // payload of a flag attribute that announces fp32 data
    const std::vector<char> fp32_flag(4, 0);

    // bytes of the fp16 flag 0x01306B47, lowest byte first
    static const unsigned char fp16_flag[4] = {0x47, 0x6B, 0x30, 0x01};

    for (Operator* op : graph.ops)
    {
        // true while the previously visited attribute was an fp32 flag
        bool convert_next = false;

        for (auto& entry : op->attrs)
        {
            Attribute& attr = entry.second;

            if (!convert_next)
            {
                if (attr.type == 0 && attr.data == fp32_flag)
                {
                    // write fp16 flag
                    for (int b = 0; b < 4; b++)
                    {
                        attr.data[b] = fp16_flag[b];
                    }

                    convert_next = true;
                }

                continue;
            }

            // fp32 -> fp16
            const float* src = (const float*)attr.data.data();
            int count = attr.data.size() / 4;

            // the byte size is padded up to a multiple of 4
            std::vector<char> half_bytes(alignSize(count * 2, 4));
            unsigned short* dst = (unsigned short*)half_bytes.data();
            for (int i = 0; i < count; i++)
            {
                dst[i] = float32_to_float16(src[i]);
            }

            if (count % 2 == 1)
            {
                // an odd element count leaves one padding slot,
                // fill it with a fixed value for model hash consistency
                dst[count] = 0x2283;
            }

            // retag the attribute as type 3 now that it carries the half-float payload
            attr.type = 3;
            attr.data.swap(half_bytes);

            convert_next = false;
        }
    }
}

} // namespace ncnn

} // namespace pnnx

+ 0
- 25
tools/pnnx/src/pass_ncnn/convert_to_fp16_model.h View File

@@ -1,25 +0,0 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "pass_ncnn.h"

namespace pnnx {

namespace ncnn {

void convert_to_fp16_model(Graph& graph);

} // namespace ncnn

} // namespace pnnx

+ 96
- 1
tools/pnnx/src/save_ncnn.cpp View File

@@ -61,7 +61,66 @@ static bool string_is_positive_integer(const std::string& t)
return true;
}

int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath)
// Pack a float into IEEE half-precision bits (1 sign : 5 exponent : 10 mantissa).
//
// The low 13 mantissa bits are dropped without rounding. fp32 zero/denormals
// and normals below the fp16 normal range become signed zero; values above the
// fp16 range become signed infinity; NaN keeps only a single mantissa bit.
static unsigned short float32_to_float16(float value)
{
    union
    {
        unsigned int u;
        float f;
    } pun;

    pun.f = value;

    // split the 1 : 8 : 23 source pattern
    const unsigned int raw = pun.u;
    const unsigned int sign16 = (raw & 0x80000000) >> 16; // already at fp16 bit 15
    const unsigned int exp8 = (raw & 0x7F800000) >> 23;
    const unsigned int frac23 = raw & 0x7FFFFF;

    // exponent + mantissa bits of the result; stays 0 for every underflow case
    unsigned int magnitude = 0;

    if (exp8 == 0xFF)
    {
        // infinity or NaN
        magnitude = 0x7C00 | (frac23 ? 0x200 : 0x00);
    }
    else if (exp8 != 0)
    {
        // normalized input, rebias 127 -> 15
        const int exp5 = (int)exp8 - 127 + 15;

        if (exp5 >= 31)
        {
            // overflow, return infinity
            magnitude = 0x7C00;
        }
        else if (exp5 > 0)
        {
            // normal fp16
            magnitude = ((unsigned int)exp5 << 10) | (frac23 >> 13);
        }
    }

    return (unsigned short)(sign16 | magnitude);
}

// Smallest value >= sz that has the low bits of (n - 1) cleared, i.e. sz rounded
// up to a multiple of n. Only meaningful when n is a power of two.
static size_t alignSize(size_t sz, int n)
{
    const int low_bits = n - 1;
    return (sz + low_bits) & ~(size_t)low_bits;
}

int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16)
{
FILE* paramfp = fopen(parampath.c_str(), "wb");
if (!paramfp)
@@ -196,12 +255,48 @@ int save_ncnn(const Graph& g, const std::string& parampath, const std::string& b
}
}

bool is_type_flag_fp32 = false;
for (const auto& it : op->attrs)
{
// fprintf(paramfp, " @%s=", it.first.c_str());

const Attribute& attr = it.second;

if (fp16 && is_type_flag_fp32)
{
// fp32 -> fp16
const float* p = (const float*)attr.data.data();
int len = attr.data.size() / 4;
std::vector<char> data_fp16(alignSize(len * 2, 4));
unsigned short* p_fp16 = (unsigned short*)data_fp16.data();
for (int i = 0; i < len; i++)
{
p_fp16[i] = float32_to_float16(p[i]);
}

// pad size to 4bytes
if (len % 2 == 1)
{
// pad with fixed value for model hash consistency
p_fp16[len] = 0x2283;
}

fwrite(data_fp16.data(), data_fp16.size(), 1, binfp);

is_type_flag_fp32 = false;
continue;
}

if (fp16 && attr.type == 0 && attr.data == std::vector<char> {0, 0, 0, 0})
{
// write fp16 flag
unsigned int fp16_flag = 0x01306B47;
fwrite((const char*)&fp16_flag, sizeof(fp16_flag), 1, binfp);

is_type_flag_fp32 = true;
continue;
}

fwrite(attr.data.data(), attr.data.size(), 1, binfp);
}



+ 1
- 1
tools/pnnx/src/save_ncnn.h View File

@@ -19,7 +19,7 @@

namespace pnnx {

int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath);
int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16);

} // namespace pnnx



+ 88
- 23
tools/pnnx/src/save_onnx.cpp View File

@@ -33,7 +33,61 @@ extern const Attribute& get_operator_attr(const Operator* op, const char* key);
extern const char* get_param_s(const Parameter& p);
extern std::vector<const char*> get_param_as(const Parameter& p);

int save_onnx(const Graph& g, const char* onnxpath)
// Narrow an fp32 value (1 : 8 : 23) to fp16 bits (1 : 5 : 10).
//
// Truncating conversion: excess mantissa bits are discarded, magnitudes below
// the smallest normal fp16 become signed zero and magnitudes beyond the
// largest fp16 become signed infinity.
static unsigned short float32_to_float16(float value)
{
    union
    {
        unsigned int u;
        float f;
    } view;

    view.f = value;

    const unsigned int sign = view.u >> 31;
    const int exponent = (int)((view.u >> 23) & 0xFF);
    const unsigned int significand = view.u & 0x7FFFFF;

    // the two fp16 fields that get combined with the sign at the end
    int exp_field = 0;
    unsigned int frac_field = 0;

    switch (exponent)
    {
    case 0:
        // zero or denormal, always underflow
        break;
    case 0xFF:
        // infinity or NaN
        exp_field = 0x1F;
        frac_field = significand ? 0x200 : 0x00;
        break;
    default:
    {
        // normalized: rebias from 127 to 15
        const int rebased = exponent + (-127 + 15);
        const bool overflow = rebased >= 31;
        const bool underflow = rebased <= 0;

        if (overflow)
        {
            exp_field = 0x1F; // infinity
        }
        else if (!underflow)
        {
            // normal fp16
            exp_field = rebased;
            frac_field = significand >> 13;
        }
        // underflow keeps both fields at zero
        break;
    }
    }

    return (unsigned short)((sign << 15) | ((unsigned int)exp_field << 10) | frac_field);
}

int save_onnx(const Graph& g, const char* onnxpath, int fp16)
{
onnx::ModelProto model;

@@ -52,10 +106,10 @@ int save_onnx(const Graph& g, const char* onnxpath)
switch (x->type)
{
case 1: // f32
tpt->set_elem_type(1);
tpt->set_elem_type(fp16 ? 10 : 1);
break;
case 2: // f64
tpt->set_elem_type(11);
tpt->set_elem_type(fp16 ? 10 : 11);
break;
case 3: // f16
tpt->set_elem_type(10);
@@ -120,16 +174,12 @@ int save_onnx(const Graph& g, const char* onnxpath)
}

std::vector<const char*> params_keys = get_operator_params_keys(op);

// for (const auto& it : op->params)
for (const char* param_name : params_keys)
{
// const Parameter& param = it.second;
const Parameter& param = get_operator_param(op, param_name);

onnx::AttributeProto* ap = np->add_attribute();

// ap->set_name(get_param_name(it));
ap->set_name(param_name);

if (param.type == 0)
@@ -180,8 +230,6 @@ int save_onnx(const Graph& g, const char* onnxpath)
}

std::vector<const char*> attrs_keys = get_operator_attrs_keys(op);

// for (const auto& it : op->attrs)
for (const char* attr_name : attrs_keys)
{
onnx::TensorProto* tp = gp->add_initializer();
@@ -190,7 +238,6 @@ int save_onnx(const Graph& g, const char* onnxpath)

np->add_input(std::string(get_operator_name(op)) + "." + attr_name);

// const Attribute& attr = it.second;
const Attribute& attr = get_operator_attr(op, attr_name);
for (auto s : attr.shape)
{
@@ -200,10 +247,10 @@ int save_onnx(const Graph& g, const char* onnxpath)
switch (attr.type)
{
case 1: // f32
tp->set_data_type(1);
tp->set_data_type(fp16 ? 10 : 1);
break;
case 2: // f64
tp->set_data_type(11);
tp->set_data_type(fp16 ? 10 : 11);
break;
case 3: // f16
tp->set_data_type(10);
@@ -241,18 +288,36 @@ int save_onnx(const Graph& g, const char* onnxpath)
}

std::string* d = tp->mutable_raw_data();
d->resize(attr.data.size());
memcpy((void*)d->data(), attr.data.data(), attr.data.size());
if (fp16 && attr.type == 1)
{
// fp32 to fp16
const float* p = (const float*)attr.data.data();
int len = attr.data.size() / 4;
d->resize(len * 2);
unsigned short* p_fp16 = (unsigned short*)d->data();
for (int i = 0; i < len; i++)
{
p_fp16[i] = float32_to_float16(p[i]);
}
}
else if (fp16 && attr.type == 2)
{
    // fp64 to fp16
    // Source elements are 8-byte doubles and destination elements are 2-byte
    // halves. Counting elements as size / 4 would read twice as many doubles
    // as attr.data holds, and resizing to the element count (instead of
    // count * 2 bytes) would leave the destination half as large as the
    // data written into it.
    const double* p = (const double*)attr.data.data();
    int len = attr.data.size() / sizeof(double);
    d->resize(len * 2);
    unsigned short* p_fp16 = (unsigned short*)d->data();
    for (int i = 0; i < len; i++)
    {
        p_fp16[i] = float32_to_float16((float)p[i]);
    }
}
else
{
d->resize(attr.data.size());
memcpy((void*)d->data(), attr.data.data(), attr.data.size());
}
}

// if (op->inputnames.size() == op->inputs.size())
// {
// for (size_t i = 0; i < op->inputs.size(); i++)
// {
// const Operand* oprand = op->inputs[i];
// fprintf(paramfp, " $%s=%s", op->inputnames[i].c_str(), oprand->name.c_str());
// }
// }
}

std::fstream output(onnxpath, std::ios::out | std::ios::trunc | std::ios::binary);


+ 1
- 1
tools/pnnx/src/save_onnx.h View File

@@ -19,7 +19,7 @@

namespace pnnx {

int save_onnx(const Graph& g, const char* onnxpath);
int save_onnx(const Graph& g, const char* onnxpath, int fp16);

} // namespace pnnx



Loading…
Cancel
Save