diff --git a/tools/pnnx/README.md b/tools/pnnx/README.md
index 7c30b68ba..882280d1f 100644
--- a/tools/pnnx/README.md
+++ b/tools/pnnx/README.md
@@ -93,6 +93,7 @@ Usage: pnnx [model.pt] [(key=value)...]
   ncnnparam=model.ncnn.param
   ncnnbin=model.ncnn.bin
   ncnnpy=model_ncnn.py
+  fp16=1
   optlevel=2
   device=cpu/gpu
   inputshape=[1,3,224,224],...
@@ -119,6 +120,8 @@ Parameters:
 
 `ncnnpy` (default="*_ncnn.py"): pyncnn script for inference
 
+`fp16` (default=1): save ncnn weights and the onnx model in fp16 data type; set `fp16=0` to keep the original precision
+
 `optlevel` (default=2): graph optimization level 
 
 | Option | Optimization level              |
diff --git a/tools/pnnx/src/CMakeLists.txt b/tools/pnnx/src/CMakeLists.txt
index 38e0516c3..f2a9b6a9f 100644
--- a/tools/pnnx/src/CMakeLists.txt
+++ b/tools/pnnx/src/CMakeLists.txt
@@ -343,7 +343,6 @@ set(pnnx_pass_ncnn_SRCS
     pass_ncnn/insert_split.cpp
     pass_ncnn/chain_multi_output.cpp
     pass_ncnn/solve_batch_index.cpp
-    pass_ncnn/convert_to_fp16_model.cpp
 
     pass_ncnn/eliminate_noop.cpp
     pass_ncnn/eliminate_tail_reshape_permute.cpp
diff --git a/tools/pnnx/src/main.cpp b/tools/pnnx/src/main.cpp
index 89c81d8d6..98066c4c5 100644
--- a/tools/pnnx/src/main.cpp
+++ b/tools/pnnx/src/main.cpp
@@ -168,6 +168,7 @@ static void show_usage()
     fprintf(stderr, "  ncnnparam=model.ncnn.param\n");
     fprintf(stderr, "  ncnnbin=model.ncnn.bin\n");
     fprintf(stderr, "  ncnnpy=model_ncnn.py\n");
+    fprintf(stderr, "  fp16=1\n");
     fprintf(stderr, "  optlevel=2\n");
     fprintf(stderr, "  device=cpu/gpu\n");
     fprintf(stderr, "  inputshape=[1,3,224,224],...\n");
@@ -210,6 +211,7 @@ int main(int argc, char** argv)
     std::string ncnnparampath = ptbase + ".ncnn.param";
     std::string ncnnbinpath = ptbase + ".ncnn.bin";
     std::string ncnnpypath = ptbase + "_ncnn.py";
+    int fp16 = 1;
     int optlevel = 2;
     std::string device = "cpu";
     std::vector<std::vector<int64_t> > input_shapes;
@@ -250,6 +252,8 @@ int main(int argc, char** argv)
             ncnnbinpath = std::string(value);
         if (strcmp(key, "ncnnpy") == 0)
             ncnnpypath = std::string(value);
+        if (strcmp(key, "fp16") == 0)
+            fp16 = atoi(value);
         if (strcmp(key, "optlevel") == 0)
             optlevel = atoi(value);
         if (strcmp(key, "device") == 0)
@@ -273,6 +277,7 @@ int main(int argc, char** argv)
         fprintf(stderr, "ncnnparam = %s\n", ncnnparampath.c_str());
         fprintf(stderr, "ncnnbin = %s\n", ncnnbinpath.c_str());
         fprintf(stderr, "ncnnpy = %s\n", ncnnpypath.c_str());
+        fprintf(stderr, "fp16 = %d\n", fp16);
         fprintf(stderr, "optlevel = %d\n", optlevel);
         fprintf(stderr, "device = %s\n", device.c_str());
         fprintf(stderr, "inputshape = ");
@@ -415,7 +420,7 @@ int main(int argc, char** argv)
     pnnx_graph.python(pnnxpypath, pnnxbinpath);
 
 #if BUILD_PNNX2ONNX
-    pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str());
+    pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str(), fp16);
 #else
     fprintf(stderr, "pnnx build without onnx-zero support, skip saving onnx\n");
 #endif
@@ -426,7 +431,7 @@ int main(int argc, char** argv)
 
         pnnx::pass_ncnn(pnnx_graph);
 
-        pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath);
+        pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath, fp16);
     }
 
     //     pnnx::Graph pnnx_graph2;
diff --git a/tools/pnnx/src/pass_ncnn.cpp b/tools/pnnx/src/pass_ncnn.cpp
index c4a8d46af..14dedf0e8 100644
--- a/tools/pnnx/src/pass_ncnn.cpp
+++ b/tools/pnnx/src/pass_ncnn.cpp
@@ -31,7 +31,6 @@
 #include "pass_ncnn/insert_split.h"
 #include "pass_ncnn/chain_multi_output.h"
 #include "pass_ncnn/solve_batch_index.h"
-#include "pass_ncnn/convert_to_fp16_model.h"
 
 #include "pass_ncnn/eliminate_noop.h"
 #include "pass_ncnn/eliminate_tail_reshape_permute.h"
@@ -134,8 +133,6 @@ void pass_ncnn(Graph& g)
     ncnn::convert_input(g);
 
     ncnn::eliminate_output(g);
-
-    ncnn::convert_to_fp16_model(g);
 }
 
 } // namespace pnnx
diff --git a/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.cpp b/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.cpp
deleted file mode 100644
index ee004b2b5..000000000
--- a/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-// Tencent is pleased to support the open source community by making ncnn available.
-//
-// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
-//
-// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// https://opensource.org/licenses/BSD-3-Clause
-//
-// Unless required by applicable law or agreed to in writing, software distributed
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations under the License.
-
-#include "convert_to_fp16_model.h"
-
-namespace pnnx {
-
-namespace ncnn {
-
-static unsigned short float32_to_float16(float value)
-{
-    // 1 : 8 : 23
-    union
-    {
-        unsigned int u;
-        float f;
-    } tmp;
-
-    tmp.f = value;
-
-    // 1 : 8 : 23
-    unsigned short sign = (tmp.u & 0x80000000) >> 31;
-    unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
-    unsigned int significand = tmp.u & 0x7FFFFF;
-
-    //     NCNN_LOGE("%d %d %d", sign, exponent, significand);
-
-    // 1 : 5 : 10
-    unsigned short fp16;
-    if (exponent == 0)
-    {
-        // zero or denormal, always underflow
-        fp16 = (sign << 15) | (0x00 << 10) | 0x00;
-    }
-    else if (exponent == 0xFF)
-    {
-        // infinity or NaN
-        fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
-    }
-    else
-    {
-        // normalized
-        short newexp = exponent + (-127 + 15);
-        if (newexp >= 31)
-        {
-            // overflow, return infinity
-            fp16 = (sign << 15) | (0x1F << 10) | 0x00;
-        }
-        else if (newexp <= 0)
-        {
-            // Some normal fp32 cannot be expressed as normal fp16
-            fp16 = (sign << 15) | (0x00 << 10) | 0x00;
-        }
-        else
-        {
-            // normal fp16
-            fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
-        }
-    }
-
-    return fp16;
-}
-
-static size_t alignSize(size_t sz, int n)
-{
-    return (sz + n - 1) & -n;
-}
-
-void convert_to_fp16_model(Graph& graph)
-{
-    for (Operator* op : graph.ops)
-    {
-        bool is_type_flag_fp32 = false;
-        for (auto& it : op->attrs)
-        {
-            Attribute& attr = it.second;
-
-            if (is_type_flag_fp32)
-            {
-                // fp32 -> fp16
-                const float* p = (const float*)attr.data.data();
-                int len = attr.data.size() / 4;
-                std::vector<char> data_fp16(alignSize(len * 2, 4));
-                unsigned short* p_fp16 = (unsigned short*)data_fp16.data();
-                for (int i = 0; i < len; i++)
-                {
-                    p_fp16[i] = float32_to_float16(p[i]);
-                }
-
-                // pad size to 4bytes
-                if (len % 2 == 1)
-                {
-                    // pad with fixed value for model hash consistency
-                    p_fp16[len] = 0x2283;
-                }
-
-                attr.type = 3;
-                attr.data = data_fp16;
-
-                is_type_flag_fp32 = false;
-                continue;
-            }
-
-            if (attr.type == 0 && attr.data == std::vector<char> {0, 0, 0, 0})
-            {
-                // write fp16 flag
-                // unsigned int fp16_flag = 0x01306B47;
-                attr.data[0] = 0x47;
-                attr.data[1] = 0x6B;
-                attr.data[2] = 0x30;
-                attr.data[3] = 0x01;
-
-                is_type_flag_fp32 = true;
-                continue;
-            }
-        }
-    }
-}
-
-} // namespace ncnn
-
-} // namespace pnnx
diff --git a/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.h b/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.h
deleted file mode 100644
index 3f609d30c..000000000
--- a/tools/pnnx/src/pass_ncnn/convert_to_fp16_model.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Tencent is pleased to support the open source community by making ncnn available.
-//
-// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
-//
-// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// https://opensource.org/licenses/BSD-3-Clause
-//
-// Unless required by applicable law or agreed to in writing, software distributed
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations under the License.
-
-#include "pass_ncnn.h"
-
-namespace pnnx {
-
-namespace ncnn {
-
-void convert_to_fp16_model(Graph& graph);
-
-} // namespace ncnn
-
-} // namespace pnnx
diff --git a/tools/pnnx/src/save_ncnn.cpp b/tools/pnnx/src/save_ncnn.cpp
index b0710e9db..6a4407879 100644
--- a/tools/pnnx/src/save_ncnn.cpp
+++ b/tools/pnnx/src/save_ncnn.cpp
@@ -61,7 +61,66 @@ static bool string_is_positive_integer(const std::string& t)
     return true;
 }
 
-int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath)
+static unsigned short float32_to_float16(float value) // returns the fp16 bit pattern for one fp32 value; the significand is truncated, not rounded
+{
+    // fp32 bit layout is sign : exponent : significand = 1 : 8 : 23, the union exposes the raw bits of value
+    union
+    {
+        unsigned int u;
+        float f;
+    } tmp;
+
+    tmp.f = value;
+
+    // split the raw fp32 bits into the 1 : 8 : 23 fields
+    unsigned short sign = (tmp.u & 0x80000000) >> 31;
+    unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
+    unsigned int significand = tmp.u & 0x7FFFFF;
+
+    //     NCNN_LOGE("%d %d %d", sign, exponent, significand);
+
+    // fp16 bit layout is sign : exponent : significand = 1 : 5 : 10
+    unsigned short fp16;
+    if (exponent == 0)
+    {
+        // fp32 zero or denormal, always underflows to a signed zero in fp16
+        fp16 = (sign << 15) | (0x00 << 10) | 0x00;
+    }
+    else if (exponent == 0xFF)
+    {
+        // infinity or NaN, every nonzero fp32 significand collapses to the single NaN payload 0x200
+        fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
+    }
+    else
+    {
+        // normalized, rebias the exponent from the fp32 bias 127 to the fp16 bias 15
+        short newexp = exponent + (-127 + 15);
+        if (newexp >= 31)
+        {
+            // overflow, return infinity with the original sign
+            fp16 = (sign << 15) | (0x1F << 10) | 0x00;
+        }
+        else if (newexp <= 0)
+        {
+            // Some normal fp32 cannot be expressed as normal fp16, they are flushed to signed zero instead of an fp16 denormal
+            fp16 = (sign << 15) | (0x00 << 10) | 0x00;
+        }
+        else
+        {
+            // normal fp16, keep the top 10 significand bits and drop the low 13 bits without rounding
+            fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
+        }
+    }
+
+    return fp16;
+}
+
+static size_t alignSize(size_t sz, int n) // rounds sz up to the next multiple of n
+{
+    return (sz + n - 1) & -n; // the -n mask only yields a multiple of n when n is a power of two
+}
+
+int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16)
 {
     FILE* paramfp = fopen(parampath.c_str(), "wb");
     if (!paramfp)
@@ -196,12 +255,48 @@ int save_ncnn(const Graph& g, const std::string& parampath, const std::string& b
             }
         }
 
+        bool is_type_flag_fp32 = false;
         for (const auto& it : op->attrs)
         {
             //             fprintf(paramfp, " @%s=", it.first.c_str());
 
             const Attribute& attr = it.second;
 
+            if (fp16 && is_type_flag_fp32)
+            {
+                // fp32 -> fp16, the previous attribute was the all-zero type flag so this one is treated as raw fp32 data
+                const float* p = (const float*)attr.data.data();
+                int len = attr.data.size() / 4; // number of 4-byte fp32 elements
+                std::vector<char> data_fp16(alignSize(len * 2, 4));
+                unsigned short* p_fp16 = (unsigned short*)data_fp16.data();
+                for (int i = 0; i < len; i++)
+                {
+                    p_fp16[i] = float32_to_float16(p[i]);
+                }
+
+                // pad size to 4bytes, an odd element count leaves one unused 16-bit slot at the end of the aligned buffer
+                if (len % 2 == 1)
+                {
+                    // pad with fixed value for model hash consistency
+                    p_fp16[len] = 0x2283;
+                }
+
+                fwrite(data_fp16.data(), data_fp16.size(), 1, binfp);
+
+                is_type_flag_fp32 = false;
+                continue;
+            }
+
+            if (fp16 && attr.type == 0 && attr.data == std::vector<char> {0, 0, 0, 0})
+            {
+                // write fp16 flag in place of the four zero bytes, the value is emitted in host byte order
+                unsigned int fp16_flag = 0x01306B47;
+                fwrite((const char*)&fp16_flag, sizeof(fp16_flag), 1, binfp);
+
+                is_type_flag_fp32 = true; // the next attribute of this operator gets converted to fp16
+                continue;
+            }
+
+
             fwrite(attr.data.data(), attr.data.size(), 1, binfp);
         }
 
diff --git a/tools/pnnx/src/save_ncnn.h b/tools/pnnx/src/save_ncnn.h
index c49f506d3..458c14700 100644
--- a/tools/pnnx/src/save_ncnn.h
+++ b/tools/pnnx/src/save_ncnn.h
@@ -19,7 +19,7 @@
 
 namespace pnnx {
 
-int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath);
+int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16);
 
 } // namespace pnnx
 
diff --git a/tools/pnnx/src/save_onnx.cpp b/tools/pnnx/src/save_onnx.cpp
index 86c64b904..55bb10cf7 100644
--- a/tools/pnnx/src/save_onnx.cpp
+++ b/tools/pnnx/src/save_onnx.cpp
@@ -33,7 +33,61 @@ extern const Attribute& get_operator_attr(const Operator* op, const char* key);
 extern const char* get_param_s(const Parameter& p);
 extern std::vector<const char*> get_param_as(const Parameter& p);
 
-int save_onnx(const Graph& g, const char* onnxpath)
+static unsigned short float32_to_float16(float value) // returns the fp16 bit pattern for one fp32 value; the significand is truncated, not rounded
+{
+    // fp32 bit layout is sign : exponent : significand = 1 : 8 : 23, the union exposes the raw bits of value
+    union
+    {
+        unsigned int u;
+        float f;
+    } tmp;
+
+    tmp.f = value;
+
+    // split the raw fp32 bits into the 1 : 8 : 23 fields
+    unsigned short sign = (tmp.u & 0x80000000) >> 31;
+    unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
+    unsigned int significand = tmp.u & 0x7FFFFF;
+
+    //     NCNN_LOGE("%d %d %d", sign, exponent, significand);
+
+    // fp16 bit layout is sign : exponent : significand = 1 : 5 : 10
+    unsigned short fp16;
+    if (exponent == 0)
+    {
+        // fp32 zero or denormal, always underflows to a signed zero in fp16
+        fp16 = (sign << 15) | (0x00 << 10) | 0x00;
+    }
+    else if (exponent == 0xFF)
+    {
+        // infinity or NaN, every nonzero fp32 significand collapses to the single NaN payload 0x200
+        fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
+    }
+    else
+    {
+        // normalized, rebias the exponent from the fp32 bias 127 to the fp16 bias 15
+        short newexp = exponent + (-127 + 15);
+        if (newexp >= 31)
+        {
+            // overflow, return infinity with the original sign
+            fp16 = (sign << 15) | (0x1F << 10) | 0x00;
+        }
+        else if (newexp <= 0)
+        {
+            // Some normal fp32 cannot be expressed as normal fp16, they are flushed to signed zero instead of an fp16 denormal
+            fp16 = (sign << 15) | (0x00 << 10) | 0x00;
+        }
+        else
+        {
+            // normal fp16, keep the top 10 significand bits and drop the low 13 bits without rounding
+            fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
+        }
+    }
+
+    return fp16;
+}
+
+int save_onnx(const Graph& g, const char* onnxpath, int fp16)
 {
     onnx::ModelProto model;
 
@@ -52,10 +106,10 @@ int save_onnx(const Graph& g, const char* onnxpath)
         switch (x->type)
         {
         case 1: // f32
-            tpt->set_elem_type(1);
+            tpt->set_elem_type(fp16 ? 10 : 1);
             break;
         case 2: // f64
-            tpt->set_elem_type(11);
+            tpt->set_elem_type(fp16 ? 10 : 11);
             break;
         case 3: // f16
             tpt->set_elem_type(10);
@@ -120,16 +174,12 @@ int save_onnx(const Graph& g, const char* onnxpath)
         }
 
         std::vector<const char*> params_keys = get_operator_params_keys(op);
-
-        // for (const auto& it : op->params)
         for (const char* param_name : params_keys)
         {
-            // const Parameter& param = it.second;
             const Parameter& param = get_operator_param(op, param_name);
 
             onnx::AttributeProto* ap = np->add_attribute();
 
-            // ap->set_name(get_param_name(it));
             ap->set_name(param_name);
 
             if (param.type == 0)
@@ -180,8 +230,6 @@ int save_onnx(const Graph& g, const char* onnxpath)
         }
 
         std::vector<const char*> attrs_keys = get_operator_attrs_keys(op);
-
-        // for (const auto& it : op->attrs)
         for (const char* attr_name : attrs_keys)
         {
             onnx::TensorProto* tp = gp->add_initializer();
@@ -190,7 +238,6 @@ int save_onnx(const Graph& g, const char* onnxpath)
 
             np->add_input(std::string(get_operator_name(op)) + "." + attr_name);
 
-            // const Attribute& attr = it.second;
             const Attribute& attr = get_operator_attr(op, attr_name);
             for (auto s : attr.shape)
             {
@@ -200,10 +247,10 @@ int save_onnx(const Graph& g, const char* onnxpath)
             switch (attr.type)
             {
             case 1: // f32
-                tp->set_data_type(1);
+                tp->set_data_type(fp16 ? 10 : 1);
                 break;
             case 2: // f64
-                tp->set_data_type(11);
+                tp->set_data_type(fp16 ? 10 : 11);
                 break;
             case 3: // f16
                 tp->set_data_type(10);
@@ -241,18 +288,36 @@ int save_onnx(const Graph& g, const char* onnxpath)
             }
 
             std::string* d = tp->mutable_raw_data();
-            d->resize(attr.data.size());
-            memcpy((void*)d->data(), attr.data.data(), attr.data.size());
+            if (fp16 && attr.type == 1)
+            {
+                // fp32 to fp16, every 4-byte source element becomes one 2-byte fp16 element
+                const float* p = (const float*)attr.data.data();
+                int len = attr.data.size() / sizeof(float);
+                d->resize(len * sizeof(unsigned short));
+                unsigned short* p_fp16 = (unsigned short*)d->data();
+                for (int i = 0; i < len; i++)
+                {
+                    p_fp16[i] = float32_to_float16(p[i]);
+                }
+            }
+            else if (fp16 && attr.type == 2)
+            {
+                // fp64 to fp16, the element count is size / 8 (not / 4) and the output needs 2 bytes per element
+                const double* p = (const double*)attr.data.data();
+                int len = attr.data.size() / sizeof(double);
+                d->resize(len * sizeof(unsigned short));
+                unsigned short* p_fp16 = (unsigned short*)d->data();
+                for (int i = 0; i < len; i++)
+                {
+                    p_fp16[i] = float32_to_float16((float)p[i]); // narrow to fp32 first, then reuse the fp32 -> fp16 conversion
+                }
+            }
+            else
+            {
+                d->resize(attr.data.size()); // fp16 disabled or not an fp32/fp64 tensor, copy the raw bytes unchanged
+                memcpy((void*)d->data(), attr.data.data(), attr.data.size());
+            }
         }
-
-        //         if (op->inputnames.size() == op->inputs.size())
-        //         {
-        //             for (size_t i = 0; i < op->inputs.size(); i++)
-        //             {
-        //                 const Operand* oprand = op->inputs[i];
-        //                 fprintf(paramfp, " $%s=%s", op->inputnames[i].c_str(), oprand->name.c_str());
-        //             }
-        //         }
     }
 
     std::fstream output(onnxpath, std::ios::out | std::ios::trunc | std::ios::binary);
diff --git a/tools/pnnx/src/save_onnx.h b/tools/pnnx/src/save_onnx.h
index 236a9911e..9a4099872 100644
--- a/tools/pnnx/src/save_onnx.h
+++ b/tools/pnnx/src/save_onnx.h
@@ -19,7 +19,7 @@
 
 namespace pnnx {
 
-int save_onnx(const Graph& g, const char* onnxpath);
+int save_onnx(const Graph& g, const char* onnxpath, int fp16);
 
 } // namespace pnnx