diff --git a/src/layer/x86/softmax_x86.cpp b/src/layer/x86/softmax_x86.cpp
index 96f1ff78f..d1df7e446 100644
--- a/src/layer/x86/softmax_x86.cpp
+++ b/src/layer/x86/softmax_x86.cpp
@@ -638,7 +638,7 @@ int Softmax_x86::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
             }
             _max = _mm256_max_ps(_max, _mm256_permute_ps(_max, _MM_SHUFFLE(2, 3, 0, 1)));
             _max = _mm256_max_ps(_max, _mm256_permute_ps(_max, _MM_SHUFFLE(1, 0, 3, 2)));
-            _max = _mm256_max_ps(_max, _mm256_permute2f128_ps(_max, _max, _MM_SHUFFLE(1, 0, 1, 0)));
+            _max = _mm256_max_ps(_max, _mm256_permute2f128_ps(_max, _max, _MM_SHUFFLE(0, 0, 0, 1)));
 
             __m256 _sum = _mm256_setzero_ps();
             for (int i = 0; i < w; i++)
@@ -650,7 +650,7 @@ int Softmax_x86::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
             }
             _sum = _mm256_add_ps(_sum, _mm256_permute_ps(_sum, _MM_SHUFFLE(2, 3, 0, 1)));
             _sum = _mm256_add_ps(_sum, _mm256_permute_ps(_sum, _MM_SHUFFLE(1, 0, 3, 2)));
-            _sum = _mm256_add_ps(_sum, _mm256_permute2f128_ps(_sum, _sum, _MM_SHUFFLE(1, 0, 1, 0)));
+            _sum = _mm256_add_ps(_sum, _mm256_permute2f128_ps(_sum, _sum, _MM_SHUFFLE(0, 0, 0, 1)));
 
             for (int i = 0; i < w; i++)
             {
diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp
index 43ba136eb..c2a406658 100644
--- a/tools/pnnx/src/ir.cpp
+++ b/tools/pnnx/src/ir.cpp
@@ -392,6 +392,42 @@ bool operator==(const Attribute& lhs, const Attribute& rhs)
     return true;
 }
 
+// Concat a and b along the first dim. Both need the same type, the same rank (>= 1) and equal
+// dims after the first; otherwise logs to stderr and returns a default-constructed Attribute.
+Attribute operator+(const Attribute& a, const Attribute& b)
+{
+    Attribute c;
+    if (a.type != b.type)
+    {
+        fprintf(stderr, "concat attribute type mismatch\n");
+        return c;
+    }
+
+    // rank 0 has no first dim to concat along, and c.shape[0] below would index an empty vector
+    if (a.shape.empty() || a.shape.size() != b.shape.size())
+    {
+        fprintf(stderr, "concat attribute shape rank mismatch\n");
+        return c;
+    }
+
+    for (int i = 1; i < (int)a.shape.size(); i++)
+    {
+        if (a.shape[i] != b.shape[i])
+        {
+            fprintf(stderr, "concat attribute shape mismatch\n");
+            return c;
+        }
+    }
+
+    c.type = a.type;
+    c.shape = a.shape;
+    c.shape[0] += b.shape[0]; // concat the first dim
+    // bytes of a followed by bytes of b; vector ops stay well-defined when a payload is empty
+    c.data = a.data;
+    c.data.insert(c.data.end(), b.data.begin(), b.data.end());
+    return c;
+}
+
 Parameter Parameter::parse_from_string(const std::string& value)
 {
     Parameter p;
@@ -2001,6 +2037,60 @@ static bool string_is_positive_integer(const std::string& t)
     return true;
 }
 
+static unsigned short float32_to_float16(float value)
+{
+    // Convert fp32 (1 : 8 : 23) to fp16 bits (1 : 5 : 10); the significand is truncated, not rounded
+    union
+    {
+        unsigned int u;
+        float f;
+    } tmp;
+    // write the float, then read its raw bits back through tmp.u
+    tmp.f = value;
+
+    // split the fp32 bit pattern into sign : exponent : significand, 1 : 8 : 23
+    unsigned short sign = (tmp.u & 0x80000000) >> 31;
+    unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
+    unsigned int significand = tmp.u & 0x7FFFFF;
+
+    //     NCNN_LOGE("%d %d %d", sign, exponent, significand);
+
+    // assemble the fp16 bit pattern as sign : exponent : significand, 1 : 5 : 10
+    unsigned short fp16;
+    if (exponent == 0)
+    {
+        // fp32 zero or denormal, always underflows to a signed zero in fp16
+        fp16 = (sign << 15) | (0x00 << 10) | 0x00;
+    }
+    else if (exponent == 0xFF)
+    {
+        // infinity or NaN; any nonzero fp32 significand yields the single NaN bit 0x200
+        fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
+    }
+    else
+    {
+        // normalized fp32, rebias the exponent from 127 to 15
+        short newexp = exponent + (-127 + 15);
+        if (newexp >= 31)
+        {
+            // overflow, return infinity
+            fp16 = (sign << 15) | (0x1F << 10) | 0x00;
+        }
+        else if (newexp <= 0)
+        {
+            // a normal fp32 too small for a normal fp16 is flushed to signed zero
+            fp16 = (sign << 15) | (0x00 << 10) | 0x00;
+        }
+        else
+        {
+            // normal fp16, keep the top 10 of the 23 significand bits
+            fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
+        }
+    }
+
+    return fp16;
+}
+
 int Graph::ncnn(const std::string& parampath, const std::string& binpath, const std::string& pypath)
 {
     FILE* paramfp = fopen(parampath.c_str(), "wb");
@@ -2136,12 +2226,38 @@ int Graph::ncnn(const std::string& parampath, const std::string& binpath, const
             }
         }
 
+        bool is_type_flag_fp32 = false;
         for (const auto& it : op->attrs)
         {
             //             fprintf(paramfp, " @%s=", it.first.c_str());
 
             const Attribute& attr = it.second;
 
+            if (is_type_flag_fp32)
+            {
+                // fp32 -> fp16
+                const float* p = (const float*)attr.data.data();
+                int len = attr.data.size() / 4;
+                for (int i = 0; i < len; i++)
+                {
+                    unsigned short v_fp16 = float32_to_float16(p[i]);
+                    fwrite(&v_fp16, sizeof(v_fp16), 1, binfp);
+                }
+
+                is_type_flag_fp32 = false;
+                continue;
+            }
+
+            if (attr.type == 0 && attr.data == std::vector<char> {0, 0, 0, 0})
+            {
+                // write fp16 flag
+                unsigned int fp16_flag = 0x01306B47;
+                fwrite(&fp16_flag, sizeof(fp16_flag), 1, binfp);
+
+                is_type_flag_fp32 = true;
+                continue;
+            }
+
             fwrite(attr.data.data(), attr.data.size(), 1, binfp);
         }
 
diff --git a/tools/pnnx/src/ir.h b/tools/pnnx/src/ir.h
index 9a86176c0..f3524eaaf 100644
--- a/tools/pnnx/src/ir.h
+++ b/tools/pnnx/src/ir.h
@@ -155,6 +155,9 @@ public:
 
 bool operator==(const Attribute& lhs, const Attribute& rhs);
 
+// concat two attributes along the first axis
+Attribute operator+(const Attribute& a, const Attribute& b);
+
 class Operator;
 class Operand
 {
diff --git a/tools/pnnx/src/pass_ncnn/nn_GRU.cpp b/tools/pnnx/src/pass_ncnn/nn_GRU.cpp
index 7e4231e6a..0514fd7cc 100644
--- a/tools/pnnx/src/pass_ncnn/nn_GRU.cpp
+++ b/tools/pnnx/src/pass_ncnn/nn_GRU.cpp
@@ -60,13 +60,14 @@ pnnx.Output             output      2 0 out out_hidden
 
         // RUN-hidden-input_size
         {
-            op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0");
             if (bidirectional)
-                op->attrs["2"] = captured_attrs.at("op_0.weight_ih_l0_reverse");
+                op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0") + captured_attrs.at("op_0.weight_ih_l0_reverse");
+            else
+                op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0");
         }
 
-        op->attrs["3"] = Attribute();
-        op->attrs["3"].data = {0, 0, 0, 0};
+        op->attrs["2"] = Attribute();
+        op->attrs["2"].data = {0, 0, 0, 0};
         if (captured_params.at("bias").b)
         {
             // reduce bias_ih and bias_hh
@@ -85,8 +86,6 @@ pnnx.Output             output      2 0 out out_hidden
                 memcpy(bias + num_output * 3, bias_hh + num_output * 2, num_output * sizeof(float));
             }
 
-            op->attrs["4"] = Attribute({4, num_output}, new_bias);
-
             if (bidirectional)
             {
                 std::vector<float> new_bias_reverse;
@@ -104,28 +103,32 @@ pnnx.Output             output      2 0 out out_hidden
                     memcpy(bias + num_output * 3, bias_hh + num_output * 2, num_output * sizeof(float));
                 }
 
-                op->attrs["5"] = Attribute({4, num_output}, new_bias_reverse);
+                op->attrs["3"] = Attribute({4, num_output}, new_bias) + Attribute({4, num_output}, new_bias_reverse);
+            }
+            else
+            {
+                op->attrs["3"] = Attribute({4, num_output}, new_bias);
             }
         }
         else
         {
             std::vector<float> bias(4 * num_output, 0.f);
-            op->attrs["4"] = Attribute({4, num_output}, bias);
 
             if (bidirectional)
-            {
-                op->attrs["5"] = Attribute({4, num_output}, bias);
-            }
+                op->attrs["3"] = Attribute({4, num_output}, bias) + Attribute({4, num_output}, bias);
+            else
+                op->attrs["3"] = Attribute({4, num_output}, bias);
         }
 
-        op->attrs["6"] = Attribute();
-        op->attrs["6"].data = {0, 0, 0, 0};
+        op->attrs["4"] = Attribute();
+        op->attrs["4"].data = {0, 0, 0, 0};
 
         // RUN-hidden-hidden
         {
-            op->attrs["7"] = captured_attrs.at("op_0.weight_hh_l0");
             if (bidirectional)
-                op->attrs["8"] = captured_attrs.at("op_0.weight_hh_l0_reverse");
+                op->attrs["5"] = captured_attrs.at("op_0.weight_hh_l0") + captured_attrs.at("op_0.weight_hh_l0_reverse");
+            else
+                op->attrs["5"] = captured_attrs.at("op_0.weight_hh_l0");
         }
     }
 };
diff --git a/tools/pnnx/src/pass_ncnn/nn_LSTM.cpp b/tools/pnnx/src/pass_ncnn/nn_LSTM.cpp
index 899ee1fac..ba62b5271 100644
--- a/tools/pnnx/src/pass_ncnn/nn_LSTM.cpp
+++ b/tools/pnnx/src/pass_ncnn/nn_LSTM.cpp
@@ -81,7 +81,6 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                 memcpy(w_optr, optr, weight_data_size_g * sizeof(float));
                 memcpy(w_gptr, gptr, weight_data_size_g * sizeof(float));
             }
-            op->attrs["1"] = Attribute({4, num_output, input_size}, new_weight_ih);
 
             if (bidirectional)
             {
@@ -106,12 +105,16 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                     memcpy(w_optr, optr, weight_data_size_g * sizeof(float));
                     memcpy(w_gptr, gptr, weight_data_size_g * sizeof(float));
                 }
-                op->attrs["2"] = Attribute({4, num_output, input_size}, new_weight_ih_reverse);
+                op->attrs["1"] = Attribute({4, num_output, input_size}, new_weight_ih) + Attribute({4, num_output, input_size}, new_weight_ih_reverse);
+            }
+            else
+            {
+                op->attrs["1"] = Attribute({4, num_output, input_size}, new_weight_ih);
             }
         }
 
-        op->attrs["3"] = Attribute();
-        op->attrs["3"].data = {0, 0, 0, 0};
+        op->attrs["2"] = Attribute();
+        op->attrs["2"].data = {0, 0, 0, 0};
         if (captured_params.at("bias").b)
         {
             // reduce bias_ih and bias_hh
@@ -153,8 +156,6 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                 }
             }
 
-            op->attrs["4"] = Attribute({4, num_output}, new_bias);
-
             if (bidirectional)
             {
                 std::vector<float> new_bias_reverse;
@@ -194,22 +195,25 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                     }
                 }
 
-                op->attrs["5"] = Attribute({4, num_output}, new_bias_reverse);
+                op->attrs["3"] = Attribute({4, num_output}, new_bias) + Attribute({4, num_output}, new_bias_reverse);
+            }
+            else
+            {
+                op->attrs["3"] = Attribute({4, num_output}, new_bias);
             }
         }
         else
         {
             std::vector<float> bias(4 * num_output, 0.f);
-            op->attrs["4"] = Attribute({4, num_output}, bias);
 
             if (bidirectional)
-            {
-                op->attrs["5"] = Attribute({4, num_output}, bias);
-            }
+                op->attrs["3"] = Attribute({4, num_output}, bias) + Attribute({4, num_output}, bias);
+            else
+                op->attrs["3"] = Attribute({4, num_output}, bias);
         }
 
-        op->attrs["6"] = Attribute();
-        op->attrs["6"].data = {0, 0, 0, 0};
+        op->attrs["4"] = Attribute();
+        op->attrs["4"].data = {0, 0, 0, 0};
 
         // reorder IFGO-hidden-hidden to IFOG-hidden-hidden
         {
@@ -234,7 +238,6 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                 memcpy(w_optr, optr, weight_data_size_g * sizeof(float));
                 memcpy(w_gptr, gptr, weight_data_size_g * sizeof(float));
             }
-            op->attrs["7"] = Attribute({4, num_output, num_output}, new_weight_hh);
 
             if (bidirectional)
             {
@@ -259,7 +262,11 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                     memcpy(w_optr, optr, weight_data_size_g * sizeof(float));
                     memcpy(w_gptr, gptr, weight_data_size_g * sizeof(float));
                 }
-                op->attrs["8"] = Attribute({4, num_output, num_output}, new_weight_hh_reverse);
+                op->attrs["5"] = Attribute({4, num_output, num_output}, new_weight_hh) + Attribute({4, num_output, num_output}, new_weight_hh_reverse);
+            }
+            else
+            {
+                op->attrs["5"] = Attribute({4, num_output, num_output}, new_weight_hh);
             }
         }
     }
diff --git a/tools/pnnx/src/pass_ncnn/nn_RNN.cpp b/tools/pnnx/src/pass_ncnn/nn_RNN.cpp
index c9deab9d9..fd64f18f0 100644
--- a/tools/pnnx/src/pass_ncnn/nn_RNN.cpp
+++ b/tools/pnnx/src/pass_ncnn/nn_RNN.cpp
@@ -63,12 +63,13 @@ pnnx.Output             output      2 0 out out_hidden
 
         op->attrs["0"] = Attribute();
         op->attrs["0"].data = {0, 0, 0, 0};
-        op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0");
         if (bidirectional)
-            op->attrs["2"] = captured_attrs.at("op_0.weight_ih_l0_reverse");
+            op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0") + captured_attrs.at("op_0.weight_ih_l0_reverse");
+        else
+            op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0");
 
-        op->attrs["3"] = Attribute();
-        op->attrs["3"].data = {0, 0, 0, 0};
+        op->attrs["2"] = Attribute();
+        op->attrs["2"].data = {0, 0, 0, 0};
         if (captured_params.at("bias").b)
         {
             // reduce bias_ih and bias_hh
@@ -85,8 +86,6 @@ pnnx.Output             output      2 0 out out_hidden
                 }
             }
 
-            op->attrs["4"] = Attribute({num_output}, new_bias);
-
             if (bidirectional)
             {
                 std::vector<float> new_bias_reverse;
@@ -102,25 +101,29 @@ pnnx.Output             output      2 0 out out_hidden
                     }
                 }
 
-                op->attrs["5"] = Attribute({num_output}, new_bias_reverse);
+                op->attrs["3"] = Attribute({num_output}, new_bias) + Attribute({num_output}, new_bias_reverse);
+            }
+            else
+            {
+                op->attrs["3"] = Attribute({num_output}, new_bias);
             }
         }
         else
         {
             std::vector<float> bias(num_output, 0.f);
-            op->attrs["4"] = Attribute({num_output}, bias);
 
             if (bidirectional)
-            {
-                op->attrs["5"] = Attribute({num_output}, bias);
-            }
+                op->attrs["3"] = Attribute({num_output}, bias) + Attribute({num_output}, bias);
+            else
+                op->attrs["3"] = Attribute({num_output}, bias);
         }
 
-        op->attrs["6"] = Attribute();
-        op->attrs["6"].data = {0, 0, 0, 0};
-        op->attrs["7"] = captured_attrs.at("op_0.weight_hh_l0");
+        op->attrs["4"] = Attribute();
+        op->attrs["4"].data = {0, 0, 0, 0};
         if (bidirectional)
-            op->attrs["8"] = captured_attrs.at("op_0.weight_hh_l0_reverse");
+            op->attrs["5"] = captured_attrs.at("op_0.weight_hh_l0") + captured_attrs.at("op_0.weight_hh_l0_reverse");
+        else
+            op->attrs["5"] = captured_attrs.at("op_0.weight_hh_l0");
     }
 };
 
diff --git a/tools/pnnx/tests/ncnn/test_F_conv1d.py b/tools/pnnx/tests/ncnn/test_F_conv1d.py
index d1d6290fa..c121fa119 100644
--- a/tools/pnnx/tests/ncnn/test_F_conv1d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv1d.py
@@ -36,7 +36,7 @@ class Model(nn.Module):
         return x, y
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_F_conv2d.py b/tools/pnnx/tests/ncnn/test_F_conv2d.py
index 6a9a3591c..d27c6a961 100644
--- a/tools/pnnx/tests/ncnn/test_F_conv2d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv2d.py
@@ -36,7 +36,7 @@ class Model(nn.Module):
         return x, y
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_F_conv3d.py b/tools/pnnx/tests/ncnn/test_F_conv3d.py
index 32a26cb73..36f2518b3 100644
--- a/tools/pnnx/tests/ncnn/test_F_conv3d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv3d.py
@@ -30,7 +30,7 @@ class Model(nn.Module):
         return y
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py b/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
index ee7a56195..479a2d6da 100644
--- a/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
@@ -30,7 +30,7 @@ class Model(nn.Module):
         return y
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py b/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
index 64c6b7eab..7dc2e1e26 100644
--- a/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
@@ -30,7 +30,7 @@ class Model(nn.Module):
         return y
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_F_conv_transpose3d.py b/tools/pnnx/tests/ncnn/test_F_conv_transpose3d.py
index ac3087f4e..a75d5e0a0 100644
--- a/tools/pnnx/tests/ncnn/test_F_conv_transpose3d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv_transpose3d.py
@@ -30,7 +30,7 @@ class Model(nn.Module):
         return y
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_F_embedding.py b/tools/pnnx/tests/ncnn/test_F_embedding.py
index c45d1e5c2..41a565a1f 100644
--- a/tools/pnnx/tests/ncnn/test_F_embedding.py
+++ b/tools/pnnx/tests/ncnn/test_F_embedding.py
@@ -27,7 +27,7 @@ class Model(nn.Module):
         return y
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_mobilenet_v2.py b/tools/pnnx/tests/ncnn/test_mobilenet_v2.py
index 29158c1be..ea6bddfca 100644
--- a/tools/pnnx/tests/ncnn/test_mobilenet_v2.py
+++ b/tools/pnnx/tests/ncnn/test_mobilenet_v2.py
@@ -16,7 +16,7 @@ import torch
 import torchvision.models as models
 
 def test():
-    net = models.mobilenet_v2()
+    net = models.mobilenet_v2().half().float()
     net.eval()
 
     torch.manual_seed(0)
@@ -36,7 +36,7 @@ def test():
     import test_mobilenet_v2_ncnn
     b = test_mobilenet_v2_ncnn.test_inference()
 
-    return torch.allclose(a, b, 1e-4, 1e-4)
+    return torch.allclose(a, b, 1e-2, 1e-2)
 
 if __name__ == "__main__":
     if test():
diff --git a/tools/pnnx/tests/ncnn/test_mobilenet_v3_small.py b/tools/pnnx/tests/ncnn/test_mobilenet_v3_small.py
index ae07ce249..309f59127 100644
--- a/tools/pnnx/tests/ncnn/test_mobilenet_v3_small.py
+++ b/tools/pnnx/tests/ncnn/test_mobilenet_v3_small.py
@@ -16,7 +16,7 @@ import torch
 import torchvision.models as models
 
 def test():
-    net = models.mobilenet_v3_small()
+    net = models.mobilenet_v3_small().half().float()
     net.eval()
 
     torch.manual_seed(0)
@@ -36,7 +36,7 @@ def test():
     import test_mobilenet_v3_small_ncnn
     b = test_mobilenet_v3_small_ncnn.test_inference()
 
-    return torch.allclose(a, b, 1e-4, 1e-4)
+    return torch.allclose(a, b, 1e-2, 1e-2)
 
 if __name__ == "__main__":
     if test():
diff --git a/tools/pnnx/tests/ncnn/test_ncnn_fuse_transpose_matmul.py b/tools/pnnx/tests/ncnn/test_ncnn_fuse_transpose_matmul.py
index af1b40bf9..439a72269 100644
--- a/tools/pnnx/tests/ncnn/test_ncnn_fuse_transpose_matmul.py
+++ b/tools/pnnx/tests/ncnn/test_ncnn_fuse_transpose_matmul.py
@@ -81,10 +81,6 @@ def test():
 
     for a0, b0 in zip(a, b):
         if not torch.allclose(a0, b0, 1e-4, 1e-4):
-            print(a0.shape)
-            print(b0.shape)
-            print(a0)
-            print(b0)
             return False
     return True
 
diff --git a/tools/pnnx/tests/ncnn/test_nn_Conv1d.py b/tools/pnnx/tests/ncnn/test_nn_Conv1d.py
index b398a0268..8517a90ed 100644
--- a/tools/pnnx/tests/ncnn/test_nn_Conv1d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Conv1d.py
@@ -44,7 +44,7 @@ class Model(nn.Module):
         return x
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_nn_Conv2d.py b/tools/pnnx/tests/ncnn/test_nn_Conv2d.py
index 33e83729c..19b625c6e 100644
--- a/tools/pnnx/tests/ncnn/test_nn_Conv2d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Conv2d.py
@@ -44,7 +44,7 @@ class Model(nn.Module):
         return x
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_nn_Conv3d.py b/tools/pnnx/tests/ncnn/test_nn_Conv3d.py
index 3032d3d50..1f52e3dd5 100644
--- a/tools/pnnx/tests/ncnn/test_nn_Conv3d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Conv3d.py
@@ -46,7 +46,7 @@ class Model(nn.Module):
         return x
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_nn_ConvTranspose1d.py b/tools/pnnx/tests/ncnn/test_nn_ConvTranspose1d.py
index 07899cb3c..0f201e747 100644
--- a/tools/pnnx/tests/ncnn/test_nn_ConvTranspose1d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_ConvTranspose1d.py
@@ -42,7 +42,7 @@ class Model(nn.Module):
         return x
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_nn_ConvTranspose2d.py b/tools/pnnx/tests/ncnn/test_nn_ConvTranspose2d.py
index f7ad8f83a..f13c7c866 100644
--- a/tools/pnnx/tests/ncnn/test_nn_ConvTranspose2d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_ConvTranspose2d.py
@@ -42,7 +42,7 @@ class Model(nn.Module):
         return x
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_nn_ConvTranspose3d.py b/tools/pnnx/tests/ncnn/test_nn_ConvTranspose3d.py
index b5262919a..683c44755 100644
--- a/tools/pnnx/tests/ncnn/test_nn_ConvTranspose3d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_ConvTranspose3d.py
@@ -42,7 +42,7 @@ class Model(nn.Module):
         return x
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_nn_Embedding.py b/tools/pnnx/tests/ncnn/test_nn_Embedding.py
index c34a0b650..b97056fa9 100644
--- a/tools/pnnx/tests/ncnn/test_nn_Embedding.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Embedding.py
@@ -27,7 +27,7 @@ class Model(nn.Module):
         return x
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_nn_GRU.py b/tools/pnnx/tests/ncnn/test_nn_GRU.py
index 89d00ab38..3486ac550 100644
--- a/tools/pnnx/tests/ncnn/test_nn_GRU.py
+++ b/tools/pnnx/tests/ncnn/test_nn_GRU.py
@@ -54,7 +54,7 @@ class Model(nn.Module):
         return x2, x3, x4, y2, y3, y4
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
@@ -76,9 +76,7 @@ def test():
     b = test_nn_GRU_ncnn.test_inference()
 
     for a0, b0 in zip(a, b):
-        if not torch.allclose(a0, b0, 1e-4, 1e-4):
-            print(a0.shape)
-            print(b0.shape)
+        if not torch.allclose(a0, b0, 1e-3, 1e-3):
             return False
     return True
 
diff --git a/tools/pnnx/tests/ncnn/test_nn_LSTM.py b/tools/pnnx/tests/ncnn/test_nn_LSTM.py
index 55384e888..575d44aac 100644
--- a/tools/pnnx/tests/ncnn/test_nn_LSTM.py
+++ b/tools/pnnx/tests/ncnn/test_nn_LSTM.py
@@ -54,7 +54,7 @@ class Model(nn.Module):
         return x2, x3, x4, y2, y3, y4
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
@@ -76,7 +76,7 @@ def test():
     b = test_nn_LSTM_ncnn.test_inference()
 
     for a0, b0 in zip(a, b):
-        if not torch.allclose(a0, b0, 1e-4, 1e-4):
+        if not torch.allclose(a0, b0, 1e-3, 1e-3):
             return False
     return True
 
diff --git a/tools/pnnx/tests/ncnn/test_nn_Linear.py b/tools/pnnx/tests/ncnn/test_nn_Linear.py
index 81f29a434..a2a602898 100644
--- a/tools/pnnx/tests/ncnn/test_nn_Linear.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Linear.py
@@ -33,7 +33,7 @@ class Model(nn.Module):
         return x, y
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_nn_MultiheadAttention.py b/tools/pnnx/tests/ncnn/test_nn_MultiheadAttention.py
index 76058da2d..580e5d5ec 100644
--- a/tools/pnnx/tests/ncnn/test_nn_MultiheadAttention.py
+++ b/tools/pnnx/tests/ncnn/test_nn_MultiheadAttention.py
@@ -36,7 +36,7 @@ class Model(nn.Module):
         return x0, y0
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
diff --git a/tools/pnnx/tests/ncnn/test_nn_RNN.py b/tools/pnnx/tests/ncnn/test_nn_RNN.py
index bc891b348..c2685b51c 100644
--- a/tools/pnnx/tests/ncnn/test_nn_RNN.py
+++ b/tools/pnnx/tests/ncnn/test_nn_RNN.py
@@ -54,7 +54,7 @@ class Model(nn.Module):
         return x2, x3, x4, y2, y3, y4
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
@@ -76,7 +76,7 @@ def test():
     b = test_nn_RNN_ncnn.test_inference()
 
     for a0, b0 in zip(a, b):
-        if not torch.allclose(a0, b0, 1e-4, 1e-4):
+        if not torch.allclose(a0, b0, 1e-3, 1e-3):
             return False
     return True
 
diff --git a/tools/pnnx/tests/ncnn/test_resnet18.py b/tools/pnnx/tests/ncnn/test_resnet18.py
index 46990baca..1a31f7949 100644
--- a/tools/pnnx/tests/ncnn/test_resnet18.py
+++ b/tools/pnnx/tests/ncnn/test_resnet18.py
@@ -16,7 +16,7 @@ import torch
 import torchvision.models as models
 
 def test():
-    net = models.resnet18()
+    net = models.resnet18().half().float()
     net.eval()
 
     torch.manual_seed(0)
@@ -36,7 +36,7 @@ def test():
     import test_resnet18_ncnn
     b = test_resnet18_ncnn.test_inference()
 
-    return torch.allclose(a, b, 1e-4, 1e-4)
+    return torch.allclose(a, b, 1e-2, 1e-2)
 
 if __name__ == "__main__":
     if test():
diff --git a/tools/pnnx/tests/ncnn/test_shufflenet_v2_x1_0.py b/tools/pnnx/tests/ncnn/test_shufflenet_v2_x1_0.py
index 608c056a0..430a6579e 100644
--- a/tools/pnnx/tests/ncnn/test_shufflenet_v2_x1_0.py
+++ b/tools/pnnx/tests/ncnn/test_shufflenet_v2_x1_0.py
@@ -16,7 +16,7 @@ import torch
 import torchvision.models as models
 
 def test():
-    net = models.shufflenet_v2_x1_0()
+    net = models.shufflenet_v2_x1_0().half().float()
     net.eval()
 
     torch.manual_seed(0)
@@ -36,7 +36,7 @@ def test():
     import test_shufflenet_v2_x1_0_ncnn
     b = test_shufflenet_v2_x1_0_ncnn.test_inference()
 
-    return torch.allclose(a, b, 1e-4, 1e-4)
+    return torch.allclose(a, b, 1e-2, 1e-2)
 
 if __name__ == "__main__":
     if test():
diff --git a/tools/pnnx/tests/ncnn/test_squeezenet1_1.py b/tools/pnnx/tests/ncnn/test_squeezenet1_1.py
index 665b15edd..f7c35ae55 100644
--- a/tools/pnnx/tests/ncnn/test_squeezenet1_1.py
+++ b/tools/pnnx/tests/ncnn/test_squeezenet1_1.py
@@ -16,7 +16,7 @@ import torch
 import torchvision.models as models
 
 def test():
-    net = models.squeezenet1_1()
+    net = models.squeezenet1_1().half().float()
     net.eval()
 
     torch.manual_seed(0)
@@ -36,7 +36,7 @@ def test():
     import test_squeezenet1_1_ncnn
     b = test_squeezenet1_1_ncnn.test_inference()
 
-    return torch.allclose(a, b, 1e-4, 1e-4)
+    return torch.allclose(a, b, 1e-2, 1e-2)
 
 if __name__ == "__main__":
     if test():
diff --git a/tools/pnnx/tests/ncnn/test_torch_addmm.py b/tools/pnnx/tests/ncnn/test_torch_addmm.py
index 61c402a8c..3778dc43d 100644
--- a/tools/pnnx/tests/ncnn/test_torch_addmm.py
+++ b/tools/pnnx/tests/ncnn/test_torch_addmm.py
@@ -30,7 +30,7 @@ class Model(nn.Module):
         return a, b, c
 
 def test():
-    net = Model()
+    net = Model().half().float()
     net.eval()
 
     torch.manual_seed(0)
@@ -58,10 +58,6 @@ def test():
 
     for a0, b0 in zip(a, b):
         if not torch.allclose(a0, b0, 1e-4, 1e-4):
-            print(a0.shape)
-            print(b0.shape)
-            print(a0)
-            print(b0)
             return False
     return True
 
diff --git a/tools/pnnx/tests/ncnn/test_torch_chunk.py b/tools/pnnx/tests/ncnn/test_torch_chunk.py
index 1056839cf..c24125b23 100644
--- a/tools/pnnx/tests/ncnn/test_torch_chunk.py
+++ b/tools/pnnx/tests/ncnn/test_torch_chunk.py
@@ -49,8 +49,6 @@ def test():
 
     for a0, b0 in zip(a, b):
         if not torch.equal(a0, b0):
-            print(a0.shape)
-            print(b0.shape)
             return False
     return True
 
diff --git a/tools/pnnx/tests/ncnn/test_torch_matmul.py b/tools/pnnx/tests/ncnn/test_torch_matmul.py
index b8faa3286..e4cd3b9b1 100644
--- a/tools/pnnx/tests/ncnn/test_torch_matmul.py
+++ b/tools/pnnx/tests/ncnn/test_torch_matmul.py
@@ -93,10 +93,6 @@ def test():
 
     for a0, b0 in zip(a, b):
         if not torch.allclose(a0, b0, 1e-4, 1e-4):
-            print(a0.shape)
-            print(b0.shape)
-            print(a0)
-            print(b0)
             return False
     return True
 
diff --git a/tools/pnnx/tests/ncnn/test_torch_squeeze.py b/tools/pnnx/tests/ncnn/test_torch_squeeze.py
index 56f0d69c8..339cebe8f 100644
--- a/tools/pnnx/tests/ncnn/test_torch_squeeze.py
+++ b/tools/pnnx/tests/ncnn/test_torch_squeeze.py
@@ -53,8 +53,6 @@ def test():
 
     for a0, b0 in zip(a, b):
         if not torch.allclose(a0, b0, 1e-4, 1e-4):
-            print(a0.shape)
-            print(b0.shape)
             return False
     return True
 
diff --git a/tools/pnnx/tests/ncnn/test_torch_unsqueeze.py b/tools/pnnx/tests/ncnn/test_torch_unsqueeze.py
index d301a459e..baf121710 100644
--- a/tools/pnnx/tests/ncnn/test_torch_unsqueeze.py
+++ b/tools/pnnx/tests/ncnn/test_torch_unsqueeze.py
@@ -51,8 +51,6 @@ def test():
 
     for a0, b0 in zip(a, b):
         if not torch.allclose(a0, b0, 1e-4, 1e-4):
-            print(a0.shape)
-            print(b0.shape)
             return False
     return True