pnnx save ncnn bin with fp16 storage (#3715)

4 years ago · a14acbc6dd
--- a/src/layer/x86/softmax_x86.cpp
+++ b/src/layer/x86/softmax_x86.cpp
@@ -638,7 +638,7 @@ int Softmax_x86::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
            }
            _max = _mm256_max_ps(_max, _mm256_permute_ps(_max, _MM_SHUFFLE(2, 3, 0, 1)));
            _max = _mm256_max_ps(_max, _mm256_permute_ps(_max, _MM_SHUFFLE(1, 0, 3, 2)));
            _max = _mm256_max_ps(_max, _mm256_permute2f128_ps(_max, _max, _MM_SHUFFLE(1, 0, 1, 0)));
            _max = _mm256_max_ps(_max, _mm256_permute2f128_ps(_max, _max, _MM_SHUFFLE(0, 0, 0, 1)));

            __m256 _sum = _mm256_setzero_ps();
            for (int i = 0; i < w; i++)
@@ -650,7 +650,7 @@ int Softmax_x86::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
            }
            _sum = _mm256_add_ps(_sum, _mm256_permute_ps(_sum, _MM_SHUFFLE(2, 3, 0, 1)));
            _sum = _mm256_add_ps(_sum, _mm256_permute_ps(_sum, _MM_SHUFFLE(1, 0, 3, 2)));
            _sum = _mm256_add_ps(_sum, _mm256_permute2f128_ps(_sum, _sum, _MM_SHUFFLE(1, 0, 1, 0)));
            _sum = _mm256_add_ps(_sum, _mm256_permute2f128_ps(_sum, _sum, _MM_SHUFFLE(0, 0, 0, 1)));

            for (int i = 0; i < w; i++)
            {
--- a/tools/pnnx/src/ir.cpp
+++ b/tools/pnnx/src/ir.cpp
@@ -392,6 +392,42 @@ bool operator==(const Attribute& lhs, const Attribute& rhs)
    return true;
 }

 // Concatenate two attributes along their first axis.
 //
 // Both operands must have the same type, the same rank (at least 1) and
 // identical extents on every axis except the first. The result has
 // shape[0] == a.shape[0] + b.shape[0] and its data is the bytes of a
 // followed by the bytes of b.
 //
 // On any mismatch a message is printed to stderr and a default-constructed
 // Attribute is returned.
 Attribute operator+(const Attribute& a, const Attribute& b)
 {
    Attribute c;

    if (a.type != b.type)
    {
        fprintf(stderr, "concat attribute type mismatch\n");
        return c;
    }

    if (a.shape.size() != b.shape.size())
    {
        fprintf(stderr, "concat attribute shape rank mismatch\n");
        return c;
    }

    if (a.shape.empty())
    {
        // a rank-0 attribute has no first axis to extend; touching shape[0]
        // below would index past the end of an empty vector
        fprintf(stderr, "concat attribute shape rank is zero\n");
        return c;
    }

    // every axis except the first one must agree
    for (int i = 1; i < (int)a.shape.size(); i++)
    {
        if (a.shape[i] != b.shape[i])
        {
            fprintf(stderr, "concat attribute shape mismatch\n");
            return c;
        }
    }

    c.type = a.type;
    c.shape = a.shape;
    c.shape[0] += b.shape[0]; // concat the first dim

    // a's payload followed by b's payload; vector assign/insert stays well
    // defined even when one of the payloads is empty
    c.data = a.data;
    c.data.insert(c.data.end(), b.data.begin(), b.data.end());

    return c;
 }

 Parameter Parameter::parse_from_string(const std::string& value)
 {
    Parameter p;
@@ -2001,6 +2037,60 @@ static bool string_is_positive_integer(const std::string& t)
    return true;
 }

 // Convert a 32-bit float to the bit pattern of a 16-bit half float.
 //
 // value   fp32 input, interpreted as sign : exponent : significand = 1 : 8 : 23
 // return  fp16 bits, laid out as 1 : 5 : 10
 //
 // Conversion rules:
 //   - infinity stays infinity, any NaN becomes a NaN with significand 0x200
 //   - magnitudes too large for fp16 become signed infinity
 //   - in-range values are rounded to nearest, ties to even
 //   - small magnitudes become fp16 subnormals, and signed zero once they
 //     round below the smallest subnormal (this covers fp32 zero/denormals)
 static unsigned short float32_to_float16(float value)
 {
    // fetch the raw 1 : 8 : 23 pattern; memcpy is used because reading the
    // inactive member of a union is not defined behaviour in C++
    unsigned int bits;
    memcpy(&bits, &value, sizeof(bits));

    const unsigned int sign = (bits >> 16) & 0x8000; // already at the fp16 sign position
    const int exponent = (int)((bits >> 23) & 0xFF);
    const unsigned int significand = bits & 0x7FFFFF;

    if (exponent == 0xFF)
    {
        // infinity or NaN
        return (unsigned short)(sign | (0x1F << 10) | (significand ? 0x200 : 0x00));
    }

    // rebias the exponent from fp32 (127) to fp16 (15)
    const int newexp = exponent + (-127 + 15);

    if (newexp >= 31)
    {
        // overflow, return infinity
        return (unsigned short)(sign | (0x1F << 10));
    }

    if (newexp <= 0)
    {
        // below the smallest normal fp16
        if (newexp < -10)
        {
            // less than half of the smallest subnormal (also fp32 zero and
            // fp32 denormals), rounds to signed zero
            return (unsigned short)sign;
        }

        // fp16 subnormal: restore the implicit leading one and shift the
        // 24-bit significand down so that one unit equals 2^-24
        const unsigned int mant = significand | 0x800000;
        const int shift = 14 - newexp; // 14 .. 24
        unsigned int half = mant >> shift;
        const unsigned int rem = mant & ((1u << shift) - 1);
        const unsigned int halfway = 1u << (shift - 1);
        if (rem > halfway || (rem == halfway && (half & 1)))
        {
            // round to nearest even; 0x3FF + 1 yields the smallest normal
            half++;
        }

        return (unsigned short)(sign | half);
    }

    // normal fp16
    unsigned int half = ((unsigned int)newexp << 10) | (significand >> 13);
    const unsigned int rem = significand & 0x1FFF; // the 13 dropped bits
    if (rem > 0x1000 || (rem == 0x1000 && (half & 1)))
    {
        // round to nearest even; a carry out of the significand bumps the
        // exponent and reaches infinity (0x7C00) past the largest finite value
        half++;
    }

    return (unsigned short)(sign | half);
 }

 int Graph::ncnn(const std::string& parampath, const std::string& binpath, const std::string& pypath)
 {
    FILE* paramfp = fopen(parampath.c_str(), "wb");
@@ -2136,12 +2226,38 @@ int Graph::ncnn(const std::string& parampath, const std::string& binpath, const
            }
        }

        // Write every attribute of this operator into the bin file.
        // is_type_flag_fp32 is raised right after a flag attribute has been
        // seen, so that the attribute following it is stored as fp16.
        bool is_type_flag_fp32 = false;
        for (const auto& it : op->attrs)
        {
            //             fprintf(paramfp, " @%s=", it.first.c_str());

            const Attribute& attr = it.second;

            if (is_type_flag_fp32)
            {
                // fp32 -> fp16
                // the payload is read as packed 4-byte floats and each one is
                // written as a 2-byte half
                // NOTE(review): assumes the attribute after a flag always holds fp32 data - confirm
                const float* p = (const float*)attr.data.data();
                int len = attr.data.size() / 4;
                for (int i = 0; i < len; i++)
                {
                    unsigned short v_fp16 = float32_to_float16(p[i]);
                    fwrite(&v_fp16, sizeof(v_fp16), 1, binfp);
                }

                // the flag only applies to the single attribute after it
                is_type_flag_fp32 = false;
                continue;
            }

            // an attribute with type 0 whose data is exactly four zero bytes
            // is treated as the flag placeholder
            if (attr.type == 0 && attr.data == std::vector<char> {0, 0, 0, 0})
            {
                // write fp16 flag
                // the four zero bytes themselves are not written; the fp16
                // flag value takes their place
                unsigned int fp16_flag = 0x01306B47;
                fwrite(&fp16_flag, sizeof(fp16_flag), 1, binfp);

                is_type_flag_fp32 = true;
                continue;
            }

            // any other attribute is written out byte for byte
            fwrite(attr.data.data(), attr.data.size(), 1, binfp);
        }

--- a/tools/pnnx/src/ir.h
+++ b/tools/pnnx/src/ir.h
@@ -155,6 +155,9 @@ public:

 bool operator==(const Attribute& lhs, const Attribute& rhs);

 // concat two attributes along the first axis
 Attribute operator+(const Attribute& a, const Attribute& b);

 class Operator;
 class Operand
 {
--- a/tools/pnnx/src/pass_ncnn/nn_GRU.cpp
+++ b/tools/pnnx/src/pass_ncnn/nn_GRU.cpp
@@ -60,13 +60,14 @@ pnnx.Output             output      2 0 out out_hidden

        // RUN-hidden-input_size
        {
            op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0");
            if (bidirectional)
                op->attrs["2"] = captured_attrs.at("op_0.weight_ih_l0_reverse");
                op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0") + captured_attrs.at("op_0.weight_ih_l0_reverse");
            else
                op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0");
        }

        op->attrs["3"] = Attribute();
        op->attrs["3"].data = {0, 0, 0, 0};
        op->attrs["2"] = Attribute();
        op->attrs["2"].data = {0, 0, 0, 0};
        if (captured_params.at("bias").b)
        {
            // reduce bias_ih and bias_hh
@@ -85,8 +86,6 @@ pnnx.Output             output      2 0 out out_hidden
                memcpy(bias + num_output * 3, bias_hh + num_output * 2, num_output * sizeof(float));
            }

            op->attrs["4"] = Attribute({4, num_output}, new_bias);

            if (bidirectional)
            {
                std::vector<float> new_bias_reverse;
@@ -104,28 +103,32 @@ pnnx.Output             output      2 0 out out_hidden
                    memcpy(bias + num_output * 3, bias_hh + num_output * 2, num_output * sizeof(float));
                }

                op->attrs["5"] = Attribute({4, num_output}, new_bias_reverse);
                op->attrs["3"] = Attribute({4, num_output}, new_bias) + Attribute({4, num_output}, new_bias_reverse);
            }
            else
            {
                op->attrs["3"] = Attribute({4, num_output}, new_bias);
            }
        }
        else
        {
            std::vector<float> bias(4 * num_output, 0.f);
            op->attrs["4"] = Attribute({4, num_output}, bias);

            if (bidirectional)
            {
                op->attrs["5"] = Attribute({4, num_output}, bias);
            }
                op->attrs["3"] = Attribute({4, num_output}, bias) + Attribute({4, num_output}, bias);
            else
                op->attrs["3"] = Attribute({4, num_output}, bias);
        }

        op->attrs["6"] = Attribute();
        op->attrs["6"].data = {0, 0, 0, 0};
        op->attrs["4"] = Attribute();
        op->attrs["4"].data = {0, 0, 0, 0};

        // RUN-hidden-hidden
        {
            op->attrs["7"] = captured_attrs.at("op_0.weight_hh_l0");
            if (bidirectional)
                op->attrs["8"] = captured_attrs.at("op_0.weight_hh_l0_reverse");
                op->attrs["5"] = captured_attrs.at("op_0.weight_hh_l0") + captured_attrs.at("op_0.weight_hh_l0_reverse");
            else
                op->attrs["5"] = captured_attrs.at("op_0.weight_hh_l0");
        }
    }
 };
--- a/tools/pnnx/src/pass_ncnn/nn_LSTM.cpp
+++ b/tools/pnnx/src/pass_ncnn/nn_LSTM.cpp
@@ -81,7 +81,6 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                memcpy(w_optr, optr, weight_data_size_g * sizeof(float));
                memcpy(w_gptr, gptr, weight_data_size_g * sizeof(float));
            }
            op->attrs["1"] = Attribute({4, num_output, input_size}, new_weight_ih);

            if (bidirectional)
            {
@@ -106,12 +105,16 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                    memcpy(w_optr, optr, weight_data_size_g * sizeof(float));
                    memcpy(w_gptr, gptr, weight_data_size_g * sizeof(float));
                }
                op->attrs["2"] = Attribute({4, num_output, input_size}, new_weight_ih_reverse);
                op->attrs["1"] = Attribute({4, num_output, input_size}, new_weight_ih) + Attribute({4, num_output, input_size}, new_weight_ih_reverse);
            }
            else
            {
                op->attrs["1"] = Attribute({4, num_output, input_size}, new_weight_ih);
            }
        }

        op->attrs["3"] = Attribute();
        op->attrs["3"].data = {0, 0, 0, 0};
        op->attrs["2"] = Attribute();
        op->attrs["2"].data = {0, 0, 0, 0};
        if (captured_params.at("bias").b)
        {
            // reduce bias_ih and bias_hh
@@ -153,8 +156,6 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                }
            }

            op->attrs["4"] = Attribute({4, num_output}, new_bias);

            if (bidirectional)
            {
                std::vector<float> new_bias_reverse;
@@ -194,22 +195,25 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                    }
                }

                op->attrs["5"] = Attribute({4, num_output}, new_bias_reverse);
                op->attrs["3"] = Attribute({4, num_output}, new_bias) + Attribute({4, num_output}, new_bias_reverse);
            }
            else
            {
                op->attrs["3"] = Attribute({4, num_output}, new_bias);
            }
        }
        else
        {
            std::vector<float> bias(4 * num_output, 0.f);
            op->attrs["4"] = Attribute({4, num_output}, bias);

            if (bidirectional)
            {
                op->attrs["5"] = Attribute({4, num_output}, bias);
            }
                op->attrs["3"] = Attribute({4, num_output}, bias) + Attribute({4, num_output}, bias);
            else
                op->attrs["3"] = Attribute({4, num_output}, bias);
        }

        op->attrs["6"] = Attribute();
        op->attrs["6"].data = {0, 0, 0, 0};
        op->attrs["4"] = Attribute();
        op->attrs["4"].data = {0, 0, 0, 0};

        // reorder IFGO-hidden-hidden to IFOG-hidden-hidden
        {
@@ -234,7 +238,6 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                memcpy(w_optr, optr, weight_data_size_g * sizeof(float));
                memcpy(w_gptr, gptr, weight_data_size_g * sizeof(float));
            }
            op->attrs["7"] = Attribute({4, num_output, num_output}, new_weight_hh);

            if (bidirectional)
            {
@@ -259,7 +262,11 @@ pnnx.Output             output      3 0 out out_hidden out_cell
                    memcpy(w_optr, optr, weight_data_size_g * sizeof(float));
                    memcpy(w_gptr, gptr, weight_data_size_g * sizeof(float));
                }
                op->attrs["8"] = Attribute({4, num_output, num_output}, new_weight_hh_reverse);
                op->attrs["5"] = Attribute({4, num_output, num_output}, new_weight_hh) + Attribute({4, num_output, num_output}, new_weight_hh_reverse);
            }
            else
            {
                op->attrs["5"] = Attribute({4, num_output, num_output}, new_weight_hh);
            }
        }
    }
--- a/tools/pnnx/src/pass_ncnn/nn_RNN.cpp
+++ b/tools/pnnx/src/pass_ncnn/nn_RNN.cpp
@@ -63,12 +63,13 @@ pnnx.Output             output      2 0 out out_hidden

        op->attrs["0"] = Attribute();
        op->attrs["0"].data = {0, 0, 0, 0};
        op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0");
        if (bidirectional)
            op->attrs["2"] = captured_attrs.at("op_0.weight_ih_l0_reverse");
            op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0") + captured_attrs.at("op_0.weight_ih_l0_reverse");
        else
            op->attrs["1"] = captured_attrs.at("op_0.weight_ih_l0");

        op->attrs["3"] = Attribute();
        op->attrs["3"].data = {0, 0, 0, 0};
        op->attrs["2"] = Attribute();
        op->attrs["2"].data = {0, 0, 0, 0};
        if (captured_params.at("bias").b)
        {
            // reduce bias_ih and bias_hh
@@ -85,8 +86,6 @@ pnnx.Output             output      2 0 out out_hidden
                }
            }

            op->attrs["4"] = Attribute({num_output}, new_bias);

            if (bidirectional)
            {
                std::vector<float> new_bias_reverse;
@@ -102,25 +101,29 @@ pnnx.Output             output      2 0 out out_hidden
                    }
                }

                op->attrs["5"] = Attribute({num_output}, new_bias_reverse);
                op->attrs["3"] = Attribute({num_output}, new_bias) + Attribute({num_output}, new_bias_reverse);
            }
            else
            {
                op->attrs["3"] = Attribute({num_output}, new_bias);
            }
        }
        else
        {
            std::vector<float> bias(num_output, 0.f);
            op->attrs["4"] = Attribute({num_output}, bias);

            if (bidirectional)
            {
                op->attrs["5"] = Attribute({num_output}, bias);
            }
                op->attrs["3"] = Attribute({num_output}, bias) + Attribute({num_output}, bias);
            else
                op->attrs["3"] = Attribute({num_output}, bias);
        }

        op->attrs["6"] = Attribute();
        op->attrs["6"].data = {0, 0, 0, 0};
        op->attrs["7"] = captured_attrs.at("op_0.weight_hh_l0");
        op->attrs["4"] = Attribute();
        op->attrs["4"].data = {0, 0, 0, 0};
        if (bidirectional)
            op->attrs["8"] = captured_attrs.at("op_0.weight_hh_l0_reverse");
            op->attrs["5"] = captured_attrs.at("op_0.weight_hh_l0") + captured_attrs.at("op_0.weight_hh_l0_reverse");
        else
            op->attrs["5"] = captured_attrs.at("op_0.weight_hh_l0");
    }
 };

--- a/tools/pnnx/tests/ncnn/test_F_conv1d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv1d.py
@@ -36,7 +36,7 @@ class Model(nn.Module):
        return x, y

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_F_conv2d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv2d.py
@@ -36,7 +36,7 @@ class Model(nn.Module):
        return x, y

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_F_conv3d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv3d.py
@@ -30,7 +30,7 @@ class Model(nn.Module):
        return y

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv_transpose1d.py
@@ -30,7 +30,7 @@ class Model(nn.Module):
        return y

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv_transpose2d.py
@@ -30,7 +30,7 @@ class Model(nn.Module):
        return y

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_F_conv_transpose3d.py
+++ b/tools/pnnx/tests/ncnn/test_F_conv_transpose3d.py
@@ -30,7 +30,7 @@ class Model(nn.Module):
        return y

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_F_embedding.py
+++ b/tools/pnnx/tests/ncnn/test_F_embedding.py
@@ -27,7 +27,7 @@ class Model(nn.Module):
        return y

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_mobilenet_v2.py
+++ b/tools/pnnx/tests/ncnn/test_mobilenet_v2.py
@@ -16,7 +16,7 @@ import torch
 import torchvision.models as models

 def test():
    net = models.mobilenet_v2()
    net = models.mobilenet_v2().half().float()
    net.eval()

    torch.manual_seed(0)
@@ -36,7 +36,7 @@ def test():
    import test_mobilenet_v2_ncnn
    b = test_mobilenet_v2_ncnn.test_inference()

    return torch.allclose(a, b, 1e-4, 1e-4)
    return torch.allclose(a, b, 1e-2, 1e-2)

 if __name__ == "__main__":
    if test():
--- a/tools/pnnx/tests/ncnn/test_mobilenet_v3_small.py
+++ b/tools/pnnx/tests/ncnn/test_mobilenet_v3_small.py
@@ -16,7 +16,7 @@ import torch
 import torchvision.models as models

 def test():
    net = models.mobilenet_v3_small()
    net = models.mobilenet_v3_small().half().float()
    net.eval()

    torch.manual_seed(0)
@@ -36,7 +36,7 @@ def test():
    import test_mobilenet_v3_small_ncnn
    b = test_mobilenet_v3_small_ncnn.test_inference()

    return torch.allclose(a, b, 1e-4, 1e-4)
    return torch.allclose(a, b, 1e-2, 1e-2)

 if __name__ == "__main__":
    if test():
--- a/tools/pnnx/tests/ncnn/test_ncnn_fuse_transpose_matmul.py
+++ b/tools/pnnx/tests/ncnn/test_ncnn_fuse_transpose_matmul.py
@@ -81,10 +81,6 @@ def test():

    for a0, b0 in zip(a, b):
        if not torch.allclose(a0, b0, 1e-4, 1e-4):
            print(a0.shape)
            print(b0.shape)
            print(a0)
            print(b0)
            return False
    return True

--- a/tools/pnnx/tests/ncnn/test_nn_Conv1d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Conv1d.py
@@ -44,7 +44,7 @@ class Model(nn.Module):
        return x

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_nn_Conv2d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Conv2d.py
@@ -44,7 +44,7 @@ class Model(nn.Module):
        return x

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_nn_Conv3d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Conv3d.py
@@ -46,7 +46,7 @@ class Model(nn.Module):
        return x

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_nn_ConvTranspose1d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_ConvTranspose1d.py
@@ -42,7 +42,7 @@ class Model(nn.Module):
        return x

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_nn_ConvTranspose2d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_ConvTranspose2d.py
@@ -42,7 +42,7 @@ class Model(nn.Module):
        return x

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_nn_ConvTranspose3d.py
+++ b/tools/pnnx/tests/ncnn/test_nn_ConvTranspose3d.py
@@ -42,7 +42,7 @@ class Model(nn.Module):
        return x

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_nn_Embedding.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Embedding.py
@@ -27,7 +27,7 @@ class Model(nn.Module):
        return x

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_nn_GRU.py
+++ b/tools/pnnx/tests/ncnn/test_nn_GRU.py
@@ -54,7 +54,7 @@ class Model(nn.Module):
        return x2, x3, x4, y2, y3, y4

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
@@ -76,9 +76,7 @@ def test():
    b = test_nn_GRU_ncnn.test_inference()

    for a0, b0 in zip(a, b):
        if not torch.allclose(a0, b0, 1e-4, 1e-4):
            print(a0.shape)
            print(b0.shape)
        if not torch.allclose(a0, b0, 1e-3, 1e-3):
            return False
    return True

--- a/tools/pnnx/tests/ncnn/test_nn_LSTM.py
+++ b/tools/pnnx/tests/ncnn/test_nn_LSTM.py
@@ -54,7 +54,7 @@ class Model(nn.Module):
        return x2, x3, x4, y2, y3, y4

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
@@ -76,7 +76,7 @@ def test():
    b = test_nn_LSTM_ncnn.test_inference()

    for a0, b0 in zip(a, b):
        if not torch.allclose(a0, b0, 1e-4, 1e-4):
        if not torch.allclose(a0, b0, 1e-3, 1e-3):
            return False
    return True

--- a/tools/pnnx/tests/ncnn/test_nn_Linear.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Linear.py
@@ -33,7 +33,7 @@ class Model(nn.Module):
        return x, y

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_nn_MultiheadAttention.py
+++ b/tools/pnnx/tests/ncnn/test_nn_MultiheadAttention.py
@@ -36,7 +36,7 @@ class Model(nn.Module):
        return x0, y0

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
--- a/tools/pnnx/tests/ncnn/test_nn_RNN.py
+++ b/tools/pnnx/tests/ncnn/test_nn_RNN.py
@@ -54,7 +54,7 @@ class Model(nn.Module):
        return x2, x3, x4, y2, y3, y4

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
@@ -76,7 +76,7 @@ def test():
    b = test_nn_RNN_ncnn.test_inference()

    for a0, b0 in zip(a, b):
        if not torch.allclose(a0, b0, 1e-4, 1e-4):
        if not torch.allclose(a0, b0, 1e-3, 1e-3):
            return False
    return True

--- a/tools/pnnx/tests/ncnn/test_resnet18.py
+++ b/tools/pnnx/tests/ncnn/test_resnet18.py
@@ -16,7 +16,7 @@ import torch
 import torchvision.models as models

 def test():
    net = models.resnet18()
    net = models.resnet18().half().float()
    net.eval()

    torch.manual_seed(0)
@@ -36,7 +36,7 @@ def test():
    import test_resnet18_ncnn
    b = test_resnet18_ncnn.test_inference()

    return torch.allclose(a, b, 1e-4, 1e-4)
    return torch.allclose(a, b, 1e-2, 1e-2)

 if __name__ == "__main__":
    if test():
--- a/tools/pnnx/tests/ncnn/test_shufflenet_v2_x1_0.py
+++ b/tools/pnnx/tests/ncnn/test_shufflenet_v2_x1_0.py
@@ -16,7 +16,7 @@ import torch
 import torchvision.models as models

 def test():
    net = models.shufflenet_v2_x1_0()
    net = models.shufflenet_v2_x1_0().half().float()
    net.eval()

    torch.manual_seed(0)
@@ -36,7 +36,7 @@ def test():
    import test_shufflenet_v2_x1_0_ncnn
    b = test_shufflenet_v2_x1_0_ncnn.test_inference()

    return torch.allclose(a, b, 1e-4, 1e-4)
    return torch.allclose(a, b, 1e-2, 1e-2)

 if __name__ == "__main__":
    if test():
--- a/tools/pnnx/tests/ncnn/test_squeezenet1_1.py
+++ b/tools/pnnx/tests/ncnn/test_squeezenet1_1.py
@@ -16,7 +16,7 @@ import torch
 import torchvision.models as models

 def test():
    net = models.squeezenet1_1()
    net = models.squeezenet1_1().half().float()
    net.eval()

    torch.manual_seed(0)
@@ -36,7 +36,7 @@ def test():
    import test_squeezenet1_1_ncnn
    b = test_squeezenet1_1_ncnn.test_inference()

    return torch.allclose(a, b, 1e-4, 1e-4)
    return torch.allclose(a, b, 1e-2, 1e-2)

 if __name__ == "__main__":
    if test():
--- a/tools/pnnx/tests/ncnn/test_torch_addmm.py
+++ b/tools/pnnx/tests/ncnn/test_torch_addmm.py
@@ -30,7 +30,7 @@ class Model(nn.Module):
        return a, b, c

 def test():
    net = Model()
    net = Model().half().float()
    net.eval()

    torch.manual_seed(0)
@@ -58,10 +58,6 @@ def test():

    for a0, b0 in zip(a, b):
        if not torch.allclose(a0, b0, 1e-4, 1e-4):
            print(a0.shape)
            print(b0.shape)
            print(a0)
            print(b0)
            return False
    return True

--- a/tools/pnnx/tests/ncnn/test_torch_chunk.py
+++ b/tools/pnnx/tests/ncnn/test_torch_chunk.py
@@ -49,8 +49,6 @@ def test():

    for a0, b0 in zip(a, b):
        if not torch.equal(a0, b0):
            print(a0.shape)
            print(b0.shape)
            return False
    return True

--- a/tools/pnnx/tests/ncnn/test_torch_matmul.py
+++ b/tools/pnnx/tests/ncnn/test_torch_matmul.py
@@ -93,10 +93,6 @@ def test():

    for a0, b0 in zip(a, b):
        if not torch.allclose(a0, b0, 1e-4, 1e-4):
            print(a0.shape)
            print(b0.shape)
            print(a0)
            print(b0)
            return False
    return True

--- a/tools/pnnx/tests/ncnn/test_torch_squeeze.py
+++ b/tools/pnnx/tests/ncnn/test_torch_squeeze.py
@@ -53,8 +53,6 @@ def test():

    for a0, b0 in zip(a, b):
        if not torch.allclose(a0, b0, 1e-4, 1e-4):
            print(a0.shape)
            print(b0.shape)
            return False
    return True

--- a/tools/pnnx/tests/ncnn/test_torch_unsqueeze.py
+++ b/tools/pnnx/tests/ncnn/test_torch_unsqueeze.py
@@ -51,8 +51,6 @@ def test():

    for a0, b0 in zip(a, b):
        if not torch.allclose(a0, b0, 1e-4, 1e-4):
            print(a0.shape)
            print(b0.shape)
            return False
    return True