binaryop broadcasting special type 3 4 for lhs

6 years ago · 6077066b02
--- a/docs/developer-guide/binaryop-broadcasting.md
+++ b/docs/developer-guide/binaryop-broadcasting.md
@@ -34,3 +34,5 @@ some special broadcasting rule exists for model compatibility
 |---|---|---|---|
 |1|[2,3,4]|[1,1,4]|[2,3,4]|
 |2|[2,3,4]|[2,3,1]|[2,3,4]|
 |3|[1,1,4]|[2,3,4]|[2,3,4]|
 |4|[2,3,1]|[2,3,4]|[2,3,4]|
--- a/src/layer/arm/binaryop_arm.cpp
+++ b/src/layer/arm/binaryop_arm.cpp
@@ -60,21 +60,21 @@ static int binary_op_pack4(const Mat& a, const Mat& b, Mat& c, const Option& opt

    if (a.dims == 3)
    {
        c.create(w, h, channels, elemsize, elempack, opt.blob_allocator);
        if (c.empty())
            return -100;

        if (b.dims == 3)
        {
            if (w1 == 1 && h1 == 1 && channels1 == channels)
            {
                // special type 1
                c.create(w, h, channels, elemsize, elempack, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q=0; q<channels; q++)
                {
                    const float* ptr = a.channel(q);
                    float* outptr = c.channel(q);
                    const float* b0 = b.channel(q);
                    float* outptr = c.channel(q);
                    float32x4_t _b0 = vld1q_f32(b0);
                    for (int i = 0; i < size; i++)
                    {
@@ -92,6 +92,10 @@ static int binary_op_pack4(const Mat& a, const Mat& b, Mat& c, const Option& opt
            if (w1 == w && h1 == h && channels1 == 1 && elempack1 == 1)
            {
                // special type 2
                c.create(w, h, channels, elemsize, elempack, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels; q++)
                {
@@ -113,7 +117,66 @@ static int binary_op_pack4(const Mat& a, const Mat& b, Mat& c, const Option& opt
                return 0;
            }

            if (w == 1 && h == 1 && channels1 == channels)
            {
                // special type 3
                c.create(w1, h1, channels1, elemsize1, elempack1, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q=0; q<channels1; q++)
                {
                    const float* a0 = a.channel(q);
                    const float* ptr1 = b.channel(q);
                    float* outptr = c.channel(q);
                    float32x4_t _a0 = vld1q_f32(a0);
                    for (int i = 0; i < size1; i++)
                    {
                        float32x4_t _p1 = vld1q_f32(ptr1);
                        float32x4_t _outp = op(_a0, _p1);
                        vst1q_f32(outptr, _outp);
                        ptr1 += 4;
                        outptr += 4;
                    }
                }

                return 0;
            }

            if (w1 == w && h1 == h && channels == 1 && elempack == 1)
            {
                // special type 4
                c.create(w1, h1, channels1, elemsize1, elempack1, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels1; q++)
                {
                    const float* ptr = a;
                    const float* ptr1 = b.channel(q);
                    float* outptr = c.channel(q);
                    for (int i = 0; i < size1; i++)
                    {
                        float32x4_t _p = vld1q_dup_f32(ptr);
                        float32x4_t _p1 = vld1q_f32(ptr1);
                        float32x4_t _outp = op(_p, _p1);
                        vst1q_f32(outptr, _outp);
                        ptr += 1;
                        ptr1 += 4;
                        outptr += 4;
                    }
                }

                return 0;
            }

            // type 19
            c.create(w, h, channels, elemsize, elempack, opt.blob_allocator);
            if (c.empty())
                return -100;

            #pragma omp parallel for num_threads(opt.num_threads)
            for (int q=0; q<channels; q++)
            {
@@ -136,6 +199,10 @@ static int binary_op_pack4(const Mat& a, const Mat& b, Mat& c, const Option& opt
            return 0;
        }

        c.create(w, h, channels, elemsize, elempack, opt.blob_allocator);
        if (c.empty())
            return -100;

        if (b.dims == 2)
        {
            // type 18
@@ -216,7 +283,7 @@ static int binary_op_pack4(const Mat& a, const Mat& b, Mat& c, const Option& opt
        if (b.dims == 3)
        {
            // type 14
            c.create(w1, h1, channels1, elemsize, elempack, opt.blob_allocator);
            c.create(w1, h1, channels1, elemsize1, elempack1, opt.blob_allocator);
            if (c.empty())
                return -100;

@@ -396,7 +463,7 @@ static int binary_op_pack4(const Mat& a, const Mat& b, Mat& c, const Option& opt
        if (b.dims == 3)
        {
            // type 9
            c.create(w1, h1, channels1, elemsize, elempack, opt.blob_allocator);
            c.create(w1, h1, channels1, elemsize1, elempack1, opt.blob_allocator);
            if (c.empty())
                return -100;

@@ -423,7 +490,7 @@ static int binary_op_pack4(const Mat& a, const Mat& b, Mat& c, const Option& opt
        if (b.dims == 2)
        {
            // type 8
            c.create(w1, h1, elemsize, elempack, opt.blob_allocator);
            c.create(w1, h1, elemsize1, elempack1, opt.blob_allocator);
            if (c.empty())
                return -100;

@@ -681,15 +748,15 @@ static int binary_op_pack4_bf16s(const Mat& a, const Mat& b, Mat& c, const Optio

    if (a.dims == 3)
    {
        c.create(w, h, channels, elemsize, elempack, opt.blob_allocator);
        if (c.empty())
            return -100;

        if (b.dims == 3)
        {
            if (w1 == 1 && h1 == 1 && channels1 == channels)
            {
                // special type 1
                c.create(w, h, channels, elemsize, elempack, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q=0; q<channels; q++)
                {
@@ -713,6 +780,10 @@ static int binary_op_pack4_bf16s(const Mat& a, const Mat& b, Mat& c, const Optio
            if (w1 == w && h1 == h && channels1 == 1 && elempack1 == 1)
            {
                // special type 2
                c.create(w, h, channels, elemsize, elempack, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels; q++)
                {
@@ -734,7 +805,66 @@ static int binary_op_pack4_bf16s(const Mat& a, const Mat& b, Mat& c, const Optio
                return 0;
            }

            if (w == 1 && h == 1 && channels1 == channels)
            {
                // special type 3
                c.create(w1, h1, channels1, elemsize1, elempack1, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q=0; q<channels1; q++)
                {
                    const unsigned short* a0 = a.channel(q);
                    unsigned short* outptr = c.channel(q);
                    const unsigned short* ptr1 = b.channel(q);
                    float32x4_t _a0 = vreinterpretq_f32_u32(vshll_n_u16(vld1_u16(a0), 16));
                    for (int i = 0; i < size1; i++)
                    {
                        float32x4_t _p1 = vreinterpretq_f32_u32(vshll_n_u16(vld1_u16(ptr1), 16));
                        float32x4_t _outp = op(_a0, _p1);
                        vst1_u16(outptr, vshrn_n_u32(vreinterpretq_u32_f32(_outp), 16));
                        ptr1 += 4;
                        outptr += 4;
                    }
                }

                return 0;
            }

            if (w1 == w && h1 == h && channels == 1 && elempack == 1)
            {
                // special type 4
                c.create(w1, h1, channels1, elemsize1, elempack1, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels1; q++)
                {
                    const unsigned short* ptr = a;
                    const unsigned short* ptr1 = b.channel(q);
                    unsigned short* outptr = c.channel(q);
                    for (int i = 0; i < size1; i++)
                    {
                        float32x4_t _p = vdupq_n_f32(bfloat16_to_float32(*ptr));
                        float32x4_t _p1 = vreinterpretq_f32_u32(vshll_n_u16(vld1_u16(ptr1), 16));
                        float32x4_t _outp = op(_p, _p1);
                        vst1_u16(outptr, vshrn_n_u32(vreinterpretq_u32_f32(_outp), 16));
                        ptr += 1;
                        ptr1 += 4;
                        outptr += 4;
                    }
                }

                return 0;
            }

            // type 19
            c.create(w, h, channels, elemsize, elempack, opt.blob_allocator);
            if (c.empty())
                return -100;

            #pragma omp parallel for num_threads(opt.num_threads)
            for (int q=0; q<channels; q++)
            {
@@ -757,6 +887,10 @@ static int binary_op_pack4_bf16s(const Mat& a, const Mat& b, Mat& c, const Optio
            return 0;
        }

        c.create(w, h, channels, elemsize, elempack, opt.blob_allocator);
        if (c.empty())
            return -100;

        if (b.dims == 2)
        {
            // type 18
@@ -837,7 +971,7 @@ static int binary_op_pack4_bf16s(const Mat& a, const Mat& b, Mat& c, const Optio
        if (b.dims == 3)
        {
            // type 14
            c.create(w1, h1, channels1, elemsize, elempack, opt.blob_allocator);
            c.create(w1, h1, channels1, elemsize1, elempack1, opt.blob_allocator);
            if (c.empty())
                return -100;

@@ -1017,7 +1151,7 @@ static int binary_op_pack4_bf16s(const Mat& a, const Mat& b, Mat& c, const Optio
        if (b.dims == 3)
        {
            // type 9
            c.create(w1, h1, channels1, elemsize, elempack, opt.blob_allocator);
            c.create(w1, h1, channels1, elemsize1, elempack1, opt.blob_allocator);
            if (c.empty())
                return -100;

@@ -1044,7 +1178,7 @@ static int binary_op_pack4_bf16s(const Mat& a, const Mat& b, Mat& c, const Optio
        if (b.dims == 2)
        {
            // type 8
            c.create(w1, h1, elemsize, elempack, opt.blob_allocator);
            c.create(w1, h1, elemsize1, elempack1, opt.blob_allocator);
            if (c.empty())
                return -100;

@@ -1162,21 +1296,21 @@ static int binary_op_bf16s(const Mat& a, const Mat& b, Mat& c, const Option& opt

    if (a.dims == 3)
    {
        c.create(w, h, channels, elemsize, opt.blob_allocator);
        if (c.empty())
            return -100;

        if (b.dims == 3)
        {
            if (w1 == 1 && h1 == 1 && channels1 == channels)
            {
                // special type 1
                c.create(w, h, channels, elemsize, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels; q++)
                {
                    const unsigned short* ptr = a.channel(q);
                    unsigned short* outptr = c.channel(q);
                    const unsigned short* b0 = b.channel(q);
                    unsigned short* outptr = c.channel(q);
                    for (int i = 0; i < size; i++)
                    {
                        outptr[i] = float32_to_bfloat16(op(bfloat16_to_float32(ptr[i]), bfloat16_to_float32(b0[0])));
@@ -1189,6 +1323,10 @@ static int binary_op_bf16s(const Mat& a, const Mat& b, Mat& c, const Option& opt
            if (w1 == w && h1 == h && channels1 == 1)
            {
                // special type 2
                c.create(w, h, channels, elemsize, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels; q++)
                {
@@ -1204,7 +1342,55 @@ static int binary_op_bf16s(const Mat& a, const Mat& b, Mat& c, const Option& opt
                return 0;
            }

            if (w == 1 && h == 1 && channels1 == channels)
            {
                // special type 3
                c.create(w1, h1, channels1, elemsize, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels1; q++)
                {
                    const unsigned short* a0 = a.channel(q);
                    const unsigned short* ptr1 = b.channel(q);
                    unsigned short* outptr = c.channel(q);
                    for (int i = 0; i < size1; i++)
                    {
                        outptr[i] = float32_to_bfloat16(op(bfloat16_to_float32(a0[0]), bfloat16_to_float32(ptr1[i])));
                    }
                }

                return 0;
            }

            if (w1 == w && h1 == h && channels == 1)
            {
                // special type 4
                c.create(w1, h1, channels1, elemsize, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels1; q++)
                {
                    const unsigned short* ptr = a;
                    const unsigned short* ptr1 = b.channel(q);
                    unsigned short* outptr = c.channel(q);
                    for (int i = 0; i < size1; i++)
                    {
                        outptr[i] = float32_to_bfloat16(op(bfloat16_to_float32(ptr[i]), bfloat16_to_float32(ptr1[i])));
                    }
                }

                return 0;
            }

            // type 19
            c.create(w, h, channels, elemsize, opt.blob_allocator);
            if (c.empty())
                return -100;

            #pragma omp parallel for num_threads(opt.num_threads)
            for (int q=0; q<channels; q++)
            {
@@ -1221,6 +1407,10 @@ static int binary_op_bf16s(const Mat& a, const Mat& b, Mat& c, const Option& opt
            return 0;
        }

        c.create(w, h, channels, elemsize, opt.blob_allocator);
        if (c.empty())
            return -100;

        if (b.dims == 2)
        {
            // type 18
--- a/src/layer/binaryop.cpp
+++ b/src/layer/binaryop.cpp
@@ -63,21 +63,21 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c, const Option& opt)

    if (a.dims == 3)
    {
        c.create(w, h, channels, elemsize, opt.blob_allocator);
        if (c.empty())
            return -100;

        if (b.dims == 3)
        {
            if (w1 == 1 && h1 == 1 && channels1 == channels)
            {
                // special type 1
                c.create(w, h, channels, elemsize, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels; q++)
                {
                    const float* ptr = a.channel(q);
                    float* outptr = c.channel(q);
                    const float* b0 = b.channel(q);
                    float* outptr = c.channel(q);
                    for (int i = 0; i < size; i++)
                    {
                        outptr[i] = op(ptr[i], b0[0]);
@@ -90,6 +90,10 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c, const Option& opt)
            if (w1 == w && h1 == h && channels1 == 1)
            {
                // special type 2
                c.create(w, h, channels, elemsize, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels; q++)
                {
@@ -105,7 +109,55 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c, const Option& opt)
                return 0;
            }

            if (w == 1 && h == 1 && channels1 == channels)
            {
                // special type 3
                c.create(w1, h1, channels1, elemsize, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels1; q++)
                {
                    const float* a0 = a.channel(q);
                    const float* ptr1 = b.channel(q);
                    float* outptr = c.channel(q);
                    for (int i = 0; i < size1; i++)
                    {
                        outptr[i] = op(a0[0], ptr1[i]);
                    }
                }

                return 0;
            }

            if (w1 == w && h1 == h && channels == 1)
            {
                // special type 4
                c.create(w1, h1, channels1, elemsize, opt.blob_allocator);
                if (c.empty())
                    return -100;

                #pragma omp parallel for num_threads(opt.num_threads)
                for (int q = 0; q < channels1; q++)
                {
                    const float* ptr = a;
                    const float* ptr1 = b.channel(q);
                    float* outptr = c.channel(q);
                    for (int i = 0; i < size1; i++)
                    {
                        outptr[i] = op(ptr[i], ptr1[i]);
                    }
                }

                return 0;
            }

            // type 19
            c.create(w, h, channels, elemsize, opt.blob_allocator);
            if (c.empty())
                return -100;

            #pragma omp parallel for num_threads(opt.num_threads)
            for (int q=0; q<channels; q++)
            {
@@ -122,6 +174,10 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c, const Option& opt)
            return 0;
        }

        c.create(w, h, channels, elemsize, opt.blob_allocator);
        if (c.empty())
            return -100;

        if (b.dims == 2)
        {
            // type 18
--- a/src/layer/vulkan/binaryop_vulkan.cpp
+++ b/src/layer/vulkan/binaryop_vulkan.cpp
@@ -228,7 +228,8 @@ int BinaryOp_vulkan::create_pipeline(const Option& opt)
            pipeline_binaryop_broadcast_pack4->create(LayerShaderType::binaryop_broadcast_pack4, opt, specializations);
        }

        if (shape.dims == 0 || (shape.dims == 1 && shape.w == 1 && elempack == 1 && elempack1 == 4))
        if (shape.dims == 0 || (shape.dims == 1 && shape.w == 1 && elempack == 1 && elempack1 == 4)
            || (shape.dims == 3 && shape1.dims == 3 && shape1.w == shape.w && shape1.h == shape.h && shape.c == 1 && elempack == 1 && elempack1 == 4))
        {
            pipeline_binaryop_broadcast_a1_pack4 = new Pipeline(vkdev);
            pipeline_binaryop_broadcast_a1_pack4->set_optimal_local_size_xyz(local_size_xyz);
@@ -251,7 +252,8 @@ int BinaryOp_vulkan::create_pipeline(const Option& opt)
            pipeline_binaryop_broadcast_pack8->create(LayerShaderType::binaryop_broadcast_pack8, opt, specializations);
        }

        if ((opt.use_shader_pack8 && shape.dims == 0) || (shape.dims == 1 && shape.w == 1 && elempack == 1 && elempack1 == 8))
        if ((opt.use_shader_pack8 && shape.dims == 0) || (shape.dims == 1 && shape.w == 1 && elempack == 1 && elempack1 == 8)
            || (shape.dims == 3 && shape1.dims == 3 && shape1.w == shape.w && shape1.h == shape.h && shape.c == 1 && elempack == 1 && elempack1 == 8))
        {
            pipeline_binaryop_broadcast_a1_pack8 = new Pipeline(vkdev);
            pipeline_binaryop_broadcast_a1_pack8->set_optimal_local_size_xyz(local_size_xyz);
@@ -391,6 +393,11 @@ int BinaryOp_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector
                // special type 2
                pipeline = out_elempack == 8 ? pipeline_binaryop_broadcast_b1_pack8 : pipeline_binaryop_broadcast_b1_pack4;
            }
            else if (bottom_blob.dims == 3 && bottom_blob1.dims == 3 && bottom_blob1.w == bottom_blob.w && bottom_blob1.h == bottom_blob.h && bottom_blob.c == 1 && bottom_blob.elempack == 1)
            {
                // special type 4
                pipeline = out_elempack == 8 ? pipeline_binaryop_broadcast_a1_pack8 : pipeline_binaryop_broadcast_a1_pack4;
            }
            else
            {
                pipeline = out_elempack == 8 ? pipeline_binaryop_broadcast_pack8 : pipeline_binaryop_broadcast_pack4;
--- a/src/layer/vulkan/shader/binaryop_broadcast.comp
+++ b/src/layer/vulkan/shader/binaryop_broadcast.comp
@@ -102,6 +102,20 @@ void main()
                ai = gi;
                bi = gy * psc(bw) + gx;
            }

            if (psc(aw) == 1 && psc(ah) == 1)
            {
                // special type 3
                ai = gz * psc(acstep);
                bi = gi;
            }

            if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1)
            {
                // special type 4
                ai = gy * psc(aw) + gx;
                bi = gi;
            }
        }

        if (psc(bdims) == 2)
--- a/src/layer/vulkan/shader/binaryop_broadcast_a1_pack4.comp
+++ b/src/layer/vulkan/shader/binaryop_broadcast_a1_pack4.comp
@@ -82,8 +82,16 @@ void main()

    const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

    int ai = 0;

    if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1)
    {
        // special type 4
        ai = gy * psc(aw) + gx;
    }

    // type 2 3 4
    afpvec4 v1 = afpvec4(buffer_ld1(a_blob_data, 0));
    afpvec4 v1 = afpvec4(buffer_ld1(a_blob_data, ai));
    afpvec4 v2 = buffer_ld4(b_blob_data, gi);

    afpvec4 res;
--- a/src/layer/vulkan/shader/binaryop_broadcast_a1_pack8.comp
+++ b/src/layer/vulkan/shader/binaryop_broadcast_a1_pack8.comp
@@ -83,8 +83,16 @@ void main()

    const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

    int ai = 0;

    if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1)
    {
        // special type 2
        ai = gy * psc(bw) + gx;
    }

    // type 2 3 4
    afpvec4 v1 = afpvec4(buffer_ld1(a_blob_data, 0));
    afpvec4 v1 = afpvec4(buffer_ld1(a_blob_data, ai));
    afpvec8 v2 = buffer_ld8(b_blob_data, gi);

    afpvec8 res;
--- a/src/layer/vulkan/shader/binaryop_broadcast_pack4.comp
+++ b/src/layer/vulkan/shader/binaryop_broadcast_pack4.comp
@@ -95,6 +95,13 @@ void main()
                ai = gi;
                bi = gz * psc(bcstep);
            }

            if (psc(aw) == 1 && psc(ah) == 1)
            {
                // special type 3
                ai = gz * psc(acstep);
                bi = gi;
            }
        }

        if (psc(bdims) == 2)
--- a/src/layer/vulkan/shader/binaryop_broadcast_pack8.comp
+++ b/src/layer/vulkan/shader/binaryop_broadcast_pack8.comp
@@ -96,6 +96,13 @@ void main()
                ai = gi;
                bi = gz * psc(bcstep);
            }

            if (psc(aw) == 1 && psc(ah) == 1)
            {
                // special type 3
                ai = gz * psc(acstep);
                bi = gi;
            }
        }

        if (psc(bdims) == 2)
--- a/tests/test_binaryop.cpp
+++ b/tests/test_binaryop.cpp
@@ -452,6 +452,40 @@ static int test_binaryop_s2()
    return 0;
 }

 static int test_binaryop_s3()
 {
    for (int op_type=0; op_type<OP_TYPE_MAX; op_type++)
    {
        int ret = 0
            || test_binaryop(RandomMat(1, 1, 2), RandomMat(2, 3, 2), op_type)
            || test_binaryop(RandomMat(1, 1, 4), RandomMat(2, 3, 4), op_type)
            || test_binaryop(RandomMat(1, 1, 8), RandomMat(2, 3, 8), op_type)
            ;

        if (ret != 0)
            return -1;
    }

    return 0;
 }

 static int test_binaryop_s4()
 {
    for (int op_type=0; op_type<OP_TYPE_MAX; op_type++)
    {
        int ret = 0
            || test_binaryop(RandomMat(2, 3, 1), RandomMat(2, 3, 2), op_type)
            || test_binaryop(RandomMat(2, 3, 1), RandomMat(2, 3, 4), op_type)
            || test_binaryop(RandomMat(2, 3, 1), RandomMat(2, 3, 8), op_type)
            ;

        if (ret != 0)
            return -1;
    }

    return 0;
 }

 int main()
 {
    SRAND(7767517);
@@ -478,5 +512,7 @@ int main()
        || test_binaryop_19()
        || test_binaryop_s1()
        || test_binaryop_s2()
        || test_binaryop_s3()
        || test_binaryop_s4()
        ;
 }