implement binaryop and unaryop

9 years ago · dcbc117368
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -119,6 +119,8 @@ ncnn_add_layer(Threshold)
 ncnn_add_layer(Tile OFF)
 ncnn_add_layer(RNN OFF)
 ncnn_add_layer(LSTM OFF)
 ncnn_add_layer(BinaryOp)
 ncnn_add_layer(UnaryOp)

 add_library(ncnn STATIC ${ncnn_SRCS})

--- a/src/layer.h
+++ b/src/layer.h
@@ -130,6 +130,8 @@ enum
    Tile        = 37,
    RNN         = 38,
    LSTM        = 39,
    BinaryOp    = 40,
    UnaryOp     = 41,

    CustomBit   = (1<<8),
 };
--- a/src/layer/binaryop.cpp
+++ b/src/layer/binaryop.cpp
@@ -0,0 +1,182 @@
 // Tencent is pleased to support the open source community by making ncnn available.
 //
 // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
 //
 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // https://opensource.org/licenses/BSD-3-Clause
 //
 // Unless required by applicable law or agreed to in writing, software distributed
 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.

 #include "binaryop.h"
 #include <math.h>

 namespace ncnn {

 DEFINE_LAYER_CREATOR(BinaryOp)

 // Sets the layer flags and gives op_type a defined default.
 BinaryOp::BinaryOp()
 {
    // forward() takes a vector of input blobs and allocates a separate output,
    // so this layer is neither single-blob nor in-place.
    one_blob_only = false;
    support_inplace = false;

    // Give op_type a defined value so forward() never branches on an
    // indeterminate int when no load_param*() call has run yet.
    op_type = Operation_ADD;
 }

 #if NCNN_STDIO
 #if NCNN_STRING
 // Parses one decimal integer from paramfp into op_type.
 // Returns 0 on success; logs to stderr and returns -1 when the integer
 // could not be scanned.
 int BinaryOp::load_param(FILE* paramfp)
 {
    const int matched = fscanf(paramfp, "%d", &op_type);
    if (matched == 1)
        return 0;

    fprintf(stderr, "BinaryOp load_param failed %d\n", matched);
    return -1;
 }
 #endif // NCNN_STRING
 // Reads op_type as one raw int from paramfp.
 // Returns 0 on success; logs to stderr and returns -1 when the stream
 // ends or fails before a whole int was read. op_type is left untouched
 // in that case.
 int BinaryOp::load_param_bin(FILE* paramfp)
 {
    // Read into a temporary so a short read cannot leave op_type half-written.
    int value = 0;
    size_t nread = fread(&value, sizeof(int), 1, paramfp);
    if (nread != 1)
    {
        fprintf(stderr, "BinaryOp load_param_bin failed %d\n", (int)nread);
        return -1;
    }

    op_type = value;

    return 0;
 }
 #endif // NCNN_STDIO

 // Loads op_type from an in-memory parameter buffer.
 // The int found at mem becomes op_type and the caller's cursor is moved
 // forward by 4 bytes. Always returns 0.
 // NOTE(review): the direct int load assumes mem is suitably aligned for
 // int and that int is 4 bytes wide - confirm against the caller.
 int BinaryOp::load_param(const unsigned char*& mem)
 {
    const unsigned char* cursor = mem;
    mem = cursor + 4;

    op_type = *(int*)(cursor);

    return 0;
 }

 int BinaryOp::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs) const
 {
    const Mat& bottom_blob = bottom_blobs[0];
    const Mat& bottom_blob1 = bottom_blobs[1];

    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    int size = w * h;

    Mat& top_blob = top_blobs[0];
    top_blob.create(w, h, channels);
    if (top_blob.empty())
        return -100;

    if (op_type == Operation_ADD)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            const float* ptr1 = bottom_blob1.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = ptr[i] + ptr1[i];
            }
        }
    }
    else if (op_type == Operation_SUB)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            const float* ptr1 = bottom_blob1.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = ptr[i] - ptr1[i];
            }
        }
    }
    else if (op_type == Operation_MUL)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            const float* ptr1 = bottom_blob1.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = ptr[i] * ptr1[i];
            }
        }
    }
    else if (op_type == Operation_DIV)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            const float* ptr1 = bottom_blob1.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = ptr[i] / ptr1[i];
            }
        }
    }
    else if (op_type == Operation_MAX)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            const float* ptr1 = bottom_blob1.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = std::max(ptr[i], ptr1[i]);
            }
        }
    }
    else if (op_type == Operation_MIN)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            const float* ptr1 = bottom_blob1.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = std::min(ptr[i], ptr1[i]);
            }
        }
    }
    else if (op_type == Operation_POW)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            const float* ptr1 = bottom_blob1.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = pow(ptr[i], ptr1[i]);
            }
        }
    }

    return 0;
 }

 } // namespace ncnn
--- a/src/layer/binaryop.h
+++ b/src/layer/binaryop.h
@@ -0,0 +1,54 @@
 // Tencent is pleased to support the open source community by making ncnn available.
 //
 // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
 //
 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // https://opensource.org/licenses/BSD-3-Clause
 //
 // Unless required by applicable law or agreed to in writing, software distributed
 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.

 #ifndef LAYER_BINARYOP_H
 #define LAYER_BINARYOP_H

 #include "layer.h"

 namespace ncnn {

 // Layer that combines two input blobs element by element, using the
 // operation selected by op_type.
 class BinaryOp : public Layer
 {
 public:
    // Values understood by op_type.
    enum {
        Operation_ADD   = 0,
        Operation_SUB   = 1,
        Operation_MUL   = 2,
        Operation_DIV   = 3,
        Operation_MAX   = 4,
        Operation_MIN   = 5,
        Operation_POW   = 6
    };

    BinaryOp();

    // Parameter loaders: text stream, binary stream and in-memory buffer.
 #if NCNN_STDIO
 #if NCNN_STRING
    virtual int load_param(FILE* paramfp);
 #endif // NCNN_STRING
    virtual int load_param_bin(FILE* paramfp);
 #endif // NCNN_STDIO
    virtual int load_param(const unsigned char*& mem);

    // Computes top_blobs[0] from bottom_blobs[0] and bottom_blobs[1].
    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs) const;

    // param: one of the Operation_* values above
    int op_type;
 };

 } // namespace ncnn

 #endif // LAYER_BINARYOP_H
--- a/src/layer/unaryop.cpp
+++ b/src/layer/unaryop.cpp
@@ -0,0 +1,489 @@
 // Tencent is pleased to support the open source community by making ncnn available.
 //
 // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
 //
 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // https://opensource.org/licenses/BSD-3-Clause
 //
 // Unless required by applicable law or agreed to in writing, software distributed
 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.

 #include "unaryop.h"
 #include <math.h>

 namespace ncnn {

 DEFINE_LAYER_CREATOR(UnaryOp)

 // Sets the layer flags and gives op_type a defined default.
 UnaryOp::UnaryOp()
 {
    // This layer implements both the single-blob forward() and
    // forward_inplace(), so both flags are enabled.
    one_blob_only = true;
    support_inplace = true;

    // Give op_type a defined value so forward() never branches on an
    // indeterminate int when no load_param*() call has run yet.
    op_type = Operation_ABS;
 }

 #if NCNN_STDIO
 #if NCNN_STRING
 // Parses one decimal integer from paramfp into op_type.
 // Returns 0 on success; logs to stderr and returns -1 when the integer
 // could not be scanned.
 int UnaryOp::load_param(FILE* paramfp)
 {
    const int matched = fscanf(paramfp, "%d", &op_type);
    if (matched == 1)
        return 0;

    fprintf(stderr, "UnaryOp load_param failed %d\n", matched);
    return -1;
 }
 #endif // NCNN_STRING
 // Reads op_type as one raw int from paramfp.
 // Returns 0 on success; logs to stderr and returns -1 when the stream
 // ends or fails before a whole int was read. op_type is left untouched
 // in that case.
 int UnaryOp::load_param_bin(FILE* paramfp)
 {
    // Read into a temporary so a short read cannot leave op_type half-written.
    int value = 0;
    size_t nread = fread(&value, sizeof(int), 1, paramfp);
    if (nread != 1)
    {
        fprintf(stderr, "UnaryOp load_param_bin failed %d\n", (int)nread);
        return -1;
    }

    op_type = value;

    return 0;
 }
 #endif // NCNN_STDIO

 // Loads op_type from an in-memory parameter buffer.
 // The int found at mem becomes op_type and the caller's cursor is moved
 // forward by 4 bytes. Always returns 0.
 // NOTE(review): the direct int load assumes mem is suitably aligned for
 // int and that int is 4 bytes wide - confirm against the caller.
 int UnaryOp::load_param(const unsigned char*& mem)
 {
    const unsigned char* cursor = mem;
    mem = cursor + 4;

    op_type = *(int*)(cursor);

    return 0;
 }

 int UnaryOp::forward(const Mat& bottom_blob, Mat& top_blob) const
 {
    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    int size = w * h;

    top_blob.create(w, h, channels);
    if (top_blob.empty())
        return -100;

    if (op_type == Operation_ABS)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = fabs(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_NEG)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = -ptr[i];
            }
        }
    }
    else if (op_type == Operation_FLOOR)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = floor(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_CEIL)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = ceil(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_SQUARE)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = ptr[i] * ptr[i];
            }
        }
    }
    else if (op_type == Operation_SQRT)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = sqrt(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_RSQRT)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = 1.f / sqrt(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_EXP)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = exp(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_LOG)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = log(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_SIN)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = sin(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_COS)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = cos(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_TAN)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = tan(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_ASIN)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = asin(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_ACOS)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = acos(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_ATAN)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            const float* ptr = bottom_blob.channel(q);
            float* outptr = top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                outptr[i] = atan(ptr[i]);
            }
        }
    }

    return 0;
 }

 int UnaryOp::forward_inplace(Mat& bottom_top_blob) const
 {
    int w = bottom_top_blob.w;
    int h = bottom_top_blob.h;
    int channels = bottom_top_blob.c;
    int size = w * h;

    if (op_type == Operation_ABS)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = fabs(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_NEG)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = -ptr[i];
            }
        }
    }
    else if (op_type == Operation_FLOOR)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = floor(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_CEIL)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = ceil(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_SQUARE)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = ptr[i] * ptr[i];
            }
        }
    }
    else if (op_type == Operation_SQRT)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = sqrt(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_RSQRT)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = 1.f / sqrt(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_EXP)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = exp(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_LOG)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = log(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_SIN)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = sin(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_COS)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = cos(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_TAN)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = tan(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_ASIN)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = asin(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_ACOS)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = acos(ptr[i]);
            }
        }
    }
    else if (op_type == Operation_ATAN)
    {
        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            for (int i=0; i<size; i++)
            {
                ptr[i] = atan(ptr[i]);
            }
        }
    }

    return 0;
 }

 } // namespace ncnn
--- a/src/layer/unaryop.h
+++ b/src/layer/unaryop.h
@@ -0,0 +1,64 @@
 // Tencent is pleased to support the open source community by making ncnn available.
 //
 // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
 //
 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // https://opensource.org/licenses/BSD-3-Clause
 //
 // Unless required by applicable law or agreed to in writing, software distributed
 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.

 #ifndef LAYER_UNARYOP_H
 #define LAYER_UNARYOP_H

 #include "layer.h"

 namespace ncnn {

 // Layer that transforms a single blob element by element, using the
 // operation selected by op_type.
 class UnaryOp : public Layer
 {
 public:
    // Values understood by op_type.
    enum {
        Operation_ABS   = 0,
        Operation_NEG   = 1,
        Operation_FLOOR = 2,
        Operation_CEIL  = 3,
        Operation_SQUARE= 4,
        Operation_SQRT  = 5,
        Operation_RSQRT = 6,
        Operation_EXP   = 7,
        Operation_LOG   = 8,
        Operation_SIN   = 9,
        Operation_COS   = 10,
        Operation_TAN   = 11,
        Operation_ASIN  = 12,
        Operation_ACOS  = 13,
        Operation_ATAN  = 14
    };

    UnaryOp();

    // Parameter loaders: text stream, binary stream and in-memory buffer.
 #if NCNN_STDIO
 #if NCNN_STRING
    virtual int load_param(FILE* paramfp);
 #endif // NCNN_STRING
    virtual int load_param_bin(FILE* paramfp);
 #endif // NCNN_STDIO
    virtual int load_param(const unsigned char*& mem);

    // Computes top_blob from bottom_blob.
    virtual int forward(const Mat& bottom_blob, Mat& top_blob) const;

    // Same operation, overwriting bottom_top_blob with its own result.
    virtual int forward_inplace(Mat& bottom_top_blob) const;

    // param: one of the Operation_* values above
    int op_type;
 };

 } // namespace ncnn

 #endif // LAYER_UNARYOP_H