innerproduct produces continuous blob, fix #236

8 years ago · 08e261f423
--- a/examples/fasterrcnn.cpp
+++ b/examples/fasterrcnn.cpp
@@ -174,7 +174,7 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects)
        ex2.extract("bbox_pred", bbox_pred);
        ex2.extract("cls_prob", cls_prob);

        int num_class = cls_prob.c;
        int num_class = cls_prob.w;
        class_candidates.resize(num_class);

        // find class id with highest score
@@ -182,7 +182,7 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects)
        float score = 0.f;
        for (int i=0; i<num_class; i++)
        {
            float class_score = cls_prob.channel(i)[0];
            float class_score = cls_prob[i];
            if (class_score > score)
            {
                label = i;
@@ -206,14 +206,10 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects)
        float pb_h = y2 - y1 + 1;

        // apply bbox regression
        const float* bbox_xptr = bbox_pred.channel(label * 4);
        const float* bbox_yptr = bbox_pred.channel(label * 4 + 1);
        const float* bbox_wptr = bbox_pred.channel(label * 4 + 2);
        const float* bbox_hptr = bbox_pred.channel(label * 4 + 3);
        float dx = bbox_xptr[0];
        float dy = bbox_yptr[0];
        float dw = bbox_wptr[0];
        float dh = bbox_hptr[0];
        float dx = bbox_pred[label * 4];
        float dy = bbox_pred[label * 4 + 1];
        float dw = bbox_pred[label * 4 + 2];
        float dh = bbox_pred[label * 4 + 3];

        float cx = x1 + pb_w * 0.5f;
        float cy = y1 + pb_h * 0.5f;
--- a/examples/squeezencnn/jni/squeezencnn_jni.cpp
+++ b/examples/squeezencnn/jni/squeezencnn_jni.cpp
@@ -146,7 +146,7 @@ JNIEXPORT jstring JNICALL Java_com_tencent_squeezencnn_SqueezeNcnn_Detect(JNIEnv
        cls_scores.resize(out.c);
        for (int j=0; j<out.c; j++)
        {
            const float* prob = out.data + out.cstep * j;
            const float* prob = out.channel(j);
            cls_scores[j] = prob[0];
        }
    }
--- a/src/layer/arm/innerproduct_arm.cpp
+++ b/src/layer/arm/innerproduct_arm.cpp
@@ -29,47 +29,15 @@ int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob) const
    int channels = bottom_blob.c;
    int size = w * h;

    top_blob.create(1, 1, num_output);
    top_blob.create(num_output);
    if (top_blob.empty())
        return -100;

    if (size == 1)
    {
        // num_output
        const float* weight_data_ptr = weight_data;
        #pragma omp parallel for
        for (int p=0; p<num_output; p++)
        {
            float* outptr = top_blob.channel(p);
            float sum = 0.f;

            if (bias_term)
                sum = bias_data[p];

            const float* w = weight_data_ptr + channels * p;

            // channels
            const float* m = bottom_blob;
            for (int q=0; q<channels; q++)
            {
                sum += *m * *w;

                m += 4;
                w++;
            }

            outptr[0] = sum;
        }

        return 0;
    }

    // num_output
    const float* weight_data_ptr = weight_data;
    #pragma omp parallel for
    for (int p=0; p<num_output; p++)
    {
        float* outptr = top_blob.channel(p);
        float sum = 0.f;

        if (bias_term)
@@ -157,7 +125,7 @@ int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob) const
 #endif // __aarch64__
 #endif // __ARM_NEON

        outptr[0] = sum;
        top_blob[p] = sum;
    }

    return 0;
--- a/src/layer/innerproduct.cpp
+++ b/src/layer/innerproduct.cpp
@@ -56,7 +56,7 @@ int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob) const
    int channels = bottom_blob.c;
    int size = w * h;

    top_blob.create(1, 1, num_output);
    top_blob.create(num_output);
    if (top_blob.empty())
        return -100;

@@ -64,7 +64,6 @@ int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob) const
    #pragma omp parallel for
    for (int p=0; p<num_output; p++)
    {
        float* outptr = top_blob.channel(p);
        float sum = 0.f;

        if (bias_term)
@@ -82,7 +81,7 @@ int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob) const
            }
        }

        outptr[0] = sum;
        top_blob[p] = sum;
    }

    return 0;
--- a/src/layer/scale.cpp
+++ b/src/layer/scale.cpp
@@ -71,7 +71,7 @@ int Scale::forward_inplace(std::vector<Mat>& bottom_top_blobs) const
        {
            float* ptr = bottom_top_blob.channel(q);

            float s = scale_blob.channel(q)[0];
            float s = scale_blob[q];
            float bias = bias_data[q];

            for (int i=0; i<size; i++)
@@ -87,7 +87,7 @@ int Scale::forward_inplace(std::vector<Mat>& bottom_top_blobs) const
        {
            float* ptr = bottom_top_blob.channel(q);

            float s = scale_blob.channel(q)[0];
            float s = scale_blob[q];

            for (int i=0; i<size; i++)
            {