Browse Source

merge concatv2 dropoutv2 softmaxv2

tags/20171017
nihui 8 years ago
parent
commit
91c08a390a
15 changed files with 473 additions and 959 deletions
  1. +0
    -3
      src/CMakeLists.txt
  2. +6
    -0
      src/layer/arm/softmax_arm.cpp
  3. +162
    -23
      src/layer/concat.cpp
  4. +3
    -0
      src/layer/concat.h
  5. +0
    -277
      src/layer/concatv2.cpp
  6. +0
    -37
      src/layer/concatv2.h
  7. +56
    -2
      src/layer/dropout.cpp
  8. +4
    -0
      src/layer/dropout.h
  9. +0
    -92
      src/layer/dropoutv2.cpp
  10. +0
    -39
      src/layer/dropoutv2.h
  11. +232
    -101
      src/layer/softmax.cpp
  12. +3
    -2
      src/layer/softmax.h
  13. +0
    -306
      src/layer/softmaxv2.cpp
  14. +0
    -37
      src/layer/softmaxv2.h
  15. +7
    -40
      tools/caffe2ncnn.cpp

+ 0
- 3
src/CMakeLists.txt View File

@@ -135,11 +135,8 @@ ncnn_add_layer(ExpandDims)
ncnn_add_layer(Normalize)
ncnn_add_layer(Permute)
ncnn_add_layer(PriorBox)
ncnn_add_layer(ConcatV2)
ncnn_add_layer(SoftmaxV2)
ncnn_add_layer(DetectionOutput)
ncnn_add_layer(Interp)
ncnn_add_layer(DropoutV2)

add_library(ncnn STATIC ${ncnn_SRCS})



+ 6
- 0
src/layer/arm/softmax_arm.cpp View File

@@ -27,6 +27,9 @@ DEFINE_LAYER_CREATOR(Softmax_arm)

int Softmax_arm::forward(const Mat& bottom_blob, Mat& top_blob) const
{
if (axis != 0)
return Softmax::forward(bottom_blob, top_blob);

// value = exp( value - global max value )
// sum all value
// value = value / sum
@@ -156,6 +159,9 @@ int Softmax_arm::forward(const Mat& bottom_blob, Mat& top_blob) const

int Softmax_arm::forward_inplace(Mat& bottom_top_blob) const
{
if (axis != 0)
return Softmax::forward_inplace(bottom_top_blob);

// value = exp( value - global max value )
// sum all value
// value = value / sum


+ 162
- 23
src/layer/concat.cpp View File

@@ -20,13 +20,22 @@ DEFINE_LAYER_CREATOR(Concat)

Concat::Concat()
{
one_blob_only = false;
support_inplace = false;
}

int Concat::load_param(const ParamDict& pd)
{
axis = pd.get(0, 0);

return 0;
}

int Concat::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs) const
{
int dims = bottom_blobs[0].dims;

if (dims == 1)
if (dims == 1) // axis == 0
{
// concat vector
// total length
@@ -61,7 +70,7 @@ int Concat::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_
return 0;
}

if (dims == 2)
if (dims == 2 && axis == 0)
{
// concat image
int w = bottom_blobs[0].w;
@@ -98,38 +107,168 @@ int Concat::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_
return 0;
}

int w = bottom_blobs[0].w;
int h = bottom_blobs[0].h;
if (dims == 2 && axis == 1)
{
// interleave image row
int h = bottom_blobs[0].h;

// total width
int top_w = 0;
for (size_t b=0; b<bottom_blobs.size(); b++)
{
const Mat& bottom_blob = bottom_blobs[b];
top_w += bottom_blob.w;
}

Mat& top_blob = top_blobs[0];
top_blob.create(top_w, h);
if (top_blob.empty())
return -100;

#pragma omp parallel for
for (int i=0; i<h; i++)
{
float* outptr = top_blob.row(i);
for (size_t b=0; b<bottom_blobs.size(); b++)
{
const Mat& bottom_blob = bottom_blobs[b];

const float* ptr = bottom_blob.row(i);
for (int j=0; j<bottom_blob.w; j++)
{
outptr[j] = ptr[j];
}

outptr += bottom_blob.w;
}
}

// total channels
int top_channels = 0;
for (size_t b=0; b<bottom_blobs.size(); b++)
return 0;
}

if (dims == 3 && axis == 0)
{
const Mat& bottom_blob = bottom_blobs[b];
top_channels += bottom_blob.c;
// concat dim
int w = bottom_blobs[0].w;
int h = bottom_blobs[0].h;

// total channels
int top_channels = 0;
for (size_t b=0; b<bottom_blobs.size(); b++)
{
const Mat& bottom_blob = bottom_blobs[b];
top_channels += bottom_blob.c;
}

Mat& top_blob = top_blobs[0];
top_blob.create(w, h, top_channels);
if (top_blob.empty())
return -100;

int q = 0;
for (size_t b=0; b<bottom_blobs.size(); b++)
{
const Mat& bottom_blob = bottom_blobs[b];

int channels = bottom_blob.c;
int size = bottom_blob.cstep * channels;

const float* ptr = bottom_blob;
float* outptr = top_blob.channel(q);
for (int i=0; i<size; i++)
{
outptr[i] = ptr[i];
}

q += channels;
}

return 0;
}

Mat& top_blob = top_blobs[0];
top_blob.create(w, h, top_channels);
if (top_blob.empty())
return -100;
if (dims == 3 && axis == 1)
{
// interleave dim height
int w = bottom_blobs[0].w;
int channels = bottom_blobs[0].c;

// total height
int top_h = 0;
for (size_t b=0; b<bottom_blobs.size(); b++)
{
const Mat& bottom_blob = bottom_blobs[b];
top_h += bottom_blob.h;
}

Mat& top_blob = top_blobs[0];
top_blob.create(w, top_h, channels);
if (top_blob.empty())
return -100;

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
float* outptr = top_blob.channel(q);

for (size_t b=0; b<bottom_blobs.size(); b++)
{
const Mat& bottom_blob = bottom_blobs[b];

int size = bottom_blob.w * bottom_blob.h;

int q = 0;
for (size_t b=0; b<bottom_blobs.size(); b++)
const float* ptr = bottom_blob.channel(q);
for (int i=0; i<size; i++)
{
outptr[i] = ptr[i];
}
}
}

return 0;
}

if (dims == 3 && axis == 2)
{
const Mat& bottom_blob = bottom_blobs[b];
// interleave dim width
int h = bottom_blobs[0].h;
int channels = bottom_blobs[0].c;

int channels = bottom_blob.c;
int size = bottom_blob.cstep * channels;
// total width
int top_w = 0;
for (size_t b=0; b<bottom_blobs.size(); b++)
{
const Mat& bottom_blob = bottom_blobs[b];
top_w += bottom_blob.w;
}

Mat& top_blob = top_blobs[0];
top_blob.create(top_w, h, channels);
if (top_blob.empty())
return -100;

const float* ptr = bottom_blob;
float* outptr = top_blob.channel(q);
for (int i=0; i<size; i++)
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
outptr[i] = ptr[i];
float* outptr = top_blob.channel(q);

for (int i=0; i<h; i++)
{
for (size_t b=0; b<bottom_blobs.size(); b++)
{
const Mat& bottom_blob = bottom_blobs[b];

const float* ptr = bottom_blob.channel(q).row(i);
for (int j=0; j<bottom_blob.w; j++)
{
outptr[j] = ptr[j];
}

outptr += bottom_blob.w;
}
}
}

q += channels;
return 0;
}

return 0;


+ 3
- 0
src/layer/concat.h View File

@@ -24,9 +24,12 @@ class Concat : public Layer
public:
Concat();

virtual int load_param(const ParamDict& pd);

virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs) const;

public:
int axis;
};

} // namespace ncnn


+ 0
- 277
src/layer/concatv2.cpp View File

@@ -1,277 +0,0 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "concatv2.h"

namespace ncnn {

DEFINE_LAYER_CREATOR(ConcatV2)

// Configure the layer flags: the layer takes several blobs and does not
// run in place.
ConcatV2::ConcatV2()
{
    support_inplace = false;
    one_blob_only = false;
}

// Load the concat axis from parameter id 0 (0 when the id is absent).
// Always returns 0.
int ConcatV2::load_param(const ParamDict& pd)
{
    const int default_axis = 0;
    axis = pd.get(0, default_axis);
    return 0;
}

// Concatenate every blob in bottom_blobs along `axis` and store the result
// in top_blobs[0].
//
// The layout is chosen from bottom_blobs[0].dims; the extents that are not
// being concatenated are read from bottom_blobs[0] and every other input is
// indexed as if it shared them.
//
// Supported combinations:
//   dims == 1            append the vectors one after another
//   dims == 2, axis == 0 stack the images on top of each other (sum of h)
//   dims == 2, axis == 1 join the images row by row (sum of w)
//   dims == 3, axis == 0 stack the channels (sum of c)
//   dims == 3, axis == 1 stack the rows inside every channel (sum of h)
//   dims == 3, axis == 2 join the rows inside every channel (sum of w)
//
// Returns 0 on success and -100 when the output blob is empty after create().
// NOTE(review): any other dims/axis pair falls through and returns 0 without
// writing top_blobs[0] -- confirm that callers never depend on an output then.
int ConcatV2::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs) const
{
    int dims = bottom_blobs[0].dims;

    if (dims == 1) // axis == 0
    {
        // concat vector
        // total length
        int top_w = 0;
        for (size_t b=0; b<bottom_blobs.size(); b++)
        {
            const Mat& bottom_blob = bottom_blobs[b];
            top_w += bottom_blob.w;
        }

        Mat& top_blob = top_blobs[0];
        top_blob.create(top_w);
        if (top_blob.empty())
            return -100;

        float* outptr = top_blob;
        for (size_t b=0; b<bottom_blobs.size(); b++)
        {
            const Mat& bottom_blob = bottom_blobs[b];

            int w = bottom_blob.w;

            const float* ptr = bottom_blob;
            for (int i=0; i<w; i++)
            {
                outptr[i] = ptr[i];
            }

            // the next vector starts right after this one
            outptr += w;
        }

        return 0;
    }

    if (dims == 2 && axis == 0)
    {
        // concat image
        int w = bottom_blobs[0].w;

        // total height
        int top_h = 0;
        for (size_t b=0; b<bottom_blobs.size(); b++)
        {
            const Mat& bottom_blob = bottom_blobs[b];
            top_h += bottom_blob.h;
        }

        Mat& top_blob = top_blobs[0];
        top_blob.create(w, top_h);
        if (top_blob.empty())
            return -100;

        float* outptr = top_blob;
        for (size_t b=0; b<bottom_blobs.size(); b++)
        {
            const Mat& bottom_blob = bottom_blobs[b];

            // every input is copied as one run of w * h floats
            int size = w * bottom_blob.h;

            const float* ptr = bottom_blob;
            for (int i=0; i<size; i++)
            {
                outptr[i] = ptr[i];
            }

            outptr += size;
        }

        return 0;
    }

    if (dims == 2 && axis == 1)
    {
        // interleave image row
        int h = bottom_blobs[0].h;

        // total width
        int top_w = 0;
        for (size_t b=0; b<bottom_blobs.size(); b++)
        {
            const Mat& bottom_blob = bottom_blobs[b];
            top_w += bottom_blob.w;
        }

        Mat& top_blob = top_blobs[0];
        top_blob.create(top_w, h);
        if (top_blob.empty())
            return -100;

        #pragma omp parallel for
        for (int i=0; i<h; i++)
        {
            // output row i is row i of every input, laid side by side
            float* outptr = top_blob.row(i);
            for (size_t b=0; b<bottom_blobs.size(); b++)
            {
                const Mat& bottom_blob = bottom_blobs[b];

                const float* ptr = bottom_blob.row(i);
                for (int j=0; j<bottom_blob.w; j++)
                {
                    outptr[j] = ptr[j];
                }

                outptr += bottom_blob.w;
            }
        }

        return 0;
    }

    if (dims == 3 && axis == 0)
    {
        // concat dim
        int w = bottom_blobs[0].w;
        int h = bottom_blobs[0].h;

        // total channels
        int top_channels = 0;
        for (size_t b=0; b<bottom_blobs.size(); b++)
        {
            const Mat& bottom_blob = bottom_blobs[b];
            top_channels += bottom_blob.c;
        }

        Mat& top_blob = top_blobs[0];
        top_blob.create(w, h, top_channels);
        if (top_blob.empty())
            return -100;

        // q is the first output channel that receives the current input
        int q = 0;
        for (size_t b=0; b<bottom_blobs.size(); b++)
        {
            const Mat& bottom_blob = bottom_blobs[b];

            int channels = bottom_blob.c;
            // copy all channels of this input in one run, cstep floats per channel
            int size = bottom_blob.cstep * channels;

            const float* ptr = bottom_blob;
            float* outptr = top_blob.channel(q);
            for (int i=0; i<size; i++)
            {
                outptr[i] = ptr[i];
            }

            q += channels;
        }

        return 0;
    }

    if (dims == 3 && axis == 1)
    {
        // interleave dim height
        int w = bottom_blobs[0].w;
        int channels = bottom_blobs[0].c;

        // total height
        int top_h = 0;
        for (size_t b=0; b<bottom_blobs.size(); b++)
        {
            const Mat& bottom_blob = bottom_blobs[b];
            top_h += bottom_blob.h;
        }

        Mat& top_blob = top_blobs[0];
        top_blob.create(w, top_h, channels);
        if (top_blob.empty())
            return -100;

        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            float* outptr = top_blob.channel(q);

            for (size_t b=0; b<bottom_blobs.size(); b++)
            {
                const Mat& bottom_blob = bottom_blobs[b];

                int size = bottom_blob.w * bottom_blob.h;

                const float* ptr = bottom_blob.channel(q);
                for (int i=0; i<size; i++)
                {
                    outptr[i] = ptr[i];
                }

                // move past the rows just written; without this step every
                // input would overwrite the start of the output channel
                outptr += size;
            }
        }

        return 0;
    }

    if (dims == 3 && axis == 2)
    {
        // interleave dim width
        int h = bottom_blobs[0].h;
        int channels = bottom_blobs[0].c;

        // total width
        int top_w = 0;
        for (size_t b=0; b<bottom_blobs.size(); b++)
        {
            const Mat& bottom_blob = bottom_blobs[b];
            top_w += bottom_blob.w;
        }

        Mat& top_blob = top_blobs[0];
        top_blob.create(top_w, h, channels);
        if (top_blob.empty())
            return -100;

        #pragma omp parallel for
        for (int q=0; q<channels; q++)
        {
            // outptr walks the whole output channel row after row
            float* outptr = top_blob.channel(q);

            for (int i=0; i<h; i++)
            {
                for (size_t b=0; b<bottom_blobs.size(); b++)
                {
                    const Mat& bottom_blob = bottom_blobs[b];

                    const float* ptr = bottom_blob.channel(q).row(i);
                    for (int j=0; j<bottom_blob.w; j++)
                    {
                        outptr[j] = ptr[j];
                    }

                    outptr += bottom_blob.w;
                }
            }
        }

        return 0;
    }

    return 0;
}

} // namespace ncnn

+ 0
- 37
src/layer/concatv2.h View File

@@ -1,37 +0,0 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef LAYER_CONCATV2_H
#define LAYER_CONCATV2_H

#include "layer.h"

namespace ncnn {

// Layer that joins several input blobs into a single output blob along `axis`.
class ConcatV2 : public Layer
{
public:
ConcatV2();

// Loads `axis` from the parameter dictionary.
virtual int load_param(const ParamDict& pd);

// Multi-blob forward pass: reads bottom_blobs, writes top_blobs.
virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs) const;

public:
// Axis to concatenate along, set by load_param.
int axis;
};

} // namespace ncnn

#endif // LAYER_CONCATV2_H

+ 56
- 2
src/layer/dropout.cpp View File

@@ -24,14 +24,68 @@ Dropout::Dropout()
support_inplace = true;
}

int Dropout::load_param(const ParamDict& pd)
{
scale = pd.get(0, 1.f);

return 0;
}

int Dropout::forward(const Mat& bottom_blob, Mat& top_blob) const
{
top_blob = bottom_blob;
if (scale == 1.f)
{
top_blob = bottom_blob;
return 0;
}

int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
int size = w * h;

top_blob.create(w, h, channels);
if (top_blob.empty())
return -100;

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);

for (int i=0; i<size; i++)
{
outptr[i] = ptr[i] * scale;
}
}

return 0;
}

int Dropout::forward_inplace(Mat& /*bottom_top_blob*/) const
int Dropout::forward_inplace(Mat& bottom_top_blob) const
{
if (scale == 1.f)
{
return 0;
}

int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int channels = bottom_top_blob.c;
int size = w * h;

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);

for (int i=0; i<size; i++)
{
ptr[i] = ptr[i] * scale;
}
}

return 0;
}



+ 4
- 0
src/layer/dropout.h View File

@@ -24,10 +24,14 @@ class Dropout : public Layer
public:
Dropout();

virtual int load_param(const ParamDict& pd);

virtual int forward(const Mat& bottom_blob, Mat& top_blob) const;

virtual int forward_inplace(Mat& bottom_top_blob) const;

public:
float scale;
};

} // namespace ncnn


+ 0
- 92
src/layer/dropoutv2.cpp View File

@@ -1,92 +0,0 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "dropoutv2.h"

namespace ncnn {

DEFINE_LAYER_CREATOR(DropoutV2)

// Configure the layer flags: single input blob, in-place execution supported.
DropoutV2::DropoutV2()
{
    support_inplace = true;
    one_blob_only = true;
}

// Load the multiplier from parameter id 0 (1.f when the id is absent).
// Always returns 0.
int DropoutV2::load_param(const ParamDict& pd)
{
    const float default_scale = 1.f;
    scale = pd.get(0, default_scale);
    return 0;
}

// Write bottom_blob multiplied by `scale` into top_blob.
// Returns 0 on success, -100 when top_blob is empty after create().
int DropoutV2::forward(const Mat& bottom_blob, Mat& top_blob) const
{
    // a scale of exactly 1 changes nothing: assign the input to the output
    if (scale == 1.f)
    {
        top_blob = bottom_blob;
        return 0;
    }

    const int width = bottom_blob.w;
    const int height = bottom_blob.h;
    const int depth = bottom_blob.c;
    const int plane = width * height;

    top_blob.create(width, height, depth);
    if (top_blob.empty())
        return -100;

    // channels are independent, so they are scaled in parallel
    #pragma omp parallel for
    for (int c = 0; c < depth; c++)
    {
        const float* src = bottom_blob.channel(c);
        float* dst = top_blob.channel(c);

        for (int k = 0; k < plane; k++)
        {
            dst[k] = src[k] * scale;
        }
    }

    return 0;
}

// Multiply every element of bottom_top_blob by `scale`, overwriting the blob.
// Always returns 0.
int DropoutV2::forward_inplace(Mat& bottom_top_blob) const
{
    // a scale of exactly 1 leaves the data untouched
    if (scale == 1.f)
        return 0;

    const int depth = bottom_top_blob.c;
    const int plane = bottom_top_blob.w * bottom_top_blob.h;

    // channels are independent, so they are scaled in parallel
    #pragma omp parallel for
    for (int c = 0; c < depth; c++)
    {
        float* data = bottom_top_blob.channel(c);

        for (int k = 0; k < plane; k++)
        {
            data[k] = data[k] * scale;
        }
    }

    return 0;
}

} // namespace ncnn

+ 0
- 39
src/layer/dropoutv2.h View File

@@ -1,39 +0,0 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef LAYER_DROPOUTV2_H
#define LAYER_DROPOUTV2_H

#include "layer.h"

namespace ncnn {

// Layer that multiplies its input by a constant `scale`.
class DropoutV2 : public Layer
{
public:
DropoutV2();

// Loads `scale` from the parameter dictionary.
virtual int load_param(const ParamDict& pd);

// Out-of-place forward pass: reads bottom_blob, writes top_blob.
virtual int forward(const Mat& bottom_blob, Mat& top_blob) const;

// In-place forward pass: overwrites bottom_top_blob.
virtual int forward_inplace(Mat& bottom_top_blob) const;

public:
// Multiplier applied to every element, set by load_param.
float scale;
};

} // namespace ncnn

#endif // LAYER_DROPOUTV2_H

+ 232
- 101
src/layer/softmax.cpp View File

@@ -24,7 +24,14 @@ DEFINE_LAYER_CREATOR(Softmax)
Softmax::Softmax()
{
one_blob_only = true;
support_inplace = true;
support_inplace = false;
}

int Softmax::load_param(const ParamDict& pd)
{
axis = pd.get(0, 0);

return 0;
}

int Softmax::forward(const Mat& bottom_blob, Mat& top_blob) const
@@ -33,140 +40,264 @@ int Softmax::forward(const Mat& bottom_blob, Mat& top_blob) const
// sum all value
// value = value / sum

int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
int size = w * h;

top_blob.create(w, h, channels);
if (top_blob.empty())
return -100;

Mat max;
max.create(w, h);
if (max.empty())
return -100;
max.fill(-FLT_MAX);
for (int q=0; q<channels; q++)
if (axis == 0)
{
const float* ptr = bottom_blob.channel(q);
float* maxptr = max;
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
int size = w * h;

top_blob.create(w, h, channels);
if (top_blob.empty())
return -100;

for (int i=0; i<size; i++)
Mat max;
max.create(w, h);
if (max.empty())
return -100;
max.fill(-FLT_MAX);
for (int q=0; q<channels; q++)
{
maxptr[i] = std::max(maxptr[i], ptr[i]);
}
}
const float* ptr = bottom_blob.channel(q);
float* maxptr = max;

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float* maxptr = max;
for (int i=0; i<size; i++)
{
maxptr[i] = std::max(maxptr[i], ptr[i]);
}
}

for (int i=0; i<size; i++)
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
outptr[i] = exp(ptr[i] - maxptr[i]);
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float* maxptr = max;

for (int i=0; i<size; i++)
{
outptr[i] = exp(ptr[i] - maxptr[i]);
}
}
}

Mat sum;
sum.create(w, h);
if (sum.empty())
return -100;
sum.fill(0.f);
for (int q=0; q<channels; q++)
{
const float* outptr = top_blob.channel(q);
float* sumptr = sum;
Mat sum;
sum.create(w, h);
if (sum.empty())
return -100;
sum.fill(0.f);
for (int q=0; q<channels; q++)
{
const float* outptr = top_blob.channel(q);
float* sumptr = sum;

for (int i=0; i<size; i++)
for (int i=0; i<size; i++)
{
sumptr[i] += outptr[i];
}
}

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
sumptr[i] += outptr[i];
float* outptr = top_blob.channel(q);
float* sumptr = sum;

for (int i=0; i<size; i++)
{
outptr[i] /= sumptr[i];
}
}
}

#pragma omp parallel for
for (int q=0; q<channels; q++)
}
else if (axis == 1)
{
float* outptr = top_blob.channel(q);
float* sumptr = sum;
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
int size = w * h;

for (int i=0; i<size; i++)
top_blob.create(w, h, channels);
if (top_blob.empty())
return -100;

Mat max;
max.create(h, channels);
if (max.empty())
return -100;
max.fill(-FLT_MAX);
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
outptr[i] /= sumptr[i];
const float* ptr = bottom_blob.channel(q);
float* maxptr = max.row(q);

for (int i=0; i<h; i++)
{
float max = -FLT_MAX;
for (int j=0; j<w; j++)
{
max = std::max(max, ptr[j]);
}

maxptr[i] = max;
ptr += w;
}
}
}

return 0;
}
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float* maxptr = max.row(q);

int Softmax::forward_inplace(Mat& bottom_top_blob) const
{
// value = exp( value - global max value )
// sum all value
// value = value / sum
for (int i=0; i<h; i++)
{
float max = maxptr[i];
for (int j=0; j<w; j++)
{
outptr[j] = exp(ptr[j] - max);
}

int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int channels = bottom_top_blob.c;
int size = w * h;

Mat max;
max.create(w, h);
if (max.empty())
return -100;
max.fill(-FLT_MAX);
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);
float* maxptr = max;
ptr += w;
outptr += w;
}
}

for (int i=0; i<size; i++)
Mat sum;
sum.create(h, channels);
if (sum.empty())
return -100;
sum.fill(0.f);
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
maxptr[i] = std::max(maxptr[i], ptr[i]);
}
}
const float* outptr = top_blob.channel(q);
float* sumptr = sum.row(q);

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);
float* maxptr = max;
for (int i=0; i<h; i++)
{
float sum = 0.f;
for (int j=0; j<w; j++)
{
sum += outptr[j];
}

sumptr[i] = sum;
outptr += w;
}
}

for (int i=0; i<size; i++)
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
ptr[i] = exp(ptr[i] - maxptr[i]);
float* outptr = top_blob.channel(q);
float* sumptr = sum.row(q);

for (int i=0; i<h; i++)
{
float sum = sumptr[i];
for (int j=0; j<w; j++)
{
outptr[j] /= sum;
}

outptr += w;
}
}
}

Mat sum;
sum.create(w, h);
if (sum.empty())
return -100;
sum.fill(0.f);
for (int q=0; q<channels; q++)
}
else if (axis == 2)
{
const float* ptr = bottom_top_blob.channel(q);
float* sumptr = sum;
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
int size = w * h;

for (int i=0; i<size; i++)
top_blob.create(w, h, channels);
if (top_blob.empty())
return -100;

Mat max;
max.create(w, channels);
if (max.empty())
return -100;
max.fill(-FLT_MAX);
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
sumptr[i] += ptr[i];
const float* ptr = bottom_blob.channel(q);
float* maxptr = max.row(q);

for (int i=0; i<h; i++)
{
for (int j=0; j<w; j++)
{
maxptr[j] = std::max(maxptr[j], ptr[j]);
}

ptr += w;
}
}
}

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);
float* sumptr = sum;
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float* maxptr = max.row(q);

for (int i=0; i<h; i++)
{
for (int j=0; j<w; j++)
{
outptr[j] = exp(ptr[j] - maxptr[j]);
}

ptr += w;
outptr += w;
}
}

Mat sum;
sum.create(w, channels);
if (sum.empty())
return -100;
sum.fill(0.f);
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* outptr = top_blob.channel(q);
float* sumptr = sum.row(q);

for (int i=0; i<h; i++)
{
for (int j=0; j<w; j++)
{
sumptr[j] += outptr[j];
}

outptr += w;
}
}

for (int i=0; i<size; i++)
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
ptr[i] /= sumptr[i];
float* outptr = top_blob.channel(q);
float* sumptr = sum.row(q);

for (int i=0; i<h; i++)
{
for (int j=0; j<w; j++)
{
outptr[j] /= sumptr[j];
}

outptr += w;
}
}

}

return 0;


+ 3
- 2
src/layer/softmax.h View File

@@ -24,11 +24,12 @@ class Softmax : public Layer
public:
Softmax();

virtual int forward(const Mat& bottom_blob, Mat& top_blob) const;
virtual int load_param(const ParamDict& pd);

virtual int forward_inplace(Mat& bottom_top_blob) const;
virtual int forward(const Mat& bottom_blob, Mat& top_blob) const;

public:
int axis;
};

} // namespace ncnn


+ 0
- 306
src/layer/softmaxv2.cpp View File

@@ -1,306 +0,0 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "softmaxv2.h"
#include <float.h>
#include <math.h>
#include <algorithm>

namespace ncnn {

DEFINE_LAYER_CREATOR(SoftmaxV2)

// Configure the layer flags: single input blob, no in-place execution.
SoftmaxV2::SoftmaxV2()
{
    support_inplace = false;
    one_blob_only = true;
}

// Load the softmax axis from parameter id 0 (0 when the id is absent).
// Always returns 0.
int SoftmaxV2::load_param(const ParamDict& pd)
{
    const int default_axis = 0;
    axis = pd.get(0, default_axis);
    return 0;
}

int SoftmaxV2::forward(const Mat& bottom_blob, Mat& top_blob) const
{
// value = exp( value - global max value )
// sum all value
// value = value / sum

if (axis == 0)
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
int size = w * h;

top_blob.create(w, h, channels);
if (top_blob.empty())
return -100;

Mat max;
max.create(w, h);
if (max.empty())
return -100;
max.fill(-FLT_MAX);
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* maxptr = max;

for (int i=0; i<size; i++)
{
maxptr[i] = std::max(maxptr[i], ptr[i]);
}
}

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float* maxptr = max;

for (int i=0; i<size; i++)
{
outptr[i] = exp(ptr[i] - maxptr[i]);
}
}

Mat sum;
sum.create(w, h);
if (sum.empty())
return -100;
sum.fill(0.f);
for (int q=0; q<channels; q++)
{
const float* outptr = top_blob.channel(q);
float* sumptr = sum;

for (int i=0; i<size; i++)
{
sumptr[i] += outptr[i];
}
}

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
float* outptr = top_blob.channel(q);
float* sumptr = sum;

for (int i=0; i<size; i++)
{
outptr[i] /= sumptr[i];
}
}

}
else if (axis == 1)
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
int size = w * h;

top_blob.create(w, h, channels);
if (top_blob.empty())
return -100;

Mat max;
max.create(h, channels);
if (max.empty())
return -100;
max.fill(-FLT_MAX);
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* maxptr = max.row(q);

for (int i=0; i<h; i++)
{
float max = -FLT_MAX;
for (int j=0; j<w; j++)
{
max = std::max(max, ptr[j]);
}

maxptr[i] = max;
ptr += w;
}
}

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float* maxptr = max.row(q);

for (int i=0; i<h; i++)
{
float max = maxptr[i];
for (int j=0; j<w; j++)
{
outptr[j] = exp(ptr[j] - max);
}

ptr += w;
outptr += w;
}
}

Mat sum;
sum.create(h, channels);
if (sum.empty())
return -100;
sum.fill(0.f);
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* outptr = top_blob.channel(q);
float* sumptr = sum.row(q);

for (int i=0; i<h; i++)
{
float sum = 0.f;
for (int j=0; j<w; j++)
{
sum += outptr[j];
}

sumptr[i] = sum;
outptr += w;
}
}

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
float* outptr = top_blob.channel(q);
float* sumptr = sum.row(q);

for (int i=0; i<h; i++)
{
float sum = sumptr[i];
for (int j=0; j<w; j++)
{
outptr[j] /= sum;
}

outptr += w;
}
}

}
else if (axis == 2)
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
int size = w * h;

top_blob.create(w, h, channels);
if (top_blob.empty())
return -100;

Mat max;
max.create(w, channels);
if (max.empty())
return -100;
max.fill(-FLT_MAX);
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* maxptr = max.row(q);

for (int i=0; i<h; i++)
{
for (int j=0; j<w; j++)
{
maxptr[j] = std::max(maxptr[j], ptr[j]);
}

ptr += w;
}
}

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float* maxptr = max.row(q);

for (int i=0; i<h; i++)
{
for (int j=0; j<w; j++)
{
outptr[j] = exp(ptr[j] - maxptr[j]);
}

ptr += w;
outptr += w;
}
}

Mat sum;
sum.create(w, channels);
if (sum.empty())
return -100;
sum.fill(0.f);
#pragma omp parallel for
for (int q=0; q<channels; q++)
{
const float* outptr = top_blob.channel(q);
float* sumptr = sum.row(q);

for (int i=0; i<h; i++)
{
for (int j=0; j<w; j++)
{
sumptr[j] += outptr[j];
}

outptr += w;
}
}

#pragma omp parallel for
for (int q=0; q<channels; q++)
{
float* outptr = top_blob.channel(q);
float* sumptr = sum.row(q);

for (int i=0; i<h; i++)
{
for (int j=0; j<w; j++)
{
outptr[j] /= sumptr[j];
}

outptr += w;
}
}

}

return 0;
}

} // namespace ncnn

+ 0
- 37
src/layer/softmaxv2.h View File

@@ -1,37 +0,0 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef LAYER_SOFTMAXV2_H
#define LAYER_SOFTMAXV2_H

#include "layer.h"

namespace ncnn {

// Layer that computes a softmax over one axis of its input blob.
class SoftmaxV2 : public Layer
{
public:
SoftmaxV2();

// Loads `axis` from the parameter dictionary.
virtual int load_param(const ParamDict& pd);

// Out-of-place forward pass: reads bottom_blob, writes top_blob.
virtual int forward(const Mat& bottom_blob, Mat& top_blob) const;

public:
// Axis the softmax is taken over, set by load_param.
int axis;
};

} // namespace ncnn

#endif // LAYER_SOFTMAXV2_H

+ 7
- 40
tools/caffe2ncnn.cpp View File

@@ -324,15 +324,7 @@ int main(int argc, char** argv)

// layer definition line, repeated
// [type] [name] [bottom blob count] [top blob count] [bottom blobs] [top blobs] [layer specific params]
if (layer.type() == "Concat")
{
const caffe::ConcatParameter& concat_param = layer.concat_param();
if (concat_param.axis() != 1)
fprintf(pp, "%-16s", "ConcatV2");
else
fprintf(pp, "%-16s", "Concat");
}
else if (layer.type() == "Convolution")
if (layer.type() == "Convolution")
{
const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
if (convolution_param.group() != 1)
@@ -340,14 +332,6 @@ int main(int argc, char** argv)
else
fprintf(pp, "%-16s", "Convolution");
}
else if (layer.type() == "Dropout")
{
const caffe::DropoutParameter& dropout_param = layer.dropout_param();
if (!dropout_param.scale_train())
fprintf(pp, "%-16s", "DropoutV2");
else
fprintf(pp, "%-16s", "Dropout");
}
else if (layer.type() == "Python")
{
const caffe::PythonParameter& python_param = layer.python_param();
@@ -357,14 +341,6 @@ int main(int argc, char** argv)
else
fprintf(pp, "%-16s", python_layer_name.c_str());
}
else if (layer.type() == "Softmax")
{
const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
if (softmax_param.axis() != 1)
fprintf(pp, "%-16s", "SoftmaxV2");
else
fprintf(pp, "%-16s", "Softmax");
}
else
{
fprintf(pp, "%-16s", layer.type().c_str());
@@ -467,11 +443,8 @@ int main(int argc, char** argv)
else if (layer.type() == "Concat")
{
const caffe::ConcatParameter& concat_param = layer.concat_param();
if (concat_param.axis() != 1)
{
int dim = concat_param.axis() >= 1 ? concat_param.axis() - 1 : 0;
fprintf(pp, " 0=%d", dim);
}
int dim = concat_param.axis() - 1;
fprintf(pp, " 0=%d", dim);
}
else if (layer.type() == "Convolution")
{
@@ -604,11 +577,8 @@ int main(int argc, char** argv)
else if (layer.type() == "Dropout")
{
const caffe::DropoutParameter& dropout_param = layer.dropout_param();
if (!dropout_param.scale_train())
{
float scale = 1.f - dropout_param.dropout_ratio();
fprintf(pp, " 0=%f", scale);
}
float scale = 1.f - dropout_param.dropout_ratio();
fprintf(pp, " 0=%f", scale);
}
else if (layer.type() == "Eltwise")
{
@@ -1013,11 +983,8 @@ int main(int argc, char** argv)
else if (layer.type() == "Softmax")
{
const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
if (softmax_param.axis() != 1)
{
int dim = softmax_param.axis() >= 1 ? softmax_param.axis() - 1 : 0;
fprintf(pp, " 0=%d", dim);
}
int dim = softmax_param.axis() - 1;
fprintf(pp, " 0=%d", dim);
}
else if (layer.type() == "Threshold")
{


Loading…
Cancel
Save