Browse Source

Improve ROIAlign (accelerate ROIAlign, support sampling ratio and aligned ROIAlign) (#1820)

* update roialign

* update tool for roialign

* fix ceil for roialign

* fix ios build

* fix

* fix

* make it compatibile with the old version

* fix ios build

* trigger CI

* fix test

* order

* build

* remove code

* merge roialign

* accelerate ROIAlign

* update note

* rename func

* roialign version

* trigger CI

* fix roialign

* use ref for pre-calc in roialign

* retrigger CI

* pre-alloc
tags/20200616
JackieWu GitHub 6 years ago
parent
commit
ce2251db05
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 633 additions and 42 deletions
  1. +1
    -1
      src/CMakeLists.txt
  2. +113
    -40
      src/layer/roialign.cpp
  3. +4
    -1
      src/layer/roialign.h
  4. +380
    -0
      src/layer/x86/roialign_x86.cpp
  5. +32
    -0
      src/layer/x86/roialign_x86.h
  6. +1
    -0
      tests/CMakeLists.txt
  7. +93
    -0
      tests/test_roialign.cpp
  8. +3
    -0
      tools/caffe/caffe2ncnn.cpp
  9. +3
    -0
      tools/ncnnoptimize.cpp
  10. +3
    -0
      tools/quantize/ncnn2int8.cpp

+ 1
- 1
src/CMakeLists.txt View File

@@ -283,7 +283,7 @@ ncnn_add_layer(Quantize)
ncnn_add_layer(Dequantize)
ncnn_add_layer(Yolov3DetectionOutput)
ncnn_add_layer(PSROIPooling)
ncnn_add_layer(ROIAlign OFF)
ncnn_add_layer(ROIAlign)
ncnn_add_layer(Packing)
ncnn_add_layer(Requantize)
ncnn_add_layer(Cast)


+ 113
- 40
src/layer/roialign.cpp View File

@@ -15,6 +15,7 @@
#include "roialign.h"
#include <math.h>
#include <algorithm>
#include <cassert>

namespace ncnn {

@@ -29,6 +30,16 @@ int ROIAlign::load_param(const ParamDict& pd)
pooled_width = pd.get(0, 0);
pooled_height = pd.get(1, 0);
spatial_scale = pd.get(2, 1.f);
sampling_ratio = pd.get(3, 0);
aligned = pd.get(4, false);
version = pd.get(5, 0);
/*
* version 0:
* the original version of ROIAlign in ncnn
* version 1:
* the version in detectron2
*/
assert(version >= 0 && version <= 1);

return 0;
}
@@ -88,62 +99,124 @@ int ROIAlign::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
float roi_y1 = roi_ptr[1] * spatial_scale;
float roi_x2 = roi_ptr[2] * spatial_scale;
float roi_y2 = roi_ptr[3] * spatial_scale;
if (aligned) {
roi_x1 -= 0.5f; roi_y1 -= 0.5f;
roi_x2 -= 0.5f; roi_y2 -= 0.5f;
}

float roi_w = roi_x2 - roi_x1;
float roi_h = roi_y2 - roi_y1;

float roi_w = std::max(roi_x2 - roi_x1, 1.f);
float roi_h = std::max(roi_y2 - roi_y1, 1.f);
if (!aligned) {
roi_w = std::max(roi_w, 1.f);
roi_h = std::max(roi_h, 1.f);
}

float bin_size_w = roi_w / (float)pooled_width;
float bin_size_h = roi_h / (float)pooled_height;

#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);

for (int ph = 0; ph < pooled_height; ph++)
if (version == 0) {
// original version
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
for (int pw = 0; pw < pooled_width; pw++)
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);

for (int ph = 0; ph < pooled_height; ph++)
{
// Compute pooling region for this output unit:
// start (included) = ph * roi_height / pooled_height
// end (excluded) = (ph + 1) * roi_height / pooled_height
float hstart = roi_y1 + ph * bin_size_h;
float wstart = roi_x1 + pw * bin_size_w;
float hend = roi_y1 + (ph + 1) * bin_size_h;
float wend = roi_x1 + (pw + 1) * bin_size_w;

hstart = std::min(std::max(hstart, 0.f), (float)h);
wstart = std::min(std::max(wstart, 0.f), (float)w);
hend = std::min(std::max(hend, 0.f), (float)h);
wend = std::min(std::max(wend, 0.f), (float)w);

int bin_grid_h = ceil(hend - hstart);
int bin_grid_w = ceil(wend - wstart);

bool is_empty = (hend <= hstart) || (wend <= wstart);
int area = bin_grid_h * bin_grid_w;

float sum = 0.f;
for (int by = 0; by < bin_grid_h; by++)
for (int pw = 0; pw < pooled_width; pw++)
{
float y = hstart + (by + 0.5f) * bin_size_h / (float)bin_grid_h;

for (int bx = 0; bx < bin_grid_w; bx++)
// Compute pooling region for this output unit:
// start (included) = ph * roi_height / pooled_height
// end (excluded) = (ph + 1) * roi_height / pooled_height
float hstart = roi_y1 + ph * bin_size_h;
float wstart = roi_x1 + pw * bin_size_w;
float hend = roi_y1 + (ph + 1) * bin_size_h;
float wend = roi_x1 + (pw + 1) * bin_size_w;

hstart = std::min(std::max(hstart, 0.f), (float)h);
wstart = std::min(std::max(wstart, 0.f), (float)w);
hend = std::min(std::max(hend, 0.f), (float)h);
wend = std::min(std::max(wend, 0.f), (float)w);

int bin_grid_h = sampling_ratio > 0 ?
sampling_ratio : ceil(hend - hstart);
int bin_grid_w = sampling_ratio > 0 ?
sampling_ratio : ceil(wend - wstart);

bool is_empty = (hend <= hstart) || (wend <= wstart);
int area = bin_grid_h * bin_grid_w;

float sum = 0.f;
for (int by = 0; by < bin_grid_h; by++)
{
float x = wstart + (bx + 0.5f) * bin_size_w / (float)bin_grid_w;
float y = hstart + (by + 0.5f) * bin_size_h / (float)bin_grid_h;

for (int bx = 0; bx < bin_grid_w; bx++)
{
float x = wstart + (bx + 0.5f) * bin_size_w / (float)bin_grid_w;

// bilinear interpolate at (x,y)
float v = bilinear_interpolate(ptr, w, h, x, y);
// bilinear interpolate at (x,y)
float v = bilinear_interpolate(ptr, w, h, x, y);

sum += v;
sum += v;
}
}

outptr[pw] = is_empty ? 0.f : (sum / (float)area);
}

outptr[pw] = is_empty ? 0.f : (sum / (float)area);
outptr += pooled_width;
}
}
} else if (version == 1)
{
// the version in detectron 2
int roi_bin_grid_h = sampling_ratio > 0 ?
sampling_ratio : ceil(roi_h / pooled_height);
int roi_bin_grid_w = sampling_ratio > 0 ?
sampling_ratio : ceil(roi_w / pooled_width);

outptr += pooled_width;
const float count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1);

#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);

for (int ph = 0; ph < pooled_height; ph++)
{
for (int pw = 0; pw < pooled_width; pw++)
{
float sum = 0.f;
for (int by = 0; by < roi_bin_grid_h; by++)
{
float y = roi_y1 + ph * bin_size_h + (by + 0.5f) * bin_size_h / (float)roi_bin_grid_h;

for (int bx = 0; bx < roi_bin_grid_w; bx++)
{
float x = roi_x1 + pw * bin_size_w + (bx + 0.5f) * bin_size_w / (float)roi_bin_grid_w;

if (y < -1.0 || y > h || x < -1.0 || x > w) {
// empty
continue;
} else {
if (y <= 0) y = 0;
if (x <= 0) x = 0;

// bilinear interpolate at (x,y)
float v = bilinear_interpolate(ptr, w, h, x, y);
sum += v;
}
}
}
outptr[pw] = sum / count;
}

outptr += pooled_width;
}
}
}



+ 4
- 1
src/layer/roialign.h View File

@@ -1,6 +1,6 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
@@ -32,6 +32,9 @@ public:
int pooled_width;
int pooled_height;
float spatial_scale;
int sampling_ratio;
bool aligned;
int version;
};

} // namespace ncnn


+ 380
- 0
src/layer/x86/roialign_x86.cpp View File

@@ -0,0 +1,380 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "roialign_x86.h"
#include <math.h>
#include <algorithm>

namespace ncnn {

DEFINE_LAYER_CREATOR(ROIAlign_x86)

// adapted from detectron2
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp
template <typename T>
struct PreCalc {
int pos1;
int pos2;
int pos3;
int pos4;
T w1;
T w2;
T w3;
T w4;
};

template <typename T>
void detectron2_pre_calc_for_bilinear_interpolate(
const int height,
const int width,
const int pooled_height,
const int pooled_width,
const int iy_upper,
const int ix_upper,
T roi_start_h,
T roi_start_w,
T bin_size_h,
T bin_size_w,
int roi_bin_grid_h,
int roi_bin_grid_w,
std::vector<PreCalc<T> >& pre_calc) {
int pre_calc_index = 0;
for (int ph = 0; ph < pooled_height; ph++) {
for (int pw = 0; pw < pooled_width; pw++) {
for (int iy = 0; iy < iy_upper; iy++) {
const T yy = roi_start_h + ph * bin_size_h +
static_cast<T>(iy + .5f) * bin_size_h /
static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < ix_upper; ix++) {
const T xx = roi_start_w + pw * bin_size_w +
static_cast<T>(ix + .5f) * bin_size_w /
static_cast<T>(roi_bin_grid_w);

T x = xx;
T y = yy;
// deal with: inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
// empty
PreCalc<T> pc;
pc.pos1 = 0;
pc.pos2 = 0;
pc.pos3 = 0;
pc.pos4 = 0;
pc.w1 = 0;
pc.w2 = 0;
pc.w3 = 0;
pc.w4 = 0;
pre_calc[pre_calc_index++] = pc;
continue;
}

if (y <= 0) {
y = 0;
}
if (x <= 0) {
x = 0;
}

int y_low = (int)y;
int x_low = (int)x;
int y_high;
int x_high;

if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}

if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}

T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

// save weights and indices
PreCalc<T> pc;
pc.pos1 = y_low * width + x_low;
pc.pos2 = y_low * width + x_high;
pc.pos3 = y_high * width + x_low;
pc.pos4 = y_high * width + x_high;
pc.w1 = w1;
pc.w2 = w2;
pc.w3 = w3;
pc.w4 = w4;
pre_calc[pre_calc_index++] = pc;
}
}
}
}
}

template <typename T>
void original_pre_calc_for_bilinear_interpolate(
const int height,
const int width,
const int pooled_height,
const int pooled_width,
T roi_start_h,
T roi_start_w,
T bin_size_h,
T bin_size_w,
int sampling_ratio,
std::vector<PreCalc<T> >& pre_calc) {
int pre_calc_index = 0;
for (int ph = 0; ph < pooled_height; ph++) {
for (int pw = 0; pw < pooled_width; pw++) {
float hstart = roi_start_h + ph * bin_size_h;
float wstart = roi_start_w + pw * bin_size_w;
float hend = roi_start_h + (ph + 1) * bin_size_h;
float wend = roi_start_w + (pw + 1) * bin_size_w;
hstart = std::min(std::max(hstart, 0.f), (float)height);
wstart = std::min(std::max(wstart, 0.f), (float)width);
hend = std::min(std::max(hend, 0.f), (float)height);
wend = std::min(std::max(wend, 0.f), (float)width);

int bin_grid_h = sampling_ratio > 0 ?
sampling_ratio : ceil(hend - hstart);
int bin_grid_w = sampling_ratio > 0 ?
sampling_ratio : ceil(wend - wstart);

for (int by = 0; by < bin_grid_h; by++) {
float y = hstart + (by + 0.5f) * bin_size_h / (float)bin_grid_h;

for (int bx = 0; bx < bin_grid_w; bx++)
{
float x = wstart + (bx + 0.5f) * bin_size_w / (float)bin_grid_w;
int x0 = x;
int x1 = x0 + 1;
int y0 = y;
int y1 = y0 + 1;

float a0 = x1 - x;
float a1 = x - x0;
float b0 = y1 - y;
float b1 = y - y0;

if (x1 >= width)
{
x1 = width-1;
a0 = 1.f;
a1 = 0.f;
}
if (y1 >= height)
{
y1 = height-1;
b0 = 1.f;
b1 = 0.f;
}
// save weights and indices
PreCalc<T> pc;
pc.pos1 = y0 * width + x0;
pc.pos2 = y0 * width + x1;
pc.pos3 = y1 * width + x0;
pc.pos4 = y1 * width + x1;
pc.w1 = a0 * b0;
pc.w2 = a1 * b0;
pc.w3 = a0 * b1;
pc.w4 = a1 * b1;
pre_calc[pre_calc_index++] = pc;
}
}
}
}
}

ROIAlign_x86::ROIAlign_x86()
{
}

int ROIAlign_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
{
const Mat& bottom_blob = bottom_blobs[0];
const int width = bottom_blob.w;
const int height = bottom_blob.h;
const size_t elemsize = bottom_blob.elemsize;
const int channels = bottom_blob.c;

const Mat& roi_blob = bottom_blobs[1];

Mat& top_blob = top_blobs[0];
top_blob.create(pooled_width, pooled_height, channels, elemsize, opt.blob_allocator);
if (top_blob.empty())
return -100;

// For each ROI R = [x y w h]: max pool over R
const float* roi_ptr = roi_blob;

float roi_start_w = roi_ptr[0] * spatial_scale;
float roi_start_h = roi_ptr[1] * spatial_scale;
float roi_end_w = roi_ptr[2] * spatial_scale;
float roi_end_h = roi_ptr[3] * spatial_scale;
if (aligned) {
roi_start_w -= 0.5f; roi_start_h -= 0.5f;
roi_end_w -= 0.5f; roi_end_h -= 0.5f;
}

float roi_width = roi_end_w - roi_start_w;
float roi_height = roi_end_h - roi_start_h;

if (!aligned) {
roi_width = std::max(roi_width, 1.f);
roi_height = std::max(roi_height, 1.f);
}

float bin_size_w = (float)roi_width / (float)pooled_width;
float bin_size_h = (float)roi_height / (float)pooled_height;

if (version == 0)
{
// original version
int roi_bin_grid_h = sampling_ratio > 0 ?
sampling_ratio : ceil(roi_height / pooled_height);
int roi_bin_grid_w = sampling_ratio > 0 ?
sampling_ratio : ceil(roi_width / pooled_width);
std::vector<PreCalc<float> > pre_calc(
roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height);
original_pre_calc_for_bilinear_interpolate(
height,
width,
pooled_height,
pooled_width,
roi_start_h,
roi_start_w,
bin_size_h,
bin_size_w,
sampling_ratio,
pre_calc);

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
int pre_calc_index = 0;

for (int ph = 0; ph < pooled_height; ph++)
{
for (int pw = 0; pw < pooled_width; pw++)
{
// Compute pooling region for this output unit:
// start (included) = ph * roi_height / pooled_height
// end (excluded) = (ph + 1) * roi_height / pooled_height
float hstart = roi_start_h + ph * bin_size_h;
float wstart = roi_start_w + pw * bin_size_w;
float hend = roi_start_h + (ph + 1) * bin_size_h;
float wend = roi_start_w + (pw + 1) * bin_size_w;

hstart = std::min(std::max(hstart, 0.f), (float)height);
wstart = std::min(std::max(wstart, 0.f), (float)width);
hend = std::min(std::max(hend, 0.f), (float)height);
wend = std::min(std::max(wend, 0.f), (float)width);

int bin_grid_h = sampling_ratio > 0 ?
sampling_ratio : ceil(hend - hstart);
int bin_grid_w = sampling_ratio > 0 ?
sampling_ratio : ceil(wend - wstart);

bool is_empty = (hend <= hstart) || (wend <= wstart);
int area = bin_grid_h * bin_grid_w;

float sum = 0.f;
for (int by = 0; by < bin_grid_h; by++)
{

for (int bx = 0; bx < bin_grid_w; bx++)
{
PreCalc<float> &pc = pre_calc[pre_calc_index++];
// bilinear interpolate at (x,y)
sum += pc.w1 * ptr[pc.pos1] +
pc.w2 * ptr[pc.pos2] +
pc.w3 * ptr[pc.pos3] + pc.w4 * ptr[pc.pos4];

}
}
outptr[pw] = is_empty ? 0.f : (sum / (float)area);
}

outptr += pooled_width;
}
}
} else if (version == 1)
{
// the version in detectron 2
int roi_bin_grid_h = sampling_ratio > 0 ?
sampling_ratio : ceil(roi_height / pooled_height);
int roi_bin_grid_w = sampling_ratio > 0 ?
sampling_ratio : ceil(roi_width / pooled_width);

const float count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1);

std::vector<PreCalc<float> > pre_calc(
roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height);
detectron2_pre_calc_for_bilinear_interpolate(
height,
width,
pooled_height,
pooled_width,
roi_bin_grid_h,
roi_bin_grid_w,
roi_start_h,
roi_start_w,
bin_size_h,
bin_size_w,
roi_bin_grid_h,
roi_bin_grid_w,
pre_calc);

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
int pre_calc_index = 0;

for (int ph = 0; ph < pooled_height; ph++)
{
for (int pw = 0; pw < pooled_width; pw++)
{
float output_val = 0.f;
for (int iy = 0; iy < roi_bin_grid_h; iy++)
{
for (int ix = 0; ix < roi_bin_grid_w; ix++)
{
PreCalc<float> &pc = pre_calc[pre_calc_index++];

output_val += pc.w1 * ptr[pc.pos1] +
pc.w2 * ptr[pc.pos2] +
pc.w3 * ptr[pc.pos3] + pc.w4 * ptr[pc.pos4];
}
}
output_val /= count;
outptr[pw] = output_val;
}
outptr += pooled_width;
}
}
}

return 0;
}

} // namespace ncnn

+ 32
- 0
src/layer/x86/roialign_x86.h View File

@@ -0,0 +1,32 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef LAYER_ROIALIGN_X86_H
#define LAYER_ROIALIGN_X86_H

#include "roialign.h"

namespace ncnn {

class ROIAlign_x86 : virtual public ROIAlign
{
public:
ROIAlign_x86();

virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
};

} // namespace ncnn

#endif // LAYER_ROIALIGN_X86_H

+ 1
- 0
tests/CMakeLists.txt View File

@@ -55,6 +55,7 @@ ncnn_add_layer_test(Pooling)
ncnn_add_layer_test(PReLU)
ncnn_add_layer_test(PriorBox)
ncnn_add_layer_test(ROIPooling)
ncnn_add_layer_test(ROIAlign)
ncnn_add_layer_test(ReLU)
ncnn_add_layer_test(Reorg)
ncnn_add_layer_test(Reshape)


+ 93
- 0
tests/test_roialign.cpp View File

@@ -0,0 +1,93 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "testutil.h"

#include "layer.h"
#include "layer/roialign.h"


static int test_roialign(int w, int h, int c,
int pooled_width, int pooled_height, float spatial_scale,
int sampling_ratio, bool aligned, int version)
{
std::vector<ncnn::Mat> a;
a.push_back(RandomMat(w, h, c));
ncnn::Mat b(4);
b[0] = RandomFloat(0.001 , w-2.001); //roi_x1
b[2] = RandomFloat(b[0]+1.001, w-1.001); //roi_x2
b[1] = RandomFloat(0.001 , h-2.001); //roi_y1
b[3] = RandomFloat(b[2]+1.001, h-1.001); //roi_y2
a.push_back(b);

ncnn::ParamDict pd;
pd.set(0, pooled_width); // pooled_width
pd.set(1, pooled_height); // pooled_height
pd.set(2, spatial_scale); // spatial_scale
pd.set(3, sampling_ratio); // sampling_ratio
pd.set(4, aligned); // aligned
pd.set(5, version); // version

std::vector<ncnn::Mat> weights(0);

ncnn::Option opt;
opt.num_threads = 1;
opt.use_vulkan_compute = true;
opt.use_int8_inference = false;

int ret = test_layer<ncnn::ROIAlign>("ROIAlign", pd, weights, opt, a);
if (ret != 0)
{
fprintf(stderr, "test_roialign failed base_w=%d base_h=%d base_c=%d pooled_width=%d pooled_height=%d spatial_scale=%4f.3\n", w, h, c, pooled_width, pooled_height, spatial_scale);
}

return ret;
}

static int test_roialign_0()
{
int ret = 0;
for (int sampling_ratio = 0; sampling_ratio <= 4 && ret == 0; ++sampling_ratio)
{
for (int aligned_i = 0; aligned_i < 2 && ret == 0; ++aligned_i)
{
for (int version = 0; version <= 1; ++version)
{
bool aligned = aligned_i;
int lret = 0
|| test_roialign(112, 112, 16 , 56, 56, 0.50000, sampling_ratio, aligned, version)
|| test_roialign(56 , 56 , 32 , 28, 28, 0.25000, sampling_ratio, aligned, version)
|| test_roialign(28 , 28 , 64 , 14, 14, 0.12500, sampling_ratio, aligned, version)
|| test_roialign(14 , 14 , 128, 27, 17, 0.06250, sampling_ratio, aligned, version)
|| test_roialign(7 , 7 , 256, 3, 3, 0.03125, sampling_ratio, aligned, version)
;
ret |= lret;
}
}
}

if (ret != 0)
return -1;

return 0;
}

int main()
{
SRAND(7767517);

return 0
|| test_roialign_0()
;
}

+ 3
- 0
tools/caffe/caffe2ncnn.cpp View File

@@ -1538,6 +1538,9 @@ int main(int argc, char** argv)
fprintf(pp, " 0=%d", roi_align_param.pooled_w());
fprintf(pp, " 1=%d", roi_align_param.pooled_h());
fprintf(pp, " 2=%e", roi_align_param.spatial_scale());
fprintf(pp, " 3=%d", 0);
fprintf(pp, " 4=%d", false);
fprintf(pp, " 5=%d", 0);
}
else if (layer.type() == "ROIPooling")
{


+ 3
- 0
tools/ncnnoptimize.cpp View File

@@ -2763,6 +2763,9 @@ int NetOptimize::save(const char* parampath, const char* binpath)
fprintf_param_value(" 0=%d", pooled_width)
fprintf_param_value(" 1=%d", pooled_height)
fprintf_param_value(" 2=%e", spatial_scale)
fprintf_param_value(" 3=%d", sampling_ratio)
fprintf_param_value(" 4=%d", aligned)
fprintf_param_value(" 5=%d", version)
}
else if (layer->type == "ROIPooling")
{


+ 3
- 0
tools/quantize/ncnn2int8.cpp View File

@@ -1064,6 +1064,9 @@ int NetQuantize::save(const char* parampath, const char* binpath)
fprintf_param_value(" 0=%d", pooled_width)
fprintf_param_value(" 1=%d", pooled_height)
fprintf_param_value(" 2=%f", spatial_scale)
fprintf_param_value(" 3=%d", sampling_ratio)
fprintf_param_value(" 4=%d", aligned)
fprintf_param_value(" 5=%d", version)
}
else if (layer->type == "ROIPooling")
{


Loading…
Cancel
Save