| @@ -142,9 +142,9 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects) | |||
| in.substract_mean_normalize(mean_vals, 0); | |||
| ncnn::Mat im_info(3); | |||
| im_info.data[0] = h; | |||
| im_info.data[1] = w; | |||
| im_info.data[2] = scale; | |||
| im_info[0] = h; | |||
| im_info[1] = w; | |||
| im_info[2] = scale; | |||
| // step1, extract feature and all rois | |||
| ncnn::Extractor ex1 = fasterrcnn.create_extractor(); | |||
| @@ -182,7 +182,7 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects) | |||
| float score = 0.f; | |||
| for (int i=0; i<num_class; i++) | |||
| { | |||
| float class_score = cls_prob.channel(i).data[0]; | |||
| float class_score = cls_prob.channel(i)[0]; | |||
| if (class_score > score) | |||
| { | |||
| label = i; | |||
| @@ -197,10 +197,10 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects) | |||
| // fprintf(stderr, "%d = %f\n", label, score); | |||
| // unscale to image size | |||
| float x1 = roi.data[0] / scale; | |||
| float y1 = roi.data[1] / scale; | |||
| float x2 = roi.data[2] / scale; | |||
| float y2 = roi.data[3] / scale; | |||
| float x1 = roi[0] / scale; | |||
| float y1 = roi[1] / scale; | |||
| float x2 = roi[2] / scale; | |||
| float y2 = roi[3] / scale; | |||
| float pb_w = x2 - x1 + 1; | |||
| float pb_h = y2 - y1 + 1; | |||
| @@ -42,7 +42,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores) | |||
| cls_scores.resize(out.c); | |||
| for (int j=0; j<out.c; j++) | |||
| { | |||
| const float* prob = out.data + out.cstep * j; | |||
| const float* prob = out.channel(j); | |||
| cls_scores[j] = prob[0]; | |||
| } | |||
| @@ -519,7 +519,7 @@ static void conv3x3s1_winograd64_transform_kernel_neon(const Mat& kernel, Mat& k | |||
| { | |||
| for (int q = 0; q<inch; q++) | |||
| { | |||
| const float* kernel0 = kernel.data + p*inch * 9 + q * 9; | |||
| const float* kernel0 = (const float*)kernel + p*inch * 9 + q * 9; | |||
| float* kernel_tm0 = kernel_tm.channel(p).row(q); | |||
| // transform kernel, transposed | |||
| @@ -169,10 +169,10 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob) con | |||
| { | |||
| Mat bottom_blob_bordered_g = bottom_blob_bordered.channel(g); | |||
| Mat top_blob_g = top_blob.channel(g); | |||
| Mat weight_data_g(maxk, (float*)(weight_data + maxk * g)); | |||
| Mat weight_data_g(maxk, (void*)((const float*)weight_data + maxk * g)); | |||
| Mat bias_data_g; | |||
| if (bias_term) | |||
| bias_data_g = Mat(1, (float*)(bias_data + g)); | |||
| bias_data_g = Mat(1, (void*)((const float*)bias_data + g)); | |||
| conv(bottom_blob_bordered_g, top_blob_g, weight_data_g, bias_data_g); | |||
| } | |||
| @@ -190,10 +190,10 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob) con | |||
| { | |||
| Mat bottom_blob_bordered_g(w, h, channels_g, bottom_blob_bordered.channel(channels_g * g)); | |||
| Mat top_blob_g(outw, outh, num_output_g, top_blob.channel(num_output_g * g)); | |||
| Mat weight_data_g(maxk * channels_g * num_output_g, (float*)(weight_data + maxk * channels_g * num_output_g * g)); | |||
| Mat weight_data_g(maxk * channels_g * num_output_g, (void*)((const float*)weight_data + maxk * channels_g * num_output_g * g)); | |||
| Mat bias_data_g; | |||
| if (bias_term) | |||
| bias_data_g = Mat(num_output_g, (float*)(bias_data + num_output_g * g)); | |||
| bias_data_g = Mat(num_output_g, (void*)((const float*)bias_data + num_output_g * g)); | |||
| conv(bottom_blob_bordered_g, top_blob_g, weight_data_g, bias_data_g); | |||
| } | |||
| @@ -57,7 +57,7 @@ static void deconv3x3s1_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& _ | |||
| for (int i = 0; i < h; i++) | |||
| { | |||
| float* outptr = out.data + out.w * i; | |||
| float* outptr = out.row(i); | |||
| float* outptr0 = outptr; | |||
| float* outptr1 = outptr + outw; | |||
| @@ -278,7 +278,7 @@ static void deconv3x3s2_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& _ | |||
| for (int i = 0; i < h; i++) | |||
| { | |||
| float* outptr = out.data + outw * i*2; | |||
| float* outptr = out.row(i*2); | |||
| float* outptr0 = outptr; | |||
| float* outptr1 = outptr0 + outw; | |||
| @@ -59,7 +59,7 @@ static void deconv4x4s1_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& _ | |||
| for (int i = 0; i < h; i++) | |||
| { | |||
| float* outptr = out.data + out.w * i; | |||
| float* outptr = out.row(i); | |||
| float* outptr0 = outptr; | |||
| float* outptr1 = outptr0 + outw; | |||
| @@ -228,7 +228,7 @@ static void deconv4x4s2_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& _ | |||
| for (int i = 0; i < h; i++) | |||
| { | |||
| float* outptr = out.data + out.w * i*2; | |||
| float* outptr = out.row(i*2); | |||
| float* outptr0 = outptr; | |||
| float* outptr1 = outptr0 + outw; | |||
| @@ -90,11 +90,11 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob) c | |||
| { | |||
| Mat top_blob_bordered_g = top_blob_bordered.channel(g); | |||
| Mat bottom_blob_g = bottom_blob.channel(g); | |||
| Mat weight_data_g(maxk, (float*)(weight_data + maxk * g)); | |||
| Mat weight_data_g(maxk, (void*)((const float*)weight_data + maxk * g)); | |||
| Mat bias_data_g; | |||
| if (bias_term) | |||
| bias_data_g = Mat(1, (float*)(bias_data + g)); | |||
| bias_data_g = Mat(1, (void*)((const float*)bias_data + g)); | |||
| deconv(bottom_blob_g, top_blob_bordered_g, weight_data_g, bias_data_g); | |||
| } | |||
| @@ -110,10 +110,10 @@ int DeconvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob) c | |||
| { | |||
| Mat top_blob_bordered_g(outw, outh, num_output_g, top_blob_bordered.channel(num_output_g * g)); | |||
| Mat bottom_blob_g(w, h, channels_g, bottom_blob.channel(channels_g * g).data); | |||
| Mat weight_data_g(maxk * channels_g * num_output_g, (float*)(weight_data + maxk * channels_g * num_output_g * g)); | |||
| Mat weight_data_g(maxk * channels_g * num_output_g, (void*)((const float*)weight_data + maxk * channels_g * num_output_g * g)); | |||
| Mat bias_data_g; | |||
| if (bias_term) | |||
| bias_data_g = Mat(num_output_g, (float*)(bias_data + num_output_g * g)); | |||
| bias_data_g = Mat(num_output_g, (void*)((const float*)bias_data + num_output_g * g)); | |||
| deconv(bottom_blob_g, top_blob_bordered_g, weight_data_g, bias_data_g); | |||
| } | |||
| @@ -44,7 +44,7 @@ int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| float sum = 0.f; | |||
| if (bias_term) | |||
| sum = bias_data.data[p]; | |||
| sum = bias_data[p]; | |||
| const float* w = weight_data_ptr + channels * p; | |||
| @@ -73,7 +73,7 @@ int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| float sum = 0.f; | |||
| if (bias_term) | |||
| sum = bias_data.data[p]; | |||
| sum = bias_data[p]; | |||
| const float* w = weight_data_ptr + size * channels * p; | |||
| @@ -29,7 +29,7 @@ int Slice_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& t | |||
| int channels = bottom_blob.c; | |||
| int q = 0; | |||
| const int* slices_ptr = (const int*)slices.data; | |||
| const int* slices_ptr = slices; | |||
| for (size_t i=0; i<top_blobs.size(); i++) | |||
| { | |||
| int slice = slices_ptr[i]; | |||
| @@ -46,7 +46,7 @@ int Slice_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& t | |||
| int size = bottom_blob.cstep * slice; | |||
| const float* ptr = bottom_blob.channel(q); | |||
| float* outptr = top_blob.data; | |||
| float* outptr = top_blob; | |||
| #if __ARM_NEON | |||
| int nn = size >> 3; | |||
| @@ -56,17 +56,12 @@ int BatchNorm::load_model(const ModelBin& mb) | |||
| b_data.create(channels); | |||
| if (b_data.empty()) | |||
| return -100; | |||
| const float* slope_data_ptr = slope_data; | |||
| const float* mean_data_ptr = mean_data; | |||
| const float* var_data_ptr = var_data; | |||
| const float* bias_data_ptr = bias_data; | |||
| float* a_data_ptr = a_data; | |||
| float* b_data_ptr = b_data; | |||
| for (int i=0; i<channels; i++) | |||
| { | |||
| float sqrt_var = sqrt(var_data_ptr[i]); | |||
| a_data_ptr[i] = bias_data_ptr[i] - slope_data_ptr[i] * mean_data_ptr[i] / sqrt_var; | |||
| b_data_ptr[i] = slope_data_ptr[i] / sqrt_var; | |||
| float sqrt_var = sqrt(var_data[i]); | |||
| a_data[i] = bias_data[i] - slope_data[i] * mean_data[i] / sqrt_var; | |||
| b_data[i] = slope_data[i] / sqrt_var; | |||
| } | |||
| return 0; | |||
| @@ -82,15 +77,13 @@ int BatchNorm::forward_inplace(Mat& bottom_top_blob) const | |||
| int h = bottom_top_blob.h; | |||
| int size = w * h; | |||
| const float* a_data_ptr = a_data; | |||
| const float* b_data_ptr = b_data; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| float* ptr = bottom_top_blob.channel(q); | |||
| float a = a_data_ptr[q]; | |||
| float b = b_data_ptr[q]; | |||
| float a = a_data[q]; | |||
| float b = b_data[q]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| @@ -47,13 +47,12 @@ int Bias::forward_inplace(Mat& bottom_top_blob) const | |||
| int channels = bottom_top_blob.c; | |||
| int size = w * h; | |||
| const float* bias_ptr = bias_data; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| float* ptr = bottom_top_blob.channel(q); | |||
| float bias = bias_ptr[q]; | |||
| float bias = bias_data[q]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| @@ -79,7 +79,7 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const float* ptr = a.channel(q); | |||
| const float* ptr1 = b.data + h * q; | |||
| const float* ptr1 = (const float*)b + h * q; | |||
| float* outptr = c.channel(q); | |||
| for (int y=0; y<h; y++) | |||
| @@ -102,7 +102,7 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| { | |||
| if (b.w == 1) | |||
| { | |||
| const float b0 = b.data[0]; | |||
| const float b0 = b[0]; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| @@ -125,7 +125,7 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const float* ptr = a.channel(q); | |||
| const float b0 = b.data[q]; | |||
| const float b0 = b[q]; | |||
| float* outptr = c.channel(q); | |||
| for (int i=0; i<size; i++) | |||
| @@ -151,7 +151,7 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels1; q++) | |||
| { | |||
| const float* ptr = a.data + h1 * q; | |||
| const float* ptr = (const float*)a + h1 * q; | |||
| const float* ptr1 = b.channel(q); | |||
| float* outptr = c.channel(q); | |||
| @@ -177,13 +177,9 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| if (b.dims == 2) | |||
| { | |||
| const float* ptr = a; | |||
| const float* ptr1 = b; | |||
| float* outptr = c; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| outptr[i] = op(ptr[i], ptr1[i]); | |||
| c[i] = op(a[i], b[i]); | |||
| } | |||
| return 0; | |||
| @@ -197,25 +193,21 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| if (b.w == 1) | |||
| { | |||
| const float* ptr = a; | |||
| const float b0 = b.data[0]; | |||
| float* outptr = c; | |||
| const float b0 = b[0]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| outptr[i] = op(ptr[i], b0); | |||
| c[i] = op(a[i], b0); | |||
| } | |||
| return 0; | |||
| } | |||
| const float* ptr = a; | |||
| const float* ptr1 = b; | |||
| float* outptr = c; | |||
| for (int y=0; y<h; y++) | |||
| { | |||
| const float b0 = ptr1[y]; | |||
| const float b0 = b[y]; | |||
| for (int x=0; x<w; x++) | |||
| { | |||
| outptr[x] = op(ptr[x], b0); | |||
| @@ -238,7 +230,7 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| if (c.empty()) | |||
| return -100; | |||
| const float a0 = a.data[0]; | |||
| const float a0 = a[0]; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels1; q++) | |||
| { | |||
| @@ -263,13 +255,10 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| if (c.empty()) | |||
| return -100; | |||
| const float a0 = a.data[0]; | |||
| const float* ptr1 = b; | |||
| float* outptr = c; | |||
| const float a0 = a[0]; | |||
| for (int i=0; i<size1; i++) | |||
| { | |||
| outptr[i] = op(a0, ptr1[i]); | |||
| c[i] = op(a0, b[i]); | |||
| } | |||
| return 0; | |||
| @@ -281,13 +270,10 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| if (c.empty()) | |||
| return -100; | |||
| const float a0 = a.data[0]; | |||
| const float* ptr1 = b; | |||
| float* outptr = c; | |||
| const float a0 = a[0]; | |||
| for (int i=0; i<size1; i++) | |||
| { | |||
| outptr[i] = op(a0, ptr1[i]); | |||
| c[i] = op(a0, b[i]); | |||
| } | |||
| return 0; | |||
| @@ -303,7 +289,7 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels1; q++) | |||
| { | |||
| const float a0 = a.data[q]; | |||
| const float a0 = a[q]; | |||
| const float* ptr1 = b.channel(q); | |||
| float* outptr = c.channel(q); | |||
| @@ -325,13 +311,12 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| if (c.empty()) | |||
| return -100; | |||
| const float* ptr = a; | |||
| const float* ptr1 = b; | |||
| float* outptr = c; | |||
| for (int y=0; y<h1; y++) | |||
| { | |||
| const float a0 = ptr[y]; | |||
| const float a0 = a[y]; | |||
| for (int x=0; x<w1; x++) | |||
| { | |||
| outptr[x] = op(a0, ptr1[x]); | |||
| @@ -352,25 +337,18 @@ static int binary_op(const Mat& a, const Mat& b, Mat& c) | |||
| if (b.w == 1) | |||
| { | |||
| const float* ptr = a; | |||
| const float b0 = b.data[0]; | |||
| float* outptr = c; | |||
| const float b0 = b[0]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| outptr[i] = op(ptr[i], b0); | |||
| c[i] = op(a[i], b0); | |||
| } | |||
| return 0; | |||
| } | |||
| const float* ptr = a; | |||
| const float* ptr1 = b; | |||
| float* outptr = c; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| outptr[i] = op(ptr[i], ptr1[i]); | |||
| c[i] = op(a[i], b[i]); | |||
| } | |||
| } | |||
| } | |||
| @@ -125,7 +125,6 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| } | |||
| // num_output | |||
| const float* weight_data_ptr = weight_data; | |||
| #pragma omp parallel for | |||
| for (int p=0; p<num_output; p++) | |||
| { | |||
| @@ -138,9 +137,9 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| float sum = 0.f; | |||
| if (bias_term) | |||
| sum = bias_data.data[p]; | |||
| sum = bias_data[p]; | |||
| const float* kptr = weight_data_ptr + maxk * channels * p; | |||
| const float* kptr = (const float*)weight_data + maxk * channels * p; | |||
| // channels | |||
| for (int q=0; q<channels; q++) | |||
| @@ -118,7 +118,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| for (int g=0; g<group; g++) | |||
| { | |||
| float* outptr = top_blob.channel(g); | |||
| const float* kptr = weight_data + maxk * g; | |||
| const float* kptr = (const float*)weight_data + maxk * g; | |||
| const Mat m = bottom_blob_bordered.channel(g); | |||
| for (int i = 0; i < outh; i++) | |||
| @@ -128,7 +128,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| float sum = 0.f; | |||
| if (bias_term) | |||
| sum = bias_data.data[g]; | |||
| sum = bias_data[g]; | |||
| const float* sptr = m.row(i*stride_h) + j*stride_w; | |||
| @@ -158,7 +158,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| for (int p=0; p<num_output_g; p++) | |||
| { | |||
| float* outptr = top_blob.channel(g * num_output_g + p); | |||
| const float* weight_data_ptr = weight_data + maxk * channels_g * num_output_g * g; | |||
| const float* weight_data_ptr = (const float*)weight_data + maxk * channels_g * num_output_g * g; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| @@ -167,7 +167,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| float sum = 0.f; | |||
| if (bias_term) | |||
| sum = bias_data.data[num_output_g * g + p]; | |||
| sum = bias_data[num_output_g * g + p]; | |||
| const float* kptr = weight_data_ptr + maxk * channels_g * p; | |||
| @@ -101,13 +101,12 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| } | |||
| // num_output | |||
| const float* weight_data_ptr = weight_data; | |||
| #pragma omp parallel for | |||
| for (int p=0; p<num_output; p++) | |||
| { | |||
| Mat out = top_blob_bordered.channel(p); | |||
| const float bias = bias_term ? bias_data.data[p] : 0.f; | |||
| const float bias = bias_term ? bias_data[p] : 0.f; | |||
| out.fill(bias); | |||
| @@ -117,13 +116,13 @@ int Deconvolution::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| { | |||
| float* outptr = out.row(i*stride_h) + j*stride_w; | |||
| const float* kptr = weight_data_ptr + maxk * channels * p; | |||
| const float* kptr = (const float*)weight_data + maxk * channels * p; | |||
| // channels | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const Mat m = bottom_blob.channel(q); | |||
| float val = *(m.data + m.w * i + j); | |||
| float val = *(m.row(i) + j); | |||
| for (int k = 0; k < maxk; k++) | |||
| { | |||
| @@ -92,10 +92,10 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| for (int g=0; g<group; g++) | |||
| { | |||
| const float* inptr = bottom_blob.channel(g); | |||
| const float* kptr = weight_data + maxk * g; | |||
| const float* kptr = (const float*)weight_data + maxk * g; | |||
| Mat m = top_blob_bordered.channel(g); | |||
| const float bias = bias_term ? bias_data.data[g] : 0.f; | |||
| const float bias = bias_term ? bias_data[g] : 0.f; | |||
| m.fill(bias); | |||
| @@ -124,12 +124,12 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| #pragma omp parallel for | |||
| for (int g = 0; g < group; g++) | |||
| { | |||
| const float* weight_data_ptr = weight_data + maxk * channels_g * num_output_g * g; | |||
| const float* weight_data_ptr = (const float*)weight_data + maxk * channels_g * num_output_g * g; | |||
| for (int p = 0; p < num_output_g; p++) | |||
| { | |||
| Mat out = top_blob_bordered.channel(g * num_output_g + p); | |||
| const float bias = bias_term ? bias_data.data[g * num_output_g + p] : 0.f; | |||
| const float bias = bias_term ? bias_data[g * num_output_g + p] : 0.f; | |||
| out.fill(bias); | |||
| @@ -145,7 +145,7 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| for (int q = 0; q < channels_g; q++) | |||
| { | |||
| const Mat m = bottom_blob.channel(channels_g * g + q); | |||
| float val = *(m.data + w * i + j); | |||
| float val = *(m.row(i) + j); | |||
| for (int k = 0; k < maxk; k++) | |||
| { | |||
| @@ -201,7 +201,7 @@ int DetectionOutput::forward(const std::vector<Mat>& bottom_blobs, std::vector<M | |||
| for (int j = 0; j < num_prior; j++) | |||
| { | |||
| float score = confidence.data[j * num_class + i]; | |||
| float score = confidence[j * num_class + i]; | |||
| if (score > confidence_threshold) | |||
| { | |||
| @@ -114,12 +114,10 @@ int Eltwise::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top | |||
| } | |||
| else | |||
| { | |||
| const float* coeffs_ptr = coeffs; | |||
| // first blob | |||
| const Mat& bottom_blob1 = bottom_blobs[1]; | |||
| float coeff0 = coeffs_ptr[0]; | |||
| float coeff1 = coeffs_ptr[1]; | |||
| float coeff0 = coeffs[0]; | |||
| float coeff1 = coeffs[1]; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| @@ -136,7 +134,7 @@ int Eltwise::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top | |||
| for (size_t b=2; b<bottom_blobs.size(); b++) | |||
| { | |||
| const Mat& bottom_blob1 = bottom_blobs[b]; | |||
| float coeff = coeffs_ptr[b]; | |||
| float coeff = coeffs[b]; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| @@ -65,7 +65,7 @@ int Embed::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| #pragma omp parallel for | |||
| for (int q=0; q<words; q++) | |||
| { | |||
| float* outptr = top_blob.data + top_blob.w * q; | |||
| float* outptr = (float*)top_blob + top_blob.w * q; | |||
| int word_index = (int)word_ptr[q]; | |||
| @@ -79,7 +79,7 @@ int Embed::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| { | |||
| for (int p=0; p<num_output; p++) | |||
| { | |||
| outptr[p] += bias_data.data[p]; | |||
| outptr[p] += bias_data[p]; | |||
| } | |||
| } | |||
| } | |||
| @@ -39,7 +39,7 @@ int Flatten::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const float* ptr = bottom_blob.channel(q); | |||
| float* outptr = top_blob.data + size * q; | |||
| float* outptr = (float*)top_blob + size * q; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| @@ -61,7 +61,6 @@ int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| return -100; | |||
| // num_output | |||
| const float* weight_data_ptr = weight_data; | |||
| #pragma omp parallel for | |||
| for (int p=0; p<num_output; p++) | |||
| { | |||
| @@ -69,12 +68,12 @@ int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| float sum = 0.f; | |||
| if (bias_term) | |||
| sum = bias_data.data[p]; | |||
| sum = bias_data[p]; | |||
| // channels | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const float* w = weight_data_ptr + size * channels * p + size * q; | |||
| const float* w = (const float*)weight_data + size * channels * p + size * q; | |||
| const float* m = bottom_blob.channel(q); | |||
| for (int i = 0; i < size; i++) | |||
| @@ -97,7 +97,7 @@ int LSTM::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_bl | |||
| // 0 otherwise | |||
| // calculate hidden | |||
| // gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c | |||
| const float cont = cont_blob.data[t]; | |||
| const float cont = cont_blob[t]; | |||
| const Mat x = input_blob.channel(t); | |||
| float* hidden_data = hidden; | |||
| for (int q=0; q<num_output; q++) | |||
| @@ -105,18 +105,18 @@ int LSTM::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_bl | |||
| float h_cont = cont ? hidden_data[q] : 0.f; | |||
| const float* x_data = x; | |||
| const float* bias_c_data_ptr = bias_c_data.data + 4 * q; | |||
| float* gates_data = gates.data + 4 * q; | |||
| const float* bias_c_data_ptr = (const float*)bias_c_data + 4 * q; | |||
| float* gates_data = (float*)gates + 4 * q; | |||
| // gate I F O G | |||
| const float* weight_hc_data_I = weight_hc_data.data + weight_hc_data.w * q; | |||
| const float* weight_xc_data_I = weight_xc_data.data + weight_xc_data.w * q; | |||
| const float* weight_hc_data_F = weight_hc_data.data + weight_hc_data.w * q + size; | |||
| const float* weight_xc_data_F = weight_xc_data.data + weight_xc_data.w * q + size; | |||
| const float* weight_hc_data_O = weight_hc_data.data + weight_hc_data.w * q + size*2; | |||
| const float* weight_xc_data_O = weight_xc_data.data + weight_xc_data.w * q + size*2; | |||
| const float* weight_hc_data_G = weight_hc_data.data + weight_hc_data.w * q + size*3; | |||
| const float* weight_xc_data_G = weight_xc_data.data + weight_xc_data.w * q + size*3; | |||
| const float* weight_hc_data_I = (const float*)weight_hc_data + weight_hc_data.w * q; | |||
| const float* weight_xc_data_I = (const float*)weight_xc_data + weight_xc_data.w * q; | |||
| const float* weight_hc_data_F = (const float*)weight_hc_data + weight_hc_data.w * q + size; | |||
| const float* weight_xc_data_F = (const float*)weight_xc_data + weight_xc_data.w * q + size; | |||
| const float* weight_hc_data_O = (const float*)weight_hc_data + weight_hc_data.w * q + size*2; | |||
| const float* weight_xc_data_O = (const float*)weight_xc_data + weight_xc_data.w * q + size*2; | |||
| const float* weight_hc_data_G = (const float*)weight_hc_data + weight_hc_data.w * q + size*3; | |||
| const float* weight_xc_data_G = (const float*)weight_xc_data + weight_xc_data.w * q + size*3; | |||
| float I = bias_c_data_ptr[0]; | |||
| float F = bias_c_data_ptr[1]; | |||
| @@ -148,7 +148,7 @@ int LSTM::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_bl | |||
| float* output_data = output; | |||
| for (int q=0; q<num_output; q++) | |||
| { | |||
| float* gates_data = gates.data + 4 * q; | |||
| float* gates_data = (float*)gates + 4 * q; | |||
| float I = gates_data[0]; | |||
| float F = gates_data[1]; | |||
| @@ -49,20 +49,19 @@ int MVN::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| Mat sum(channels); | |||
| if (sum.empty()) | |||
| return -100; | |||
| float* sum_ptr = sum; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const float* ptr = bottom_blob.channel(q); | |||
| float sum = 0.f; | |||
| float s = 0.f; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| sum += ptr[i]; | |||
| s += ptr[i]; | |||
| } | |||
| sum_ptr[q] = sum; | |||
| sum[q] = s; | |||
| } | |||
| if (across_channels) | |||
| @@ -71,7 +70,7 @@ int MVN::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| float mean = 0.f; | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| mean += sum_ptr[q]; | |||
| mean += sum[q]; | |||
| } | |||
| mean = mean / (channels * size); | |||
| @@ -96,7 +95,7 @@ int MVN::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| { | |||
| const float* ptr = bottom_blob.channel(q); | |||
| float* outptr = top_blob.channel(q); | |||
| float mean = sum_ptr[q] / size; | |||
| float mean = sum[q] / size; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| @@ -111,20 +110,19 @@ int MVN::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| Mat sqsum(channels); | |||
| if (sqsum.empty()) | |||
| return -100; | |||
| float* sqsum_ptr = sqsum; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const float* ptr = top_blob.channel(q); | |||
| float sum = 0.f; | |||
| float s = 0.f; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| sum += ptr[i] * ptr[i]; | |||
| s += ptr[i] * ptr[i]; | |||
| } | |||
| sqsum_ptr[q] = sum; | |||
| sqsum[q] = s; | |||
| } | |||
| if (across_channels) | |||
| @@ -133,7 +131,7 @@ int MVN::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| float sqmean = 0.f; | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| sqmean += sqsum_ptr[q]; | |||
| sqmean += sqsum[q]; | |||
| } | |||
| sqmean = sqmean / (channels * size); | |||
| @@ -160,7 +158,7 @@ int MVN::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| float* outptr = top_blob.channel(q); | |||
| float sqmean = sqsum_ptr[q] / size; | |||
| float sqmean = sqsum[q] / size; | |||
| float norm_var = sqrt(sqmean) + eps; | |||
| float norm_var_inv = 1.f / norm_var; | |||
| @@ -63,7 +63,6 @@ int Normalize::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| if (square_sum_blob.empty()) | |||
| return -100; | |||
| float* square_sum_ptr = square_sum_blob; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| @@ -75,14 +74,14 @@ int Normalize::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| ssum += ptr[i] * ptr[i]; | |||
| } | |||
| square_sum_ptr[q] = ssum; | |||
| square_sum_blob[q] = ssum; | |||
| } | |||
| // sum + eps | |||
| float ssum = eps; | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| ssum += square_sum_ptr[q]; | |||
| ssum += square_sum_blob[q]; | |||
| } | |||
| // 1 / sqrt(ssum) | |||
| @@ -90,7 +89,7 @@ int Normalize::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| if (channel_shared) | |||
| { | |||
| float scale = a * scale_data.data[0]; | |||
| float scale = a * scale_data[0]; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| @@ -111,7 +110,7 @@ int Normalize::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| { | |||
| const float* ptr = bottom_blob.channel(q); | |||
| float* outptr = top_blob.channel(q); | |||
| float scale = a * scale_data.data[q]; | |||
| float scale = a * scale_data[q]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| @@ -124,15 +123,13 @@ int Normalize::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| { | |||
| // square sum, 1 / sqrt(ssum) | |||
| Mat square_sum_blob; | |||
| square_sum_blob.create(w, h); | |||
| square_sum_blob.create(size); | |||
| if (square_sum_blob.empty()) | |||
| return -100; | |||
| float* ssptr = square_sum_blob; | |||
| if (channel_shared) | |||
| { | |||
| float scale = scale_data.data[0]; | |||
| float scale = scale_data[0]; | |||
| #pragma omp parallel for | |||
| for (int i=0; i<size; i++) | |||
| @@ -144,7 +141,7 @@ int Normalize::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| ssum += ptr[i] * ptr[i]; | |||
| } | |||
| ssptr[i] = 1.f / sqrt(ssum) * scale; | |||
| square_sum_blob[i] = 1.f / sqrt(ssum) * scale; | |||
| } | |||
| #pragma omp parallel for | |||
| @@ -155,7 +152,7 @@ int Normalize::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| outptr[i] = ptr[i] * ssptr[i]; | |||
| outptr[i] = ptr[i] * square_sum_blob[i]; | |||
| } | |||
| } | |||
| } | |||
| @@ -171,7 +168,7 @@ int Normalize::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| ssum += ptr[i] * ptr[i]; | |||
| } | |||
| ssptr[i] = 1.f / sqrt(ssum); | |||
| square_sum_blob[i] = 1.f / sqrt(ssum); | |||
| } | |||
| #pragma omp parallel for | |||
| @@ -179,11 +176,11 @@ int Normalize::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| { | |||
| const float* ptr = bottom_blob.channel(q); | |||
| float* outptr = top_blob.channel(q); | |||
| float scale = scale_data.data[q]; | |||
| float scale = scale_data[q]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| outptr[i] = ptr[i] * ssptr[i] * scale; | |||
| outptr[i] = ptr[i] * square_sum_blob[i] * scale; | |||
| } | |||
| } | |||
| } | |||
| @@ -249,7 +249,8 @@ int Pooling::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| { | |||
| const float scale = (float)kernel_w / wtail; | |||
| outptr = top_blob.channel(q) + outw - 1; | |||
| outptr = top_blob.channel(q); | |||
| outptr += outw - 1; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| *outptr *= scale; | |||
| @@ -47,13 +47,11 @@ int PReLU::forward_inplace(Mat& bottom_top_blob) const | |||
| int channels = bottom_top_blob.c; | |||
| int size = w * h; | |||
| const float* slope_data_ptr = slope_data; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| float* ptr = bottom_top_blob.channel(q); | |||
| float slope = num_slope > 1 ? slope_data_ptr[q] : slope_data_ptr[0]; | |||
| float slope = num_slope > 1 ? slope_data[q] : slope_data[0]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| @@ -79,7 +79,7 @@ int PriorBox::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to | |||
| #pragma omp parallel for | |||
| for (int i = 0; i < h; i++) | |||
| { | |||
| float* box = top_blob.data + i * w * num_prior * 4; | |||
| float* box = (float*)top_blob + i * w * num_prior * 4; | |||
| float center_x = offset * step_w; | |||
| float center_y = offset * step_h + i * step_h; | |||
| @@ -91,7 +91,7 @@ int PriorBox::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to | |||
| for (int k = 0; k < num_min_size; k++) | |||
| { | |||
| float min_size = min_sizes.data[k]; | |||
| float min_size = min_sizes[k]; | |||
| // min size box | |||
| box_w = box_h = min_size; | |||
| @@ -105,7 +105,7 @@ int PriorBox::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to | |||
| if (num_max_size > 0) | |||
| { | |||
| float max_size = max_sizes.data[k]; | |||
| float max_size = max_sizes[k]; | |||
| // max size box | |||
| box_w = box_h = sqrt(min_size * max_size); | |||
| @@ -28,14 +28,14 @@ Proposal::Proposal() | |||
| // TODO load from param | |||
| ratios.create(3); | |||
| ratios.data[0] = 0.5f; | |||
| ratios.data[1] = 1.f; | |||
| ratios.data[2] = 2.f; | |||
| ratios[0] = 0.5f; | |||
| ratios[1] = 1.f; | |||
| ratios[2] = 2.f; | |||
| scales.create(3); | |||
| scales.data[0] = 8.f; | |||
| scales.data[1] = 16.f; | |||
| scales.data[2] = 32.f; | |||
| scales[0] = 8.f; | |||
| scales[1] = 16.f; | |||
| scales[2] = 32.f; | |||
| } | |||
| static Mat generate_anchors(int base_size, const Mat& ratios, const Mat& scales) | |||
| @@ -51,14 +51,14 @@ static Mat generate_anchors(int base_size, const Mat& ratios, const Mat& scales) | |||
| for (int i = 0; i < num_ratio; i++) | |||
| { | |||
| float ar = ratios.data[i]; | |||
| float ar = ratios[i]; | |||
| int r_w = round(base_size / sqrt(ar)); | |||
| int r_h = round(r_w * ar);//round(base_size * sqrt(ar)); | |||
| for (int j = 0; j < num_scale; j++) | |||
| { | |||
| float scale = scales.data[j]; | |||
| float scale = scales[j]; | |||
| float rs_w = r_w * scale; | |||
| float rs_h = r_h * scale; | |||
| @@ -269,8 +269,8 @@ int Proposal::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to | |||
| } | |||
| // clip predicted boxes to image | |||
| float im_w = im_info_blob.data[1]; | |||
| float im_h = im_info_blob.data[0]; | |||
| float im_w = im_info_blob[1]; | |||
| float im_h = im_info_blob[0]; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<num_anchors; q++) | |||
| @@ -293,7 +293,7 @@ int Proposal::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to | |||
| std::vector<Rect> proposal_boxes; | |||
| std::vector<float> scores; | |||
| float im_scale = im_info_blob.data[2]; | |||
| float im_scale = im_info_blob[2]; | |||
| float min_boxsize = min_size * im_scale; | |||
| for (int q=0; q<num_anchors; q++) | |||
| @@ -82,7 +82,7 @@ static int reduction_op(const Mat& a, Mat& b, float v0, int dim, float coeff) | |||
| Mat sums(channels); | |||
| if (sums.empty()) | |||
| return -100; | |||
| float* sums_ptr = sums; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| @@ -94,22 +94,19 @@ static int reduction_op(const Mat& a, Mat& b, float v0, int dim, float coeff) | |||
| sum = op(sum, ptr[i]); | |||
| } | |||
| sums_ptr[q] = sum; | |||
| sums[q] = sum; | |||
| } | |||
| float* outptr = b; | |||
| float sum = v0; | |||
| for (int i=0; i<channels; i++) | |||
| { | |||
| sum = op2(sum, sums_ptr[i]); | |||
| sum = op2(sum, sums[i]); | |||
| } | |||
| outptr[0] = sum * coeff; | |||
| b[0] = sum * coeff; | |||
| } | |||
| else if (dim == 1) | |||
| { | |||
| float* outptr = b; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| @@ -121,7 +118,7 @@ static int reduction_op(const Mat& a, Mat& b, float v0, int dim, float coeff) | |||
| sum = op(sum, ptr[i]); | |||
| } | |||
| outptr[q] = sum * coeff; | |||
| b[q] = sum * coeff; | |||
| } | |||
| } | |||
| else if (dim == 2) | |||
| @@ -173,19 +170,18 @@ static int reduction_op(const Mat& a, Mat& b, float v0, int dim, float coeff) | |||
| b.fill(v0); | |||
| float* outptr = b; | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const float* mins_ptr = mins.channel(q); | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| outptr[j] = op2(outptr[j], mins_ptr[j]); | |||
| b[j] = op2(b[j], mins_ptr[j]); | |||
| } | |||
| } | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| outptr[j] *= coeff; | |||
| b[j] *= coeff; | |||
| } | |||
| } | |||
| else if (dim == -2) | |||
| @@ -195,18 +191,16 @@ static int reduction_op(const Mat& a, Mat& b, float v0, int dim, float coeff) | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const float* ptr = a.channel(q); | |||
| float* outptr = b; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| outptr[i] = op(outptr[i], ptr[i]); | |||
| b[i] = op(b[i], ptr[i]); | |||
| } | |||
| } | |||
| float* outptr = b; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| outptr[i] *= coeff; | |||
| b[i] *= coeff; | |||
| } | |||
| } | |||
| @@ -257,15 +251,13 @@ int Reduction::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| if (dim == 0) | |||
| { | |||
| float* outptr = top_blob; | |||
| outptr[0] /= channels * size; | |||
| top_blob[0] /= channels * size; | |||
| } | |||
| else if (dim == 1) | |||
| { | |||
| float* outptr = top_blob; | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| outptr[q] /= size; | |||
| top_blob[q] /= size; | |||
| } | |||
| } | |||
| else if (dim == 2) | |||
| @@ -282,18 +274,16 @@ int Reduction::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| } | |||
| else if (dim == -1) | |||
| { | |||
| float* outptr = top_blob; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| outptr[j] /= h * channels; | |||
| top_blob[j] /= h * channels; | |||
| } | |||
| } | |||
| else if (dim == -2) | |||
| { | |||
| float* outptr = top_blob; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| outptr[i] /= channels; | |||
| top_blob[i] /= channels; | |||
| } | |||
| } | |||
| } | |||
| @@ -92,18 +92,18 @@ int RNN::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blo | |||
| // 0 otherwise | |||
| // calculate hidden | |||
| // h_t = tanh( W_hh * h_cont_{t-1} + W_xh * x_t + b_h ) | |||
| const float cont = cont_blob.data[t]; | |||
| const float cont = cont_blob[t]; | |||
| const Mat x = input_blob.channel(t); | |||
| float* hidden_data = hidden; | |||
| for (int q=0; q<num_output; q++) | |||
| { | |||
| float h_cont = cont ? hidden_data[q] : 0.f; | |||
| const float* weight_hh_data_ptr = weight_hh_data.data + weight_hh_data.w * q; | |||
| const float* weight_xh_data_ptr = weight_xh_data.data + weight_xh_data.w * q; | |||
| const float* weight_hh_data_ptr = (const float*)weight_hh_data + weight_hh_data.w * q; | |||
| const float* weight_xh_data_ptr = (const float*)weight_xh_data + weight_xh_data.w * q; | |||
| const float* x_data = x; | |||
| float s0 = bias_h_data.data[q]; | |||
| float s0 = bias_h_data[q]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| s0 += weight_hh_data_ptr[i] * h_cont + weight_xh_data_ptr[i] * x_data[i]; | |||
| @@ -118,9 +118,9 @@ int RNN::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blo | |||
| float* output_data = output; | |||
| for (int q=0; q<num_output; q++) | |||
| { | |||
| const float* weight_ho_data_ptr = weight_ho_data.data + weight_ho_data.w * q; | |||
| const float* weight_ho_data_ptr = (const float*)weight_ho_data + weight_ho_data.w * q; | |||
| float s0 = bias_o_data.data[q]; | |||
| float s0 = bias_o_data[q]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| s0 += weight_ho_data_ptr[i] * hidden_data[i]; | |||
| @@ -66,14 +66,13 @@ int Scale::forward_inplace(std::vector<Mat>& bottom_top_blobs) const | |||
| if (bias_term) | |||
| { | |||
| const float* bias_ptr = bias_data; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| float* ptr = bottom_top_blob.channel(q); | |||
| float s = scale_blob.channel(q)[0]; | |||
| float bias = bias_ptr[q]; | |||
| float bias = bias_data[q]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| @@ -109,15 +108,13 @@ int Scale::forward_inplace(Mat& bottom_top_blob) const | |||
| if (bias_term) | |||
| { | |||
| const float* scale_ptr = scale_data; | |||
| const float* bias_ptr = bias_data; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| float* ptr = bottom_top_blob.channel(q); | |||
| float s = scale_ptr[q]; | |||
| float bias = bias_ptr[q]; | |||
| float s = scale_data[q]; | |||
| float bias = bias_data[q]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| @@ -127,13 +124,12 @@ int Scale::forward_inplace(Mat& bottom_top_blob) const | |||
| } | |||
| else | |||
| { | |||
| const float* scale_ptr = scale_data; | |||
| #pragma omp parallel for | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| float* ptr = bottom_top_blob.channel(q); | |||
| float s = scale_ptr[q]; | |||
| float s = scale_data[q]; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| @@ -37,7 +37,7 @@ int Slice::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_b | |||
| int channels = bottom_blob.c; | |||
| int q = 0; | |||
| const int* slices_ptr = (const int*)slices.data; | |||
| const int* slices_ptr = slices; | |||
| for (size_t i=0; i<top_blobs.size(); i++) | |||
| { | |||
| int slice = slices_ptr[i]; | |||
| @@ -54,11 +54,8 @@ int Slice::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_b | |||
| int size = bottom_blob.cstep * slice; | |||
| const float* ptr = bottom_blob.channel(q); | |||
| float* outptr = top_blob.data; | |||
| for (int j=0; j<size; j++) | |||
| { | |||
| outptr[j] = ptr[j]; | |||
| } | |||
| float* outptr = top_blob; | |||
| memcpy(outptr, ptr, size * sizeof(float)); | |||
| q += slice; | |||
| } | |||
| @@ -84,13 +84,12 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| return -100; | |||
| max.fill(-FLT_MAX); | |||
| float* maxptr = max; | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| const float* ptr = bottom_top_blob.row(i); | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| maxptr[j] = std::max(maxptr[j], ptr[j]); | |||
| max[j] = std::max(max[j], ptr[j]); | |||
| } | |||
| } | |||
| @@ -99,7 +98,7 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| float* ptr = bottom_top_blob.row(i); | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| ptr[j] = exp(ptr[j] - maxptr[j]); | |||
| ptr[j] = exp(ptr[j] - max[j]); | |||
| } | |||
| } | |||
| @@ -109,13 +108,12 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| return -100; | |||
| sum.fill(0.f); | |||
| float* sumptr = sum; | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| const float* ptr = bottom_top_blob.row(i); | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| sumptr[j] += ptr[j]; | |||
| sum[j] += ptr[j]; | |||
| } | |||
| } | |||
| @@ -124,7 +122,7 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| float* ptr = bottom_top_blob.row(i); | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| ptr[j] /= sumptr[j]; | |||
| ptr[j] /= sum[j]; | |||
| } | |||
| } | |||
| @@ -141,7 +139,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| if (max.empty()) | |||
| return -100; | |||
| float* maxptr = max; | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| const float* ptr = bottom_top_blob.row(i); | |||
| @@ -152,14 +149,14 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| m = std::max(m, ptr[j]); | |||
| } | |||
| maxptr[i] = m; | |||
| max[i] = m; | |||
| } | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| float* ptr = bottom_top_blob.row(i); | |||
| float m = maxptr[i]; | |||
| float m = max[i]; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| ptr[j] = exp(ptr[j] - m); | |||
| @@ -171,7 +168,6 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| if (sum.empty()) | |||
| return -100; | |||
| float* sumptr = sum; | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| const float* ptr = bottom_top_blob.row(i); | |||
| @@ -182,14 +178,14 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| s += ptr[j]; | |||
| } | |||
| sumptr[i] = s; | |||
| sum[i] = s; | |||
| } | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| float* ptr = bottom_top_blob.row(i); | |||
| float s = sumptr[i]; | |||
| float s = sum[i]; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| ptr[j] /= s; | |||
| @@ -214,11 +210,10 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const float* ptr = bottom_top_blob.channel(q); | |||
| float* maxptr = max; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| maxptr[i] = std::max(maxptr[i], ptr[i]); | |||
| max[i] = std::max(max[i], ptr[i]); | |||
| } | |||
| } | |||
| @@ -226,11 +221,10 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| float* ptr = bottom_top_blob.channel(q); | |||
| float* maxptr = max; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| ptr[i] = exp(ptr[i] - maxptr[i]); | |||
| ptr[i] = exp(ptr[i] - max[i]); | |||
| } | |||
| } | |||
| @@ -242,11 +236,10 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| const float* ptr = bottom_top_blob.channel(q); | |||
| float* sumptr = sum; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| sumptr[i] += ptr[i]; | |||
| sum[i] += ptr[i]; | |||
| } | |||
| } | |||
| @@ -254,11 +247,10 @@ int Softmax::forward_inplace(Mat& bottom_top_blob) const | |||
| for (int q=0; q<channels; q++) | |||
| { | |||
| float* ptr = bottom_top_blob.channel(q); | |||
| float* sumptr = sum; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| ptr[i] /= sumptr[i]; | |||
| ptr[i] /= sum[i]; | |||
| } | |||
| } | |||
| @@ -113,7 +113,7 @@ int SPP::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| { | |||
| const float* sptr = m.data + m.w * i*stride_h + j*stride_w; | |||
| const float* sptr = m.row(i*stride_h) + j*stride_w; | |||
| float max = sptr[0]; | |||
| @@ -142,7 +142,7 @@ int SPP::forward(const Mat& bottom_blob, Mat& top_blob) const | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| { | |||
| const float* sptr = m.data + m.w * i*stride_h + j*stride_w; | |||
| const float* sptr = m.row(i*stride_h) + j*stride_w; | |||
| float sum = 0; | |||
| @@ -40,12 +40,10 @@ static int unary_op_inplace(Mat& a) | |||
| int size = a.total(); | |||
| float* ptr = a; | |||
| #pragma omp parallel for | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| ptr[i] = op(ptr[i]); | |||
| a[i] = op(a[i]); | |||
| } | |||
| return 0; | |||
| @@ -32,7 +32,7 @@ void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_val | |||
| #pragma omp parallel for | |||
| for (int q=0; q<c; q++) | |||
| { | |||
| float* ptr = data + cstep * q; | |||
| float* ptr = channel(q);//data + cstep * q; | |||
| const float mean = mean_vals[q]; | |||
| #if __ARM_NEON | |||
| @@ -87,7 +87,7 @@ void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_val | |||
| #pragma omp parallel for | |||
| for (int q=0; q<c; q++) | |||
| { | |||
| float* ptr = data + cstep * q; | |||
| float* ptr = channel(q);//data + cstep * q; | |||
| const float norm = norm_vals[q]; | |||
| #if __ARM_NEON | |||
| @@ -142,7 +142,7 @@ void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_val | |||
| #pragma omp parallel for | |||
| for (int q=0; q<c; q++) | |||
| { | |||
| float* ptr = data + cstep * q; | |||
| float* ptr = channel(q);//data + cstep * q; | |||
| const float mean = mean_vals[q]; | |||
| const float norm = norm_vals[q]; | |||
| @@ -257,7 +257,7 @@ Mat Mat::from_float16(const unsigned short* data, int size) | |||
| if (m.empty()) | |||
| return m; | |||
| float* ptr = m.data; | |||
| float* ptr = m;//.data; | |||
| #if __ARM_NEON && (__ARM_FP & 2) | |||
| int nn = cpu_support_arm_vfpv4() ? size >> 2 : 0; | |||
| @@ -324,8 +324,8 @@ static void copy_make_border_image(const Mat& src, Mat& dst, int top, int left, | |||
| int w = dst.w; | |||
| int h = dst.h; | |||
| const float* ptr = src.data; | |||
| float* outptr = dst.data; | |||
| const float* ptr = src;//.data; | |||
| float* outptr = dst;//.data; | |||
| if (type == BORDER_CONSTANT) | |||
| { | |||
| @@ -508,8 +508,8 @@ static void copy_cut_border_image(const Mat& src, Mat& dst, int top, int left) | |||
| int w = dst.w; | |||
| int h = dst.h; | |||
| const float* ptr = src.data + src.w * top + left; | |||
| float* outptr = dst.data; | |||
| const float* ptr = src.row(top) + left;//.data + src.w * top + left; | |||
| float* outptr = dst;//.data; | |||
| for (int y = 0; y < h; y++) | |||
| { | |||
| @@ -30,25 +30,26 @@ public: | |||
| // empty | |||
| Mat(); | |||
| // vec | |||
| Mat(int w); | |||
| Mat(int w, size_t elemsize = 4); | |||
| // image | |||
| Mat(int w, int h); | |||
| Mat(int w, int h, size_t elemsize = 4); | |||
| // dim | |||
| Mat(int w, int h, int c); | |||
| Mat(int w, int h, int c, size_t elemsize = 4); | |||
| // copy | |||
| Mat(const Mat& m); | |||
| // external vec | |||
| Mat(int w, float* data); | |||
| Mat(int w, void* data, size_t elemsize = 4); | |||
| // external image | |||
| Mat(int w, int h, float* data); | |||
| Mat(int w, int h, void* data, size_t elemsize = 4); | |||
| // external dim | |||
| Mat(int w, int h, int c, float* data); | |||
| Mat(int w, int h, int c, void* data, size_t elemsize = 4); | |||
| // release | |||
| ~Mat(); | |||
| // assign | |||
| Mat& operator=(const Mat& m); | |||
| // set all | |||
| void fill(float v); | |||
| template <typename T> void fill(T v); | |||
| // deep copy | |||
| Mat clone() const; | |||
| // reshape vec | |||
| @@ -58,11 +59,11 @@ public: | |||
| // reshape dim | |||
| Mat reshape(int w, int h, int c) const; | |||
| // allocate vec | |||
| void create(int w); | |||
| void create(int w, size_t elemsize = 4); | |||
| // allocate image | |||
| void create(int w, int h); | |||
| void create(int w, int h, size_t elemsize = 4); | |||
| // allocate dim | |||
| void create(int w, int h, int c); | |||
| void create(int w, int h, int c, size_t elemsize = 4); | |||
| // refcount++ | |||
| void addref(); | |||
| // refcount-- | |||
| @@ -76,8 +77,16 @@ public: | |||
| const Mat channel(int c) const; | |||
| float* row(int y); | |||
| const float* row(int y) const; | |||
| operator float*(); | |||
| operator const float*() const; | |||
| template<typename T> T* row(int y); | |||
| template<typename T> const T* row(int y) const; | |||
| // access raw data | |||
| template<typename T> operator T*(); | |||
| template<typename T> operator const T*() const; | |||
| // convenient access float vec element | |||
| float& operator[](int i); | |||
| const float& operator[](int i) const; | |||
| enum | |||
| { | |||
| @@ -119,15 +128,23 @@ public: | |||
| // convenient construct from half precision floating point data | |||
| static Mat from_float16(const unsigned short* data, int size); | |||
| // the dimensionality | |||
| int dims; | |||
| // pointer to the data | |||
| float* data; | |||
| void* data; | |||
| // pointer to the reference counter; | |||
| // pointer to the reference counter | |||
| // when points to user-allocated data, the pointer is NULL | |||
| int* refcount; | |||
| // element size in bytes | |||
| // 4 = float32/int32 | |||
| // 2 = float16 | |||
| // 1 = int8/uint8 | |||
| // 0 = empty | |||
| size_t elemsize; | |||
| // the dimensionality | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| @@ -217,30 +234,30 @@ static inline void NCNN_XADD(int* addr, int delta) { int tmp = *addr; *addr += d | |||
| #endif | |||
| inline Mat::Mat() | |||
| : dims(0), data(0), refcount(0), w(0), h(0), c(0), cstep(0) | |||
| : data(0), refcount(0), elemsize(0), dims(0), w(0), h(0), c(0), cstep(0) | |||
| { | |||
| } | |||
| inline Mat::Mat(int _w) | |||
| : dims(0), data(0), refcount(0) | |||
| inline Mat::Mat(int _w, size_t _elemsize) | |||
| : data(0), refcount(0), dims(0) | |||
| { | |||
| create(_w); | |||
| create(_w, _elemsize); | |||
| } | |||
| inline Mat::Mat(int _w, int _h) | |||
| : dims(0), data(0), refcount(0) | |||
| inline Mat::Mat(int _w, int _h, size_t _elemsize) | |||
| : data(0), refcount(0), dims(0) | |||
| { | |||
| create(_w, _h); | |||
| create(_w, _h, _elemsize); | |||
| } | |||
| inline Mat::Mat(int _w, int _h, int _c) | |||
| : dims(0), data(0), refcount(0) | |||
| inline Mat::Mat(int _w, int _h, int _c, size_t _elemsize) | |||
| : data(0), refcount(0), dims(0) | |||
| { | |||
| create(_w, _h, _c); | |||
| create(_w, _h, _c, _elemsize); | |||
| } | |||
| inline Mat::Mat(const Mat& m) | |||
| : dims(m.dims), data(m.data), refcount(m.refcount) | |||
| : data(m.data), refcount(m.refcount), elemsize(m.elemsize), dims(m.dims) | |||
| { | |||
| if (refcount) | |||
| NCNN_XADD(refcount, 1); | |||
| @@ -252,8 +269,8 @@ inline Mat::Mat(const Mat& m) | |||
| cstep = m.cstep; | |||
| } | |||
| inline Mat::Mat(int _w, float* _data) | |||
| : dims(1), data(_data), refcount(0) | |||
| inline Mat::Mat(int _w, void* _data, size_t _elemsize) | |||
| : data(_data), refcount(0), elemsize(_elemsize), dims(1) | |||
| { | |||
| w = _w; | |||
| h = 1; | |||
| @@ -262,8 +279,8 @@ inline Mat::Mat(int _w, float* _data) | |||
| cstep = w; | |||
| } | |||
| inline Mat::Mat(int _w, int _h, float* _data) | |||
| : dims(2), data(_data), refcount(0) | |||
| inline Mat::Mat(int _w, int _h, void* _data, size_t _elemsize) | |||
| : data(_data), refcount(0), elemsize(_elemsize), dims(2) | |||
| { | |||
| w = _w; | |||
| h = _h; | |||
| @@ -272,14 +289,14 @@ inline Mat::Mat(int _w, int _h, float* _data) | |||
| cstep = w * h; | |||
| } | |||
| inline Mat::Mat(int _w, int _h, int _c, float* _data) | |||
| : dims(3), data(_data), refcount(0) | |||
| inline Mat::Mat(int _w, int _h, int _c, void* _data, size_t _elemsize) | |||
| : data(_data), refcount(0), elemsize(_elemsize), dims(3) | |||
| { | |||
| w = _w; | |||
| h = _h; | |||
| c = _c; | |||
| cstep = alignSize(w * h * sizeof(float), 16) >> 2; | |||
| cstep = alignSize(w * h * elemsize, 16) / elemsize; | |||
| } | |||
| inline Mat::~Mat() | |||
| @@ -297,10 +314,11 @@ inline Mat& Mat::operator=(const Mat& m) | |||
| release(); | |||
| dims = m.dims; | |||
| data = m.data; | |||
| refcount = m.refcount; | |||
| elemsize = m.elemsize; | |||
| dims = m.dims; | |||
| w = m.w; | |||
| h = m.h; | |||
| c = m.c; | |||
| @@ -313,7 +331,7 @@ inline Mat& Mat::operator=(const Mat& m) | |||
| inline void Mat::fill(float _v) | |||
| { | |||
| int size = total(); | |||
| float* ptr = data; | |||
| float* ptr = (float*)data; | |||
| #if __ARM_NEON | |||
| int nn = size >> 2; | |||
| @@ -354,6 +372,17 @@ inline void Mat::fill(float _v) | |||
| } | |||
| } | |||
| template <typename T> | |||
| inline void Mat::fill(T _v) | |||
| { | |||
| int size = total(); | |||
| T* ptr = (T*)data; | |||
| for (int i=0; i<size; i++) | |||
| { | |||
| ptr[i] = _v; | |||
| } | |||
| } | |||
| inline Mat Mat::clone() const | |||
| { | |||
| if (empty()) | |||
| @@ -361,15 +390,15 @@ inline Mat Mat::clone() const | |||
| Mat m; | |||
| if (dims == 1) | |||
| m.create(w); | |||
| m.create(w, elemsize); | |||
| else if (dims == 2) | |||
| m.create(w, h); | |||
| m.create(w, h, elemsize); | |||
| else if (dims == 3) | |||
| m.create(w, h, c); | |||
| m.create(w, h, c, elemsize); | |||
| if (total() > 0) | |||
| { | |||
| memcpy(m.data, data, total() * sizeof(float)); | |||
| memcpy(m.data, data, total() * elemsize); | |||
| } | |||
| return m; | |||
| @@ -383,14 +412,14 @@ inline Mat Mat::reshape(int _w) const | |||
| if (dims == 3 && cstep != (size_t)w * h) | |||
| { | |||
| Mat m; | |||
| m.create(_w); | |||
| m.create(_w, elemsize); | |||
| // flatten | |||
| for (int i=0; i<c; i++) | |||
| { | |||
| const float* ptr = data + i * cstep; | |||
| float* mptr = m.data + i * w * h; | |||
| memcpy(mptr, ptr, w * h * sizeof(float)); | |||
| const void* ptr = (unsigned char*)data + i * cstep * elemsize; | |||
| void* mptr = (unsigned char*)m.data + i * w * h * elemsize; | |||
| memcpy(mptr, ptr, w * h * elemsize); | |||
| } | |||
| return m; | |||
| @@ -399,7 +428,6 @@ inline Mat Mat::reshape(int _w) const | |||
| Mat m = *this; | |||
| m.dims = 1; | |||
| m.w = _w; | |||
| m.h = 1; | |||
| m.c = 1; | |||
| @@ -417,14 +445,14 @@ inline Mat Mat::reshape(int _w, int _h) const | |||
| if (dims == 3 && cstep != (size_t)w * h) | |||
| { | |||
| Mat m; | |||
| m.create(_w, _h); | |||
| m.create(_w, _h, elemsize); | |||
| // flatten | |||
| for (int i=0; i<c; i++) | |||
| { | |||
| const float* ptr = data + i * cstep; | |||
| float* mptr = m.data + i * w * h; | |||
| memcpy(mptr, ptr, w * h * sizeof(float)); | |||
| const void* ptr = (unsigned char*)data + i * cstep * elemsize; | |||
| void* mptr = (unsigned char*)m.data + i * w * h * elemsize; | |||
| memcpy(mptr, ptr, w * h * elemsize); | |||
| } | |||
| return m; | |||
| @@ -433,7 +461,6 @@ inline Mat Mat::reshape(int _w, int _h) const | |||
| Mat m = *this; | |||
| m.dims = 2; | |||
| m.w = _w; | |||
| m.h = _h; | |||
| m.c = 1; | |||
| @@ -450,17 +477,17 @@ inline Mat Mat::reshape(int _w, int _h, int _c) const | |||
| if (dims < 3) | |||
| { | |||
| if ((size_t)_w * _h != alignSize(_w * _h * sizeof(float), 16) >> 2) | |||
| if ((size_t)_w * _h != alignSize(_w * _h * elemsize, 16) / elemsize) | |||
| { | |||
| Mat m; | |||
| m.create(_w, _h, _c); | |||
| m.create(_w, _h, _c, elemsize); | |||
| // align channel | |||
| for (int i=0; i<_c; i++) | |||
| { | |||
| const float* ptr = data + i * _w * _h; | |||
| float* mptr = m.data + i * m.cstep; | |||
| memcpy(mptr, ptr, _w * _h * sizeof(float)); | |||
| const void* ptr = (unsigned char*)data + i * _w * _h * elemsize; | |||
| void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize; | |||
| memcpy(mptr, ptr, _w * _h * elemsize); | |||
| } | |||
| return m; | |||
| @@ -476,22 +503,22 @@ inline Mat Mat::reshape(int _w, int _h, int _c) const | |||
| Mat m = *this; | |||
| m.dims = 3; | |||
| m.w = _w; | |||
| m.h = _h; | |||
| m.c = _c; | |||
| m.cstep = alignSize(_w * _h * sizeof(float), 16) >> 2; | |||
| m.cstep = alignSize(_w * _h * elemsize, 16) / elemsize; | |||
| return m; | |||
| } | |||
| inline void Mat::create(int _w) | |||
| inline void Mat::create(int _w, size_t _elemsize) | |||
| { | |||
| release(); | |||
| dims = 1; | |||
| elemsize = _elemsize; | |||
| dims = 1; | |||
| w = _w; | |||
| h = 1; | |||
| c = 1; | |||
| @@ -500,19 +527,20 @@ inline void Mat::create(int _w) | |||
| if (total() > 0) | |||
| { | |||
| size_t totalsize = total() * sizeof(float); | |||
| data = (float*)fastMalloc(totalsize + (int)sizeof(*refcount)); | |||
| size_t totalsize = total() * elemsize; | |||
| data = fastMalloc(totalsize + (int)sizeof(*refcount)); | |||
| refcount = (int*)(((unsigned char*)data) + totalsize); | |||
| *refcount = 1; | |||
| } | |||
| } | |||
| inline void Mat::create(int _w, int _h) | |||
| inline void Mat::create(int _w, int _h, size_t _elemsize) | |||
| { | |||
| release(); | |||
| dims = 2; | |||
| elemsize = _elemsize; | |||
| dims = 2; | |||
| w = _w; | |||
| h = _h; | |||
| c = 1; | |||
| @@ -521,29 +549,30 @@ inline void Mat::create(int _w, int _h) | |||
| if (total() > 0) | |||
| { | |||
| size_t totalsize = total() * sizeof(float); | |||
| data = (float*)fastMalloc(totalsize + (int)sizeof(*refcount)); | |||
| size_t totalsize = total() * elemsize; | |||
| data = fastMalloc(totalsize + (int)sizeof(*refcount)); | |||
| refcount = (int*)(((unsigned char*)data) + totalsize); | |||
| *refcount = 1; | |||
| } | |||
| } | |||
| inline void Mat::create(int _w, int _h, int _c) | |||
| inline void Mat::create(int _w, int _h, int _c, size_t _elemsize) | |||
| { | |||
| release(); | |||
| dims = 3; | |||
| elemsize = _elemsize; | |||
| dims = 3; | |||
| w = _w; | |||
| h = _h; | |||
| c = _c; | |||
| cstep = alignSize(w * h * sizeof(float), 16) >> 2; | |||
| cstep = alignSize(w * h * elemsize, 16) / elemsize; | |||
| if (total() > 0) | |||
| { | |||
| size_t totalsize = total() * sizeof(float); | |||
| data = (float*)fastMalloc(totalsize + (int)sizeof(*refcount)); | |||
| size_t totalsize = total() * elemsize; | |||
| data = fastMalloc(totalsize + (int)sizeof(*refcount)); | |||
| refcount = (int*)(((unsigned char*)data) + totalsize); | |||
| *refcount = 1; | |||
| } | |||
| @@ -560,9 +589,11 @@ inline void Mat::release() | |||
| if (refcount && NCNN_XADD(refcount, -1) == 1) | |||
| fastFree(data); | |||
| dims = 0; | |||
| data = 0; | |||
| elemsize = 0; | |||
| dims = 0; | |||
| w = 0; | |||
| h = 0; | |||
| c = 0; | |||
| @@ -584,32 +615,56 @@ inline size_t Mat::total() const | |||
| inline Mat Mat::channel(int c) | |||
| { | |||
| return Mat(w, h, data + cstep * c); | |||
| return Mat(w, h, (unsigned char*)data + cstep * c * elemsize, elemsize); | |||
| } | |||
| inline const Mat Mat::channel(int c) const | |||
| { | |||
| return Mat(w, h, data + cstep * c); | |||
| return Mat(w, h, (unsigned char*)data + cstep * c * elemsize, elemsize); | |||
| } | |||
| inline float* Mat::row(int y) | |||
| { | |||
| return data + w * y; | |||
| return (float*)data + w * y; | |||
| } | |||
| inline const float* Mat::row(int y) const | |||
| { | |||
| return data + w * y; | |||
| return (const float*)data + w * y; | |||
| } | |||
| template <typename T> | |||
| inline T* Mat::row(int y) | |||
| { | |||
| return (T*)data + w * y; | |||
| } | |||
| template <typename T> | |||
| inline const T* Mat::row(int y) const | |||
| { | |||
| return (const T*)data + w * y; | |||
| } | |||
| template <typename T> | |||
| inline Mat::operator T*() | |||
| { | |||
| return (T*)data; | |||
| } | |||
| template <typename T> | |||
| inline Mat::operator const T*() const | |||
| { | |||
| return (const T*)data; | |||
| } | |||
| inline Mat::operator float*() | |||
| inline float& Mat::operator[](int i) | |||
| { | |||
| return data; | |||
| return ((float*)data)[i]; | |||
| } | |||
| inline Mat::operator const float*() const | |||
| inline const float& Mat::operator[](int i) const | |||
| { | |||
| return data; | |||
| return ((const float*)data)[i]; | |||
| } | |||
| } // namespace ncnn | |||
| @@ -121,9 +121,15 @@ int ParamDict::load_param(FILE* fp) | |||
| bool is_float = vstr_is_float(vstr); | |||
| if (is_float) | |||
| nscan = sscanf(vstr, "%f", ¶ms[id].v.data[j]); | |||
| { | |||
| float* ptr = params[id].v; | |||
| nscan = sscanf(vstr, "%f", &ptr[j]); | |||
| } | |||
| else | |||
| nscan = sscanf(vstr, "%d", (int*)¶ms[id].v.data[j]); | |||
| { | |||
| int* ptr = params[id].v; | |||
| nscan = sscanf(vstr, "%d", &ptr[j]); | |||
| } | |||
| if (nscan != 1) | |||
| { | |||
| fprintf(stderr, "ParamDict parse array element fail\n"); | |||
| @@ -196,10 +202,8 @@ int ParamDict::load_param_bin(FILE* fp) | |||
| params[id].v.create(len); | |||
| for (int j = 0; j < len; j++) | |||
| { | |||
| fread(¶ms[id].v.data[j], sizeof(float), 1, fp); | |||
| } | |||
| float* ptr = params[id].v; | |||
| fread(ptr, sizeof(float), len, fp); | |||
| } | |||
| else | |||
| { | |||
| @@ -237,11 +241,8 @@ int ParamDict::load_param(const unsigned char*& mem) | |||
| params[id].v.create(len); | |||
| for (int j = 0; j < len; j++) | |||
| { | |||
| params[id].v.data[j] = *(float*)(mem); | |||
| mem += 4; | |||
| } | |||
| memcpy(params[id].v.data, mem, len * 4); | |||
| mem += 4; | |||
| } | |||
| else | |||
| { | |||