Browse Source

Code cleanup for layernorm and conv

tags/v1.2.0-rc1
Pengyongrong 5 years ago
parent
commit
36bb147844
4 changed files with 27 additions and 27 deletions
  1. +12
    -12
      mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl
  2. +1
    -1
      mindspore/lite/src/runtime/kernel/opencl/cl/layer_norm.cl
  3. +13
    -13
      mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc
  4. +1
    -1
      mindspore/lite/test/models_npu.cfg

+ 12
- 12
mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl View File

@@ -39,9 +39,9 @@ __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP
__kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
int2 dilation, int act_type, float alpha) {
int BlockH = 1;
int BlockW = 1;
int BlockC = 1;
const int BlockH = 1;
const int BlockW = 1;
const int BlockC = 1;
DEFINE_ARGS;

int oh0 = oh + 0;
@@ -102,9 +102,9 @@ __kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t
__kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
int2 dilation, int act_type, float alpha) {
int BlockH = 2;
int BlockW = 1;
int BlockC = 1;
const int BlockH = 2;
const int BlockW = 1;
const int BlockC = 1;
DEFINE_ARGS;

int oh0 = oh + 0;
@@ -189,9 +189,9 @@ __kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t
__kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
int2 dilation, int act_type, float alpha) {
int BlockH = 2;
int BlockW = 1;
int BlockC = 2;
const int BlockH = 2;
const int BlockW = 1;
const int BlockC = 2;
DEFINE_ARGS;

int oh0 = oh + 0;
@@ -312,9 +312,9 @@ __kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t
__kernel void Conv2D_H2W2C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
__global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
int2 dilation, int act_type, float alpha) {
int BlockH = 2;
int BlockW = 2;
int BlockC = 2;
const int BlockH = 2;
const int BlockW = 2;
const int BlockC = 2;
DEFINE_ARGS;

int oh0 = oh + 0;


+ 1
- 1
mindspore/lite/src/runtime/kernel/opencl/cl/layer_norm.cl View File

@@ -7,7 +7,7 @@ __kernel void ComputeMeanVarDim1NHWC4(__read_only image2d_t src_data, __global F
int4 in_shape, int normalized_shape_size) {
int X = get_global_id(0); // n*h
int Y = get_global_id(1); // w
if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0) {
if (X > in_shape.x * in_shape.y || Y > in_shape.z || in_shape.y == 0 || normalized_shape_size == 0) {
return;
}
int n = X / in_shape.y;


+ 13
- 13
mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc View File

@@ -41,8 +41,8 @@ using mindspore::schema::PrimitiveType_FullConnection;

namespace mindspore::kernel {

constexpr size_t CI_TILE = C4NUM;
constexpr size_t CO_TILE = C4NUM;
const size_t CI_TILE = C4NUM;
const size_t CO_TILE = C4NUM;

int Conv2DOpenCLKernel::CheckSpecs() {
if (in_tensors_.size() != 2 && in_tensors_.size() != 3) {
@@ -164,12 +164,12 @@ int Conv2DOpenCLKernel::Prepare() {
}

int Conv2DOpenCLKernel::GenerateWinogradFilter() {
constexpr float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 0.0000000000,
0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000,
0.0000000000, 0.4999999702, 0.4999999702, 1.9999998808, 1.9999998808, 1.0000000000};
constexpr float G[] = {1.0000000000, 0.0000000000, 0.0000000000, 1.0000000000, 0.7071067691, 0.4999999702,
1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808,
1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000};
const float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 0.0000000000,
0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000,
0.0000000000, 0.4999999702, 0.4999999702, 1.9999998808, 1.9999998808, 1.0000000000};
const float G[] = {1.0000000000, 0.0000000000, 0.0000000000, 1.0000000000, 0.7071067691, 0.4999999702,
1.0000000000, -0.7071067691, 0.4999999702, 1.0000000000, 1.4142135382, 1.9999998808,
1.0000000000, -1.4142135382, 1.9999998808, 0.0000000000, 0.0000000000, 1.0000000000};

auto weight_tensor = in_tensors_.at(1);
auto origin_weight_fp32 = reinterpret_cast<float *>(weight_tensor->data_c());
@@ -366,11 +366,11 @@ void Conv2DOpenCLKernel::SetGlobalLocal() {
size_t global_h = batch_size_ * UP_DIV(OH_, block_size_.H);
size_t global_w = UP_DIV(OW_, block_size_.W);
size_t global_c = UP_DIV(CO_SLICES_, block_size_.C);
constexpr int local_c_max = 16;
constexpr int local_hw_max = 256;
constexpr int OH_threshold = 100;
constexpr int OW_threshold = 100;
constexpr int OC_threshold = 64;
const int local_c_max = 16;
const int local_hw_max = 256;
const int OH_threshold = 100;
const int OW_threshold = 100;
const int OC_threshold = 64;
size_t local_c = GetMaxDivisor(global_c, local_c_max);
local_c = std::max<size_t>(local_c, 1);
size_t local_hw = local_hw_max / local_c;


+ 1
- 1
mindspore/lite/test/models_npu.cfg View File

@@ -1,3 +1,3 @@
mobilenet_v1_1.0_224.tflite 2.5
mobilenet_v2_1.0_224.tflite 2.5
squeezenet.tflite 2.5
inception_v3.tflite 1

Loading…
Cancel
Save