| @@ -65,9 +65,9 @@ extern "C" JNIEXPORT jbyteArray JNICALL Java_com_mindspore_lite_MSTensor_getByte | |||||
| return env->NewByteArray(0); | return env->NewByteArray(0); | ||||
| } | } | ||||
| auto local_element_num = ms_tensor_ptr->ElementsNum(); | |||||
| auto ret = env->NewByteArray(local_element_num); | |||||
| env->SetByteArrayRegion(ret, 0, local_element_num, local_data); | |||||
| auto local_size = ms_tensor_ptr->Size(); | |||||
| auto ret = env->NewByteArray(local_size); | |||||
| env->SetByteArrayRegion(ret, 0, local_size, local_data); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -11,7 +11,7 @@ __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | | |||||
| if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { \ | if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { \ | ||||
| return; \ | return; \ | ||||
| } \ | } \ | ||||
| TYPE4 result; | |||||
| DTYPE4 result; | |||||
| // axis = 1 | // axis = 1 | ||||
| #define DOConcat2inputaxis1_NHWC4 \ | #define DOConcat2inputaxis1_NHWC4 \ | ||||
| @@ -248,12 +248,12 @@ CONCAT2(2input, axis3, _NHWC4) | |||||
| int Align_OutShape = output_shape.w; \ | int Align_OutShape = output_shape.w; \ | ||||
| int index_output = (IN * output_shape.y + IH) * stride_w + IW * Align_OutShape * C4NUM; | int index_output = (IN * output_shape.y + IH) * stride_w + IW * Align_OutShape * C4NUM; | ||||
| int doconcat(__read_only image2d_t input, __global TYPE *output, int Align_Shape, int4 input_shape, int IN, int IH, | |||||
| int doconcat(__read_only image2d_t input, __global DTYPE *output, int Align_Shape, int4 input_shape, int IN, int IH, | |||||
| int Y, int index_output) { | int Y, int index_output) { | ||||
| int Remainder = input_shape.w % C4NUM; | int Remainder = input_shape.w % C4NUM; | ||||
| for (int i = 0; i < Align_Shape; ++i) { | for (int i = 0; i < Align_Shape; ++i) { | ||||
| TYPE4 result = READ_IMAGE(input, smp_none, (int2)((Y * Align_Shape + i), (IN * input_shape.y + IH))); | |||||
| TYPE result_temp[4] = {result.x, result.y, result.z, result.w}; | |||||
| DTYPE4 result = READ_IMAGE(input, smp_none, (int2)((Y * Align_Shape + i), (IN * input_shape.y + IH))); | |||||
| DTYPE result_temp[4] = {result.x, result.y, result.z, result.w}; | |||||
| if ((i + 1) * C4NUM <= input_shape.w) { | if ((i + 1) * C4NUM <= input_shape.w) { | ||||
| for (int j = 0; j < C4NUM; ++j) { | for (int j = 0; j < C4NUM; ++j) { | ||||
| output[index_output++] = result_temp[j]; | output[index_output++] = result_temp[j]; | ||||
| @@ -268,7 +268,7 @@ int doconcat(__read_only image2d_t input, __global TYPE *output, int Align_Shape | |||||
| } | } | ||||
| __kernel void ConcatInput2UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | __kernel void ConcatInput2UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | ||||
| __global TYPE *output, int4 input_shape0, int4 input_shape1, int stride_w, | |||||
| __global DTYPE *output, int4 input_shape0, int4 input_shape1, int stride_w, | |||||
| int4 output_shape) { | int4 output_shape) { | ||||
| CHECK_IDX_UNALIGN; | CHECK_IDX_UNALIGN; | ||||
| index_output = doconcat(input0, output, Align_Shape0, input_shape0, IN, IH, Y, index_output); | index_output = doconcat(input0, output, Align_Shape0, input_shape0, IN, IH, Y, index_output); | ||||
| @@ -276,7 +276,7 @@ __kernel void ConcatInput2UnAlign_NHWC4(__read_only image2d_t input0, __read_onl | |||||
| } | } | ||||
| __kernel void ConcatInput3UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | __kernel void ConcatInput3UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | ||||
| __read_only image2d_t input2, __global TYPE *output, int4 input_shape0, | |||||
| __read_only image2d_t input2, __global DTYPE *output, int4 input_shape0, | |||||
| int4 input_shape1, int4 input_shape2, int stride_w, int4 output_shape) { | int4 input_shape1, int4 input_shape2, int stride_w, int4 output_shape) { | ||||
| CHECK_IDX_UNALIGN; | CHECK_IDX_UNALIGN; | ||||
| int Align_Shape2 = UP_DIV(input_shape2.w, C4NUM); | int Align_Shape2 = UP_DIV(input_shape2.w, C4NUM); | ||||
| @@ -287,7 +287,7 @@ __kernel void ConcatInput3UnAlign_NHWC4(__read_only image2d_t input0, __read_onl | |||||
| __kernel void ConcatInput4UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | __kernel void ConcatInput4UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | ||||
| __read_only image2d_t input2, __read_only image2d_t input3, | __read_only image2d_t input2, __read_only image2d_t input3, | ||||
| __global TYPE *output, int4 input_shape0, int4 input_shape1, int4 input_shape2, | |||||
| __global DTYPE *output, int4 input_shape0, int4 input_shape1, int4 input_shape2, | |||||
| int4 input_shape3, int stride_w, int4 output_shape) { | int4 input_shape3, int stride_w, int4 output_shape) { | ||||
| CHECK_IDX_UNALIGN; | CHECK_IDX_UNALIGN; | ||||
| int Align_Shape2 = UP_DIV(input_shape2.w, C4NUM), Align_Shape3 = UP_DIV(input_shape3.w, C4NUM); | int Align_Shape2 = UP_DIV(input_shape2.w, C4NUM), Align_Shape3 = UP_DIV(input_shape3.w, C4NUM); | ||||
| @@ -299,7 +299,7 @@ __kernel void ConcatInput4UnAlign_NHWC4(__read_only image2d_t input0, __read_onl | |||||
| __kernel void ConcatInput5UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | __kernel void ConcatInput5UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | ||||
| __read_only image2d_t input2, __read_only image2d_t input3, | __read_only image2d_t input2, __read_only image2d_t input3, | ||||
| __read_only image2d_t input4, __global TYPE *output, int4 input_shape0, | |||||
| __read_only image2d_t input4, __global DTYPE *output, int4 input_shape0, | |||||
| int4 input_shape1, int4 input_shape2, int4 input_shape3, int4 input_shape4, | int4 input_shape1, int4 input_shape2, int4 input_shape3, int4 input_shape4, | ||||
| int stride_w, int4 output_shape) { | int stride_w, int4 output_shape) { | ||||
| CHECK_IDX_UNALIGN; | CHECK_IDX_UNALIGN; | ||||
| @@ -315,7 +315,7 @@ __kernel void ConcatInput5UnAlign_NHWC4(__read_only image2d_t input0, __read_onl | |||||
| __kernel void ConcatInput6UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | __kernel void ConcatInput6UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | ||||
| __read_only image2d_t input2, __read_only image2d_t input3, | __read_only image2d_t input2, __read_only image2d_t input3, | ||||
| __read_only image2d_t input4, __read_only image2d_t input5, | __read_only image2d_t input4, __read_only image2d_t input5, | ||||
| __global TYPE *output, int4 input_shape0, int4 input_shape1, int4 input_shape2, | |||||
| __global DTYPE *output, int4 input_shape0, int4 input_shape1, int4 input_shape2, | |||||
| int4 input_shape3, int4 input_shape4, int4 input_shape5, int stride_w, | int4 input_shape3, int4 input_shape4, int4 input_shape5, int stride_w, | ||||
| int4 output_shape) { | int4 output_shape) { | ||||
| CHECK_IDX_UNALIGN; | CHECK_IDX_UNALIGN; | ||||
| @@ -9,7 +9,7 @@ __kernel void gather(__write_only image2d_t dst_data, __read_only image2d_t src_ | |||||
| if (X >= dst_size.x || Y >= dst_size.y * dst_size.w || Z >= dst_size.z || dst_size.y == 0) { | if (X >= dst_size.x || Y >= dst_size.y * dst_size.w || Z >= dst_size.z || dst_size.y == 0) { | ||||
| return; | return; | ||||
| } | } | ||||
| TYPE4 res_data = (TYPE4)(0.0f, 0.0f, 0.0f, 0.0f); | |||||
| DTYPE4 res_data = (DTYPE4)(0.0f, 0.0f, 0.0f, 0.0f); | |||||
| int batch = Y / dst_size.y; | int batch = Y / dst_size.y; | ||||
| int height = Y % dst_size.y; | int height = Y % dst_size.y; | ||||
| if (axis == 0) { | if (axis == 0) { | ||||
| @@ -20,10 +20,10 @@ __kernel void gather(__write_only image2d_t dst_data, __read_only image2d_t src_ | |||||
| res_data = READ_IMAGE(src_data, smp_zero, (int2)(indices[X] * src_size.z + Z, batch * src_size.y + height)); | res_data = READ_IMAGE(src_data, smp_zero, (int2)(indices[X] * src_size.z + Z, batch * src_size.y + height)); | ||||
| } else if (axis == 3) { | } else if (axis == 3) { | ||||
| int offset[4] = {indices[Z * 4] / 4, indices[Z * 4 + 1] / 4, indices[Z * 4 + 2] / 4, indices[Z * 4 + 3] / 4}; | int offset[4] = {indices[Z * 4] / 4, indices[Z * 4 + 1] / 4, indices[Z * 4 + 2] / 4, indices[Z * 4 + 3] / 4}; | ||||
| TYPE tmp[4]; | |||||
| TYPE res_tmp[4]; | |||||
| DTYPE tmp[4]; | |||||
| DTYPE res_tmp[4]; | |||||
| for (int i = 0; i < indices_num; ++i) { | for (int i = 0; i < indices_num; ++i) { | ||||
| TYPE4 rd_data = (TYPE4)(0.0f, 0.0f, 0.0f, 0.0f); | |||||
| DTYPE4 rd_data = (DTYPE4)(0.0f, 0.0f, 0.0f, 0.0f); | |||||
| rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); | rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); | ||||
| if (i >= 1 && offset[i] != offset[i - 1]) { | if (i >= 1 && offset[i] != offset[i - 1]) { | ||||
| rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); | rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); | ||||
| @@ -16,9 +16,9 @@ __kernel void reshape_NHWC4(__read_only image2d_t src_data, __write_only image2d | |||||
| int CI4_rem = src_size.x % C4NUM; | int CI4_rem = src_size.x % C4NUM; | ||||
| CI4_rem = (CI4_rem == 0) ? C4NUM : CI4_rem; | CI4_rem = (CI4_rem == 0) ? C4NUM : CI4_rem; | ||||
| int in_img_x = CI4 * src_size.y; | int in_img_x = CI4 * src_size.y; | ||||
| TYPE4 res = (TYPE4)(0.0f); | |||||
| TYPE tmp[4]; | |||||
| TYPE res_tmp[4]; | |||||
| DTYPE4 res = (DTYPE4)(0.0f); | |||||
| DTYPE tmp[4]; | |||||
| DTYPE res_tmp[4]; | |||||
| int gcnt = 0; | int gcnt = 0; | ||||
| if (CO4_rem == 0 && ((CI4_rem & 0x3) == 0)) { | if (CO4_rem == 0 && ((CI4_rem & 0x3) == 0)) { | ||||
| gcnt = X + dst_size.x * Y; | gcnt = X + dst_size.x * Y; | ||||
| @@ -2,14 +2,14 @@ | |||||
| #define C4NUM 4 | #define C4NUM 4 | ||||
| __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; | __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; | ||||
| __kernel void SparseToDenseScalar(__read_only image2d_t input, __global float *output, float weight, int2 inputshape, | |||||
| __kernel void SparseToDenseScalar(__read_only image2d_t input, __global DTYPE *output, float weight, int2 inputshape, | |||||
| int4 outputshape, float default_value, int stride_w, int inshapeindex1_dim) { | int4 outputshape, float default_value, int stride_w, int inshapeindex1_dim) { | ||||
| int X = get_global_id(0); | int X = get_global_id(0); | ||||
| int Y = get_global_id(1); | int Y = get_global_id(1); | ||||
| if (X >= inputshape.x || Y >= inputshape.y) { | if (X >= inputshape.x || Y >= inputshape.y) { | ||||
| return; | return; | ||||
| } | } | ||||
| FLT4 index_input = READ_IMAGE(input, smp_zero, (int2)(Y, X)); | |||||
| int4 index_input = read_imagei(input, smp_zero, (int2)(Y, X)); | |||||
| int4 index_input_int = *((int4 *)&index_input); | int4 index_input_int = *((int4 *)&index_input); | ||||
| int index = 0; | int index = 0; | ||||
| if (inshapeindex1_dim == 1) { | if (inshapeindex1_dim == 1) { | ||||
| @@ -25,7 +25,7 @@ __kernel void SparseToDenseScalar(__read_only image2d_t input, __global float *o | |||||
| output[index] = weight; | output[index] = weight; | ||||
| } | } | ||||
| __kernel void SparseToDenseVector(__read_only image2d_t input, __global float *output, __global float *weight_vector, | |||||
| __kernel void SparseToDenseVector(__read_only image2d_t input, __global DTYPE *output, __global float *weight_vector, | |||||
| int2 inputshape, int4 outputshape, float default_value, int stride_w, | int2 inputshape, int4 outputshape, float default_value, int stride_w, | ||||
| int inshapeindex1_dim) { | int inshapeindex1_dim) { | ||||
| int X = get_global_id(0); | int X = get_global_id(0); | ||||
| @@ -33,7 +33,7 @@ __kernel void SparseToDenseVector(__read_only image2d_t input, __global float *o | |||||
| if (X >= inputshape.x || Y >= inputshape.y) { | if (X >= inputshape.x || Y >= inputshape.y) { | ||||
| return; | return; | ||||
| } | } | ||||
| FLT4 index_input = READ_IMAGE(input, smp_zero, (int2)(Y, X)); | |||||
| int4 index_input = read_imagei(input, smp_zero, (int2)(Y, X)); | |||||
| int4 index_input_int = *((int4 *)&index_input); | int4 index_input_int = *((int4 *)&index_input); | ||||
| int index = 0; | int index = 0; | ||||
| if (inshapeindex1_dim == 1) { | if (inshapeindex1_dim == 1) { | ||||
| @@ -73,14 +73,7 @@ int ActivationOpenCLKernel::Prepare() { | |||||
| std::string program_name = "Activation"; | std::string program_name = "Activation"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::string kernel_name = GetActTypeString(type_); | std::string kernel_name = GetActTypeString(type_); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeInt32) { | |||||
| build_options_ext = {" -DTYPE=int -DTYPE4=int4 -DWRITE_IMAGE=write_imagei -DREAD_IMAGE=read_imagei "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DTYPE=float -DTYPE4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DTYPE=half -DTYPE4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| SetGlobalLocal(); | SetGlobalLocal(); | ||||
| @@ -149,14 +149,7 @@ int ArgMinMaxOpenCLKernel::Prepare() { | |||||
| std::string source = argminmax_source; | std::string source = argminmax_source; | ||||
| std::string program_name = "argminmax"; | std::string program_name = "argminmax"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeInt32) { | |||||
| build_options_ext = {" -DTYPE=int -DTYPE4=int4 -DWRITE_IMAGE=write_imagei -DREAD_IMAGE=read_imagei "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DTYPE=float -DTYPE4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DTYPE=half -DTYPE4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| @@ -191,14 +191,7 @@ int ArithmeticOpenCLKernel::Prepare() { | |||||
| std::string program_name = "Arithmetic"; | std::string program_name = "Arithmetic"; | ||||
| std::string source = arithmetic_source; | std::string source = arithmetic_source; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeInt32) { | |||||
| build_options_ext = {" -DTYPE=int -DTYPE4=int4 -DWRITE_IMAGE=write_imagei -DREAD_IMAGE=read_imagei "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DTYPE=float -DTYPE4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DTYPE=half -DTYPE4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| int error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name_, build_options_ext); | int error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name_, build_options_ext); | ||||
| #endif | #endif | ||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| @@ -89,14 +89,7 @@ int ArithmeticSelfOpenCLKernel::Prepare() { | |||||
| MS_LOG(DEBUG) << "execute kernel name : " << kernel_name; | MS_LOG(DEBUG) << "execute kernel name : " << kernel_name; | ||||
| std::string program_name = "ArithmeticSelf"; | std::string program_name = "ArithmeticSelf"; | ||||
| ocl_runtime_->LoadSource(program_name, arithmeticself_source); | ocl_runtime_->LoadSource(program_name, arithmeticself_source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeInt32) { | |||||
| build_options_ext = {" -DTYPE=int -DTYPE4=int4 -DWRITE_IMAGE=write_imagei -DREAD_IMAGE=read_imagei "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DTYPE=float -DTYPE4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DTYPE=half -DTYPE4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| SetGlobalLocal(); | SetGlobalLocal(); | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| @@ -94,12 +94,7 @@ int BatchToSpaceNDOpenCLKernel::Prepare() { | |||||
| std::string source = batch_to_space_nd_source; | std::string source = batch_to_space_nd_source; | ||||
| std::string program_name = "batch_to_space_nd"; | std::string program_name = "batch_to_space_nd"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| @@ -168,12 +168,7 @@ int BatchNormOpenCLKernel::Prepare() { | |||||
| std::string source = batchnorm_source; | std::string source = batchnorm_source; | ||||
| std::string program_name = "Batch_normalization"; | std::string program_name = "Batch_normalization"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| int ret = Initweight(); | int ret = Initweight(); | ||||
| @@ -224,14 +224,7 @@ int ConcatOpenCLKernel::Prepare() { | |||||
| std::string source = concat_source; | std::string source = concat_source; | ||||
| std::string program_name = "Concat"; | std::string program_name = "Concat"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeInt32) { | |||||
| build_options_ext = {" -DTYPE=int -DTYPE4=int4 -DWRITE_IMAGE=write_imagei -DREAD_IMAGE=read_imagei "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DTYPE=float -DTYPE4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DTYPE=half -DTYPE4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| @@ -144,12 +144,7 @@ void Conv2DOpenCLKernel::BuildKernel() { | |||||
| kernel_name << "_Img"; | kernel_name << "_Img"; | ||||
| } | } | ||||
| ocl_runtime_->LoadSource(program_name, GetActDefines() + conv2d_source); | ocl_runtime_->LoadSource(program_name, GetActDefines() + conv2d_source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name.str(), build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name.str(), build_options_ext); | ||||
| } | } | ||||
| @@ -65,12 +65,7 @@ int Conv2dTransposeOpenCLKernel::Prepare() { | |||||
| std::string source = GetActDefines() + conv2d_transpose_source; | std::string source = GetActDefines() + conv2d_transpose_source; | ||||
| std::string program_name = "conv2d_transpose"; | std::string program_name = "conv2d_transpose"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| auto ret = InitWeights(); | auto ret = InitWeights(); | ||||
| @@ -85,12 +85,7 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { | |||||
| std::string program_name = "DepthwiseConv2d"; | std::string program_name = "DepthwiseConv2d"; | ||||
| std::string source = depthwise_conv2d_source; | std::string source = depthwise_conv2d_source; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| auto ret = InitWeights(); | auto ret = InitWeights(); | ||||
| @@ -105,12 +105,7 @@ int FullConnectionOpenCLKernel::Prepare() { | |||||
| std::string source = fullconnection_source; | std::string source = fullconnection_source; | ||||
| std::string program_name = "FullConnection"; | std::string program_name = "FullConnection"; | ||||
| ocl_runtime_->LoadSource(program_name, GetActDefines() + source); | ocl_runtime_->LoadSource(program_name, GetActDefines() + source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| auto ret = InitWeights(); | auto ret = InitWeights(); | ||||
| @@ -114,14 +114,7 @@ int GatherOpenCLKernel::Prepare() { | |||||
| #else | #else | ||||
| std::string program_name = "gather"; | std::string program_name = "gather"; | ||||
| ocl_runtime_->LoadSource(program_name, gather_source); | ocl_runtime_->LoadSource(program_name, gather_source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeInt32) { | |||||
| build_options_ext = {" -DTYPE=int -DTYPE4=int4 -DWRITE_IMAGE=write_imagei -DREAD_IMAGE=read_imagei "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DTYPE=float -DTYPE4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DTYPE=half -DTYPE4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| if (in_tensors_.at(1)->IsConst()) { | if (in_tensors_.at(1)->IsConst()) { | ||||
| @@ -171,12 +171,7 @@ int LayerNormOpenCLKernel::Prepare() { | |||||
| std::string source = layer_norm_source; | std::string source = layer_norm_source; | ||||
| std::string program_name = "LayerNormalization"; | std::string program_name = "LayerNormalization"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| kernel_name_mean_var += "Axis" + std::to_string(normalized_axis_) + "NHWC4"; | kernel_name_mean_var += "Axis" + std::to_string(normalized_axis_) + "NHWC4"; | ||||
| ocl_runtime_->BuildKernel(kernel_mean_var_, program_name, kernel_name_mean_var, build_options_ext); | ocl_runtime_->BuildKernel(kernel_mean_var_, program_name, kernel_name_mean_var, build_options_ext); | ||||
| @@ -92,12 +92,7 @@ int MatMulOpenCLKernel::Prepare() { | |||||
| std::string source = matmul_source; | std::string source = matmul_source; | ||||
| std::string program_name = "MatMul"; | std::string program_name = "MatMul"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| @@ -74,12 +74,7 @@ int PadOpenCLKernel::Prepare() { | |||||
| const std::string source = pad_source; | const std::string source = pad_source; | ||||
| const std::string program_name = "Pad"; | const std::string program_name = "Pad"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, "Pad", build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, "Pad", build_options_ext); | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -82,12 +82,7 @@ int PoolingOpenCLKernel::Prepare() { | |||||
| std::string source = pooling2d_source; | std::string source = pooling2d_source; | ||||
| std::string program_name = "Pooling2d"; | std::string program_name = "Pooling2d"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| @@ -118,12 +118,7 @@ int PowerOpenCLKernel::Prepare() { | |||||
| scale_ = param->scale_; | scale_ = param->scale_; | ||||
| shift_ = param->shift_; | shift_ = param->shift_; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| SetGlobalLocal(); | SetGlobalLocal(); | ||||
| @@ -130,12 +130,7 @@ int PReluOpenCLKernel::Prepare() { | |||||
| std::string program_name = "PRelu"; | std::string program_name = "PRelu"; | ||||
| std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector"); | std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector"); | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| InitWeights(); | InitWeights(); | ||||
| MS_LOG(DEBUG) << program_name << " init Done!"; | MS_LOG(DEBUG) << program_name << " init Done!"; | ||||
| @@ -186,12 +186,7 @@ int ReduceOpenCLKernel::Prepare() { | |||||
| std::string source = reduce_source; | std::string source = reduce_source; | ||||
| std::string program_name = "Reduce"; | std::string program_name = "Reduce"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| return ret; | return ret; | ||||
| @@ -78,14 +78,7 @@ int ReshapeOpenCLKernel::Prepare() { | |||||
| #else | #else | ||||
| std::string source = reshape_source; | std::string source = reshape_source; | ||||
| std::string program_name = "reshape"; | std::string program_name = "reshape"; | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeInt32) { | |||||
| build_options_ext = {" -DTYPE=int -DTYPE4=int4 -DWRITE_IMAGE=write_imagei -DREAD_IMAGE=read_imagei "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DTYPE=float -DTYPE4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DTYPE=half -DTYPE4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| @@ -70,12 +70,7 @@ int ResizeOpenCLKernel::Prepare() { | |||||
| std::string source = resize_source; | std::string source = resize_source; | ||||
| std::string program_name = "Resize"; | std::string program_name = "Resize"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| @@ -166,12 +166,7 @@ int ScaleOpenCLKernel::Prepare() { | |||||
| std::string program_name = "Scale"; | std::string program_name = "Scale"; | ||||
| std::string source = GetActDefines() + scale_source; | std::string source = GetActDefines() + scale_source; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| @@ -100,12 +100,7 @@ int SpaceToBatchNDOpenCLKernel::Prepare() { | |||||
| std::string source = space_to_batch_nd_source; | std::string source = space_to_batch_nd_source; | ||||
| std::string program_name = "space_to_batch_nd"; | std::string program_name = "space_to_batch_nd"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| @@ -58,12 +58,7 @@ int SpaceToDepthOpenCLKernel::Prepare() { | |||||
| std::string source = space_to_depth_source; | std::string source = space_to_depth_source; | ||||
| std::string program_name = "SpaceToDepth"; | std::string program_name = "SpaceToDepth"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| @@ -141,6 +141,7 @@ void SparseToDenseOpenCLKernel::SetGlobalLocal() { | |||||
| } | } | ||||
| int SparseToDenseOpenCLKernel::Prepare() { | int SparseToDenseOpenCLKernel::Prepare() { | ||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| input_dim_ = in_tensors_[0]->shape().size(); | input_dim_ = in_tensors_[0]->shape().size(); | ||||
| inshapeindex1_dim = in_tensors_[0]->shape()[1]; | inshapeindex1_dim = in_tensors_[0]->shape()[1]; | ||||
| weight_scalar_ = in_tensors_[2]->IsScalar(); | weight_scalar_ = in_tensors_[2]->IsScalar(); | ||||
| @@ -149,10 +150,10 @@ int SparseToDenseOpenCLKernel::Prepare() { | |||||
| std::string program_name = "SparseToDense"; | std::string program_name = "SparseToDense"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | std::vector<std::string> build_options_ext; | ||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| if (enable_fp16_) { | |||||
| build_options_ext = {" -DDTYPE=half "}; | |||||
| } else { | |||||
| build_options_ext = {" -DDTYPE=float "}; | |||||
| } | } | ||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| @@ -216,6 +217,5 @@ int SparseToDenseOpenCLKernel::Run() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_SparseToDense, OpenCLKernelCreator<SparseToDenseOpenCLKernel>); | |||||
| REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_SparseToDense, OpenCLKernelCreator<SparseToDenseOpenCLKernel>); | |||||
| REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_SparseToDense, OpenCLKernelCreator<SparseToDenseOpenCLKernel>); | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -136,12 +136,7 @@ int SplitOpenCLKernel::Prepare() { | |||||
| std::string source = split_source; | std::string source = split_source; | ||||
| std::string program_name = "split"; | std::string program_name = "split"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| @@ -165,12 +165,7 @@ int StackOpenCLKernel::Prepare() { | |||||
| std::string source = stack_source; | std::string source = stack_source; | ||||
| std::string program_name = "stack"; | std::string program_name = "stack"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| SetGlobalLocal(); | SetGlobalLocal(); | ||||
| @@ -32,12 +32,7 @@ int StrassenOpenCLKernel::Prepare() { | |||||
| std::string source = strassen_source; | std::string source = strassen_source; | ||||
| std::string program_name = "MatMul"; | std::string program_name = "MatMul"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| ocl_runtime_->BuildKernel(kernel_IMG_add_sub_2, program_name, "MatMul_IMG_Add_Sub_2", build_options_ext); | ocl_runtime_->BuildKernel(kernel_IMG_add_sub_2, program_name, "MatMul_IMG_Add_Sub_2", build_options_ext); | ||||
| ocl_runtime_->BuildKernel(kernel_BUF_add_sub_2, program_name, "MatMul_BUF_Add_Sub_2", build_options_ext); | ocl_runtime_->BuildKernel(kernel_BUF_add_sub_2, program_name, "MatMul_BUF_Add_Sub_2", build_options_ext); | ||||
| @@ -89,12 +89,7 @@ int StridedSliceOpenCLKernel::CheckSpecs() { | |||||
| int StridedSliceOpenCLKernel::Prepare() { | int StridedSliceOpenCLKernel::Prepare() { | ||||
| std::string program_name = "strided_slice"; | std::string program_name = "strided_slice"; | ||||
| ocl_runtime_->LoadSource(program_name, strided_slice_source); | ocl_runtime_->LoadSource(program_name, strided_slice_source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, "strided_slice", build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, "strided_slice", build_options_ext); | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| SetGlobalLocal(); | SetGlobalLocal(); | ||||
| @@ -110,12 +110,7 @@ int TransposeOpenCLKernel::Prepare() { | |||||
| std::string source = transpose_source; | std::string source = transpose_source; | ||||
| std::string program_name = "transpose"; | std::string program_name = "transpose"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); | ||||
| #endif | #endif | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| @@ -82,12 +82,7 @@ std::vector<float> GenerateWinogradFilter(void *src, TypeId dtype, size_t CO, si | |||||
| void WinogradOpenCLKernel::BuildKernel() { | void WinogradOpenCLKernel::BuildKernel() { | ||||
| std::string program_name = "winograd"; | std::string program_name = "winograd"; | ||||
| ocl_runtime_->LoadSource(program_name, GetActDefines() + winograd_source); | ocl_runtime_->LoadSource(program_name, GetActDefines() + winograd_source); | ||||
| std::vector<std::string> build_options_ext; | |||||
| if (desc_.data_type == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (desc_.data_type == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| auto build_options_ext = CreateBuildOptionsExtByDType(desc_.data_type); | |||||
| ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options_ext); | ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options_ext); | ||||
| ocl_runtime_->BuildKernel(kernel_, program_name, | ocl_runtime_->BuildKernel(kernel_, program_name, | ||||
| filter_type_ == MemType::IMG ? "WinogradConv2D_Img" : "WinogradConv2D", build_options_ext); | filter_type_ == MemType::IMG ? "WinogradConv2D_Img" : "WinogradConv2D", build_options_ext); | ||||
| @@ -327,4 +327,15 @@ void FreeTmpWeight(lite::Tensor *tensor) { | |||||
| } | } | ||||
| } | } | ||||
| std::vector<std::string> CreateBuildOptionsExtByDType(TypeId type_id) { | |||||
| std::vector<std::string> build_options_ext; | |||||
| if (type_id == kNumberTypeInt32) { | |||||
| build_options_ext = {" -DDTYPE=int -DDTYPE4=int4 -DWRITE_IMAGE=write_imagei -DREAD_IMAGE=read_imagei "}; | |||||
| } else if (type_id == kNumberTypeFloat32) { | |||||
| build_options_ext = {" -DDTYPE=float -DDTYPE4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "}; | |||||
| } else if (type_id == kNumberTypeFloat16) { | |||||
| build_options_ext = {" -DDTYPE=half -DDTYPE4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "}; | |||||
| } | |||||
| return build_options_ext; | |||||
| } | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -67,6 +67,8 @@ int CheckParamLikeTensor(const std::string &kernel_name, const std::string &tens | |||||
| void StoreTmpWeight(lite::Tensor *tensor); | void StoreTmpWeight(lite::Tensor *tensor); | ||||
| void FreeTmpWeight(lite::Tensor *tensor); | void FreeTmpWeight(lite::Tensor *tensor); | ||||
| std::vector<std::string> CreateBuildOptionsExtByDType(TypeId type_id); | |||||
| template <class T1, class T2> | template <class T1, class T2> | ||||
| void PackNCHWToNC4HW4(void *src, void *dst, int batch, int plane_in, int plane_out, int channel, | void PackNCHWToNC4HW4(void *src, void *dst, int batch, int plane_in, int plane_out, int channel, | ||||
| const std::function<T2(T1)> &to_dtype) { | const std::function<T2(T1)> &to_dtype) { | ||||
| @@ -99,3 +99,54 @@ Q_crnn_ori_75w_slim_norm.pb | |||||
| Q_crnn_ori_v2_405001_notrans_nopre.pb | Q_crnn_ori_v2_405001_notrans_nopre.pb | ||||
| bolt_segment.pb | bolt_segment.pb | ||||
| ml_location_lane_counter.onnx;2 | ml_location_lane_counter.onnx;2 | ||||
| gts_detect_5k_tf115.tflite | |||||
| smartreply.tflite | |||||
| ml_text_correction.tflite | |||||
| ml_ocr_jk_pb2tflite.tflite | |||||
| scan_hms_angle_pb2tflite.tflite | |||||
| scan_hms_detect_pb2tflite.tflite | |||||
| ml_face_openclose_tflite.tflite | |||||
| unet_mbv2_05_104pts.tflite | |||||
| hiai_AADB_HADB_MBV2_model_f16.tflite | |||||
| hiai_AADB_HADB_MBV2_model_fp32.tflite | |||||
| hiai_detect_curve_model_float32.tflite | |||||
| hiai_detectmodel_06_23_960_480_1180700.tflite | |||||
| lite-model_aiy_vision_classifier_food_V1_1.tflite | |||||
| lite-model_disease-classification_1.tflite | |||||
| lite-model_models_mushroom-identification_v1_1.tflite | |||||
| smartreply_1_default_1.tflite | |||||
| text_classification.tflite | |||||
| Q_detect_fpn_add_inception-1448650.tflite | |||||
| Q_hand_0812_pb2tflite.tflite | |||||
| bloom_landmark.tflite | |||||
| Q888_face_dress_mv3y.tflite | |||||
| Q888_HADB_AADB_MBV2_model_fp32.tflite | |||||
| Q888_landmark.tflite | |||||
| Q888_pose.tflite | |||||
| Q888_lapa158_unet_0924.tflite | |||||
| Q888_isface.tflite | |||||
| Q888_new_detect.tflite | |||||
| Q888_model_normalize_object_scene_ps_20200826_f32_no_softmax.tflite | |||||
| Q888_face_emo_dress_mv3_orderd.tflite | |||||
| hdc_age_medium | |||||
| hdc_contour_pose_128 | |||||
| hdc_emotion | |||||
| hdc_fivembnet | |||||
| hdc_isface | |||||
| hdc_mobilenetface | |||||
| hdc_retinaface | |||||
| hdc_resnet | |||||
| mtk_model_normalize_object_scene_ps_20200519_f32.tflite | |||||
| hiai_cpu_face_emotion.pb | |||||
| hiai_cpu_face_gazing.pb | |||||
| hiai_cpu_face_headpose.pb | |||||
| hiai_ctpn_feature_map.pb | |||||
| hiai_cv_focusShootOCRModel_02.pb | |||||
| hiai_cv_focusShootOCRModel_08.pb | |||||
| hiai_cv_poseEstimation.pb | |||||
| hiai_detectmodel_06_23_960_480_1180700.pb | |||||
| hiai_face_model_npu.pb | |||||
| hiai_iMaxDN_RGB.pb | |||||
| hiai_iMaxSR_RGB.pb | |||||
| hiai_lm_inference_graph.pb | |||||
| hiai_PoseEstimation_Pcm.pb | |||||