| @@ -40,7 +40,7 @@ set(LITE_SRC | |||||
| if (SUPPORT_GPU) | if (SUPPORT_GPU) | ||||
| set(LITE_SRC | set(LITE_SRC | ||||
| ${LITE_SRC} | ${LITE_SRC} | ||||
| ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/subgraph_opencl_kernel.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/opencl_subgraph.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/utils.cc | ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/utils.cc | ||||
| ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_executor.cc | ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_executor.cc | ||||
| ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_allocator.cc | ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_allocator.cc | ||||
| @@ -1,7 +1,10 @@ | |||||
| #pragma OPENCL EXTENSION cl_khr_fp16 : enable | #pragma OPENCL EXTENSION cl_khr_fp16 : enable | ||||
| __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | ||||
| #define UP_DIV(x, y) (((x) + (y) - (1)) / (y)) | |||||
| #define C4NUM 4 | |||||
| #define CHECK_IDXConcat2input_NHWC4 \ | |||||
| // Align in Axis C for concat | |||||
| #define CHECK_IDX \ | |||||
| int X = get_global_id(0); \ | int X = get_global_id(0); \ | ||||
| int Y = get_global_id(1); \ | int Y = get_global_id(1); \ | ||||
| int Z = get_global_id(2); \ | int Z = get_global_id(2); \ | ||||
| @@ -10,536 +13,318 @@ __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | | |||||
| } \ | } \ | ||||
| FLT4 result; | FLT4 result; | ||||
| #define DOConcat2inputaxis1_NHWC4 \ | |||||
| if (X < input_shape0.y) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.y))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| #define DOConcat2inputaxis2_NHWC4 \ | |||||
| if (Y < input_shape0.z) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z) * input_shape1.w + Z, (X))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| #define DOConcat2inputaxis3_NHWC4 \ | |||||
| if (Z < input_shape0.w) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z - input_shape0.w, (X))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| #define CHECK_IDXConcat2input_NC4HW4 \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { \ | |||||
| return; \ | |||||
| } \ | |||||
| if (input_shape0.y == 0 || input_shape1.y == 0 || output_shape.y == 0) { \ | |||||
| return; \ | |||||
| } \ | |||||
| int in_postion_x; \ | |||||
| int out_pos_x = (X / output_shape.y) * output_shape.w * output_shape.y + Z * output_shape.y + X % output_shape.y; \ | |||||
| FLT4 result; | |||||
| #define DOConcat2inputaxis1_NC4HW4 \ | |||||
| if (X < input_shape0.y) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = ((X - input_shape0.y) / input_shape1.y) * input_shape1.w * input_shape1.y + Z * input_shape1.y + \ | |||||
| ((X - input_shape0.y) % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define DOConcat2inputaxis2_NC4HW4 \ | |||||
| if (Y < input_shape0.z) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = (X / input_shape1.y) * input_shape1.w * input_shape1.y + Z * input_shape1.y + (X % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define DOConcat2inputaxis3_NC4HW4 \ | |||||
| if (Z < input_shape0.w) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = (X / input_shape1.y) * input_shape1.w * input_shape1.y + (Z - input_shape0.w) * input_shape1.y + \ | |||||
| (X % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define CHECK_IDXConcat3input_NC4HW4 \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { \ | |||||
| return; \ | |||||
| } \ | |||||
| if (input_shape0.y == 0 || input_shape1.y == 0 || input_shape2.y == 0 || output_shape.y == 0) { \ | |||||
| return; \ | |||||
| } \ | |||||
| int in_postion_x; \ | |||||
| int out_pos_x = (X / output_shape.y) * output_shape.w * output_shape.y + Z * output_shape.y + X % output_shape.y; \ | |||||
| FLT4 result; | |||||
| #define DOConcat3inputaxis1_NC4HW4 \ | |||||
| if (X < input_shape0.y) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (X < input_shape0.y + input_shape1.y) { \ | |||||
| in_postion_x = ((X - input_shape0.y) / input_shape1.y) * input_shape1.w * input_shape1.y + Z * input_shape1.y + \ | |||||
| ((X - input_shape0.y) % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = ((X - input_shape0.y - input_shape1.y) / input_shape2.y) * input_shape2.w * input_shape2.y + \ | |||||
| Z * input_shape2.y + ((X - input_shape0.y - input_shape1.y) % input_shape2.y); \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define DOConcat3inputaxis2_NC4HW4 \ | |||||
| if (Y < input_shape0.z) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (Y < input_shape0.z + input_shape1.z) { \ | |||||
| in_postion_x = (X / input_shape1.y) * input_shape1.w * input_shape1.y + Z * input_shape1.y + (X % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = (X / input_shape2.y) * input_shape2.w * input_shape2.y + Z * input_shape2.y + (X % input_shape2.y); \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y - input_shape0.z - input_shape1.z), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define DOConcat3inputaxis3_NC4HW4 \ | |||||
| if (Z < input_shape0.w) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (Z < input_shape0.w + input_shape1.w) { \ | |||||
| in_postion_x = (X / input_shape1.y) * input_shape1.w * input_shape1.y + (Z - input_shape0.w) * input_shape1.y + \ | |||||
| (X % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = (X / input_shape2.y) * input_shape2.w * input_shape2.y + \ | |||||
| (Z - input_shape0.w - input_shape1.w) * input_shape2.y + (X % input_shape2.y); \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define CHECK_IDXConcat3input_NHWC4 \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { \ | |||||
| return; \ | |||||
| } \ | |||||
| FLT4 result; | |||||
| #define DOConcat3inputaxis1_NHWC4 \ | |||||
| if (X < input_shape0.y) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y)) { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.y))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z, (X - input_shape0.y - input_shape1.y))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| #define DOConcat3inputaxis2_NHWC4 \ | |||||
| if (Y < input_shape0.z) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z)) { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z) * input_shape1.w + Z, (X))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y - input_shape0.z - input_shape1.z) * input_shape2.w + Z, (X))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| #define DOConcat3inputaxis3_NHWC4 \ | |||||
| if (Z < input_shape0.w) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w)) { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z - input_shape0.w, (X))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z - input_shape0.w - input_shape1.w, (X))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| #define CHECK_IDXConcat4input_NHWC4 \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { \ | |||||
| return; \ | |||||
| } \ | |||||
| FLT4 result; | |||||
| #define DOConcat4inputaxis1_NHWC4 \ | |||||
| if (X < input_shape0.y) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y)) { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.y))); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y)) { \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z, (X - input_shape0.y - input_shape1.y))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input3, smp_none, \ | |||||
| (int2)((Y)*input_shape3.w + Z, (X - input_shape0.y - input_shape1.y - input_shape2.y))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| #define DOConcat4inputaxis2_NHWC4 \ | |||||
| if (Y < input_shape0.z) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z)) { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z) * input_shape1.w + Z, (X))); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z)) { \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y - input_shape0.z - input_shape1.z) * input_shape2.w + Z, (X))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input3, smp_none, \ | |||||
| (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z) * input_shape3.w + Z, (X))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| // axis = 1 | |||||
| #define DOConcat2inputaxis1_NHWC4 \ | |||||
| int IN = X / output_shape.y; \ | |||||
| int IH = X % output_shape.y; \ | |||||
| int boundary0 = input_shape0.y; \ | |||||
| int boundary1 = boundary0 + input_shape1.y; \ | |||||
| if (IH < boundary0) { \ | |||||
| int coordinate_x = Y * input_shape0.w + Z; \ | |||||
| int coordinate_y = IN * input_shape0.y + IH; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } else if (IH < boundary1) { \ | |||||
| int coordinate_x = Y * input_shape1.w + Z; \ | |||||
| int coordinate_y = IN * input_shape1.y + IH - boundary0; \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define DOConcat4inputaxis3_NHWC4 \ | |||||
| if (Z < input_shape0.w) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w)) { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z - input_shape0.w, (X))); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w)) { \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z - input_shape0.w - input_shape1.w, (X))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input3, smp_none, \ | |||||
| (int2)((Y)*input_shape3.w + Z - input_shape0.w - input_shape1.w - input_shape2.w, (X))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| #define DOConcat3inputaxis1_NHWC4 \ | |||||
| DOConcat2inputaxis1_NHWC4; \ | |||||
| int boundary2 = boundary1 + input_shape2.y; \ | |||||
| if (IH >= boundary1 && IH < boundary2) { \ | |||||
| int coordinate_x = Y * input_shape2.w + Z; \ | |||||
| int coordinate_y = IN * input_shape2.y + IH - boundary1; \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define CHECK_IDXConcat4input_NC4HW4 \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { \ | |||||
| return; \ | |||||
| } \ | |||||
| if (input_shape0.y == 0 || input_shape1.y == 0 || input_shape2.y == 0 || input_shape3.y == 0 || \ | |||||
| output_shape.y == 0) { \ | |||||
| return; \ | |||||
| } \ | |||||
| int in_postion_x; \ | |||||
| int out_pos_x = (X / output_shape.y) * output_shape.w * output_shape.y + Z * output_shape.y + X % output_shape.y; \ | |||||
| FLT4 result; | |||||
| #define DOConcat4inputaxis1_NHWC4 \ | |||||
| DOConcat3inputaxis1_NHWC4; \ | |||||
| int boundary3 = boundary2 + input_shape3.y; \ | |||||
| if (IH >= boundary2 && IH < boundary3) { \ | |||||
| int coordinate_x = Y * input_shape3.w + Z; \ | |||||
| int coordinate_y = IN * input_shape3.y + IH - boundary2; \ | |||||
| result = READ_IMAGE(input3, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define DOConcat4inputaxis1_NC4HW4 \ | |||||
| if (X < input_shape0.y) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (X < input_shape0.y + input_shape1.y) { \ | |||||
| in_postion_x = ((X - input_shape0.y) / input_shape1.y) * input_shape1.w * input_shape1.y + Z * input_shape1.y + \ | |||||
| ((X - input_shape0.y) % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (X < input_shape0.y + input_shape1.y + input_shape2.y) { \ | |||||
| in_postion_x = ((X - input_shape0.y - input_shape1.y) / input_shape2.y) * input_shape2.w * input_shape2.y + \ | |||||
| Z * input_shape2.y + ((X - input_shape0.y - input_shape1.y) % input_shape2.y); \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = \ | |||||
| ((X - input_shape0.y - input_shape1.y - input_shape2.y) / input_shape3.y) * input_shape3.w * input_shape3.y + \ | |||||
| Z * input_shape3.y + ((X - input_shape0.y - input_shape1.y - input_shape2.y) % input_shape3.y); \ | |||||
| result = READ_IMAGE(input3, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define DOConcat5inputaxis1_NHWC4 \ | |||||
| DOConcat4inputaxis1_NHWC4; \ | |||||
| int boundary4 = boundary3 + input_shape4.y; \ | |||||
| if (IH >= boundary3 && IH < boundary4) { \ | |||||
| int coordinate_x = Y * input_shape4.w + Z; \ | |||||
| int coordinate_y = IN * input_shape4.y + IH - boundary3; \ | |||||
| result = READ_IMAGE(input4, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define DOConcat4inputaxis2_NC4HW4 \ | |||||
| if (Y < input_shape0.z) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (Y < input_shape0.z + input_shape1.z) { \ | |||||
| in_postion_x = (X / input_shape1.y) * input_shape1.w * input_shape1.y + Z * input_shape1.y + (X % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z), in_postion_x)); \ | |||||
| } else if (Y < input_shape0.z + input_shape1.z + input_shape2.z) { \ | |||||
| in_postion_x = (X / input_shape2.y) * input_shape2.w * input_shape2.y + Z * input_shape2.y + (X % input_shape2.y); \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y - input_shape0.z - input_shape1.z), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = (X / input_shape3.y) * input_shape3.w * input_shape3.y + Z * input_shape3.y + (X % input_shape3.y); \ | |||||
| result = \ | |||||
| READ_IMAGE(input3, smp_none, (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define DOConcat6inputaxis1_NHWC4 \ | |||||
| DOConcat5inputaxis1_NHWC4; \ | |||||
| int boundary5 = boundary4 + input_shape5.y; \ | |||||
| if (IH >= boundary4 && IH < boundary5) { \ | |||||
| int coordinate_x = Y * input_shape5.w + Z; \ | |||||
| int coordinate_y = IN * input_shape5.y + IH - boundary4; \ | |||||
| result = READ_IMAGE(input5, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define DOConcat4inputaxis3_NC4HW4 \ | |||||
| if (Z < input_shape0.w) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (Z < input_shape0.w + input_shape1.w) { \ | |||||
| in_postion_x = (X / input_shape1.y) * input_shape1.w * input_shape1.y + (Z - input_shape0.w) * input_shape1.y + \ | |||||
| (X % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (Z < input_shape0.w + input_shape1.w + input_shape2.w) { \ | |||||
| in_postion_x = (X / input_shape2.y) * input_shape2.w * input_shape2.y + \ | |||||
| (Z - input_shape0.w - input_shape1.w) * input_shape2.y + (X % input_shape2.y); \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = (X / input_shape3.y) * input_shape3.w * input_shape3.y + \ | |||||
| (Z - input_shape0.w - input_shape1.w - input_shape2.w) * input_shape3.y + (X % input_shape3.y); \ | |||||
| result = READ_IMAGE(input3, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| // axis = 2 | |||||
| #define DOConcat2inputaxis2_NHWC4 \ | |||||
| int boundary0 = input_shape0.z; \ | |||||
| int boundary1 = boundary0 + input_shape1.z; \ | |||||
| if (Y < boundary0) { \ | |||||
| int coordinate_x = Y * input_shape0.w + Z; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } else { \ | |||||
| int coordinate_x = (Y - boundary0) * input_shape1.w + Z; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| __kernel void Concat4input_NC4HW4(__read_only image2d_t input0, __read_only image2d_t input1, | |||||
| __read_only image2d_t input2, __read_only image2d_t input3, | |||||
| __write_only image2d_t output, int4 input_shape0, int4 input_shape1, | |||||
| int4 input_shape2, int4 input_shape3, int4 output_shape, const int axis) {} | |||||
| #define DOConcat3inputaxis2_NHWC4 \ | |||||
| DOConcat2inputaxis2_NHWC4; \ | |||||
| int boundary2 = boundary1 + input_shape2.z; \ | |||||
| if (Y >= boundary1 && Y < boundary2) { \ | |||||
| int coordinate_x = (Y - boundary1) * input_shape2.w + Z; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define CHECK_IDXConcat6input_NHWC4 \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { \ | |||||
| return; \ | |||||
| } \ | |||||
| FLT4 result; | |||||
| #define DOConcat4inputaxis2_NHWC4 \ | |||||
| DOConcat3inputaxis2_NHWC4; \ | |||||
| int boundary3 = boundary2 + input_shape3.z; \ | |||||
| if (Y >= boundary2 && Y < boundary3) { \ | |||||
| int coordinate_x = (Y - boundary2) * input_shape3.w + Z; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input3, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define DOConcat6inputaxis1_NHWC4 \ | |||||
| if (X < input_shape0.y) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y)) { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.y))); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y)) { \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z, (X - input_shape0.y - input_shape1.y))); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y + input_shape3.y)) { \ | |||||
| result = READ_IMAGE(input3, smp_none, \ | |||||
| (int2)((Y)*input_shape3.w + Z, (X - input_shape0.y - input_shape1.y - input_shape2.y))); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y + input_shape3.y + input_shape4.y)) { \ | |||||
| result = READ_IMAGE( \ | |||||
| input4, smp_none, \ | |||||
| (int2)((Y)*input_shape4.w + Z, (X - input_shape0.y - input_shape1.y - input_shape2.y - input_shape3.y))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input5, smp_none, \ | |||||
| (int2)((Y)*input_shape5.w + Z, (X - input_shape0.y - input_shape1.y - input_shape2.y - \ | |||||
| input_shape3.y - input_shape4.y))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| #define DOConcat5inputaxis2_NHWC4 \ | |||||
| DOConcat4inputaxis2_NHWC4; \ | |||||
| int boundary4 = boundary3 + input_shape4.z; \ | |||||
| if (Y >= boundary3 && Y < boundary4) { \ | |||||
| int coordinate_x = (Y - boundary3) * input_shape4.w + Z; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input4, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define DOConcat6inputaxis2_NHWC4 \ | |||||
| if (Y < input_shape0.z) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z)) { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z) * input_shape1.w + Z, (X))); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z)) { \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y - input_shape0.z - input_shape1.z) * input_shape2.w + Z, (X))); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z + input_shape3.z)) { \ | |||||
| result = READ_IMAGE(input3, smp_none, \ | |||||
| (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z) * input_shape3.w + Z, (X))); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z + input_shape3.z + input_shape4.z)) { \ | |||||
| result = READ_IMAGE( \ | |||||
| input4, smp_none, \ | |||||
| (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z - input_shape3.z) * input_shape4.w + Z, (X))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE( \ | |||||
| input5, smp_none, \ | |||||
| (int2)( \ | |||||
| (Y - input_shape0.z - input_shape1.z - input_shape2.z - input_shape3.z - input_shape4.z) * input_shape5.w + Z, \ | |||||
| (X))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| #define DOConcat6inputaxis2_NHWC4 \ | |||||
| DOConcat5inputaxis2_NHWC4; \ | |||||
| int boundary5 = boundary4 + input_shape5.z; \ | |||||
| if (Y >= boundary4 && Y < boundary5) { \ | |||||
| int coordinate_x = (Y - boundary4) * input_shape5.w + Z; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input5, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define DOConcat6inputaxis3_NHWC4 \ | |||||
| if (Z < input_shape0.w) { \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w)) { \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z - input_shape0.w, (X))); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w)) { \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z - input_shape0.w - input_shape1.w, (X))); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w + input_shape3.w)) { \ | |||||
| result = READ_IMAGE(input3, smp_none, \ | |||||
| (int2)((Y)*input_shape3.w + Z - input_shape0.w - input_shape1.w - input_shape2.w, (X))); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w + input_shape3.w + input_shape4.w)) { \ | |||||
| result = READ_IMAGE( \ | |||||
| input4, smp_none, \ | |||||
| (int2)((Y)*input_shape4.w + Z - input_shape0.w - input_shape1.w - input_shape2.w - input_shape3.w, (X))); \ | |||||
| } else { \ | |||||
| result = READ_IMAGE(input5, smp_none, \ | |||||
| (int2)((Y)*input_shape5.w + Z - input_shape0.w - input_shape1.w - input_shape2.w - \ | |||||
| input_shape3.w - input_shape4.w, \ | |||||
| (X))); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||||
| // axis = 3 | |||||
| #define DOConcat2inputaxis3_NHWC4 \ | |||||
| int boundary0 = input_shape0.w; \ | |||||
| int boundary1 = boundary0 + input_shape1.w; \ | |||||
| if (Z < boundary0) { \ | |||||
| int coordinate_x = Y * input_shape0.w + Z; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } else { \ | |||||
| int coordinate_x = Y * input_shape1.w + Z - boundary0; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define CHECK_IDXConcat6input_NC4HW4 \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { \ | |||||
| return; \ | |||||
| } \ | |||||
| if (input_shape0.y == 0 || input_shape1.y == 0 || input_shape2.y == 0 || input_shape3.y == 0 || \ | |||||
| input_shape4.y == 0 || input_shape5.y == 0 || output_shape.y == 0) { \ | |||||
| return; \ | |||||
| } \ | |||||
| int in_postion_x; \ | |||||
| FLT4 result; \ | |||||
| int out_pos_x = (X / output_shape.y) * output_shape.w * output_shape.y + Z * output_shape.y + X % output_shape.y; | |||||
| #define DOConcat3inputaxis3_NHWC4 \ | |||||
| DOConcat2inputaxis3_NHWC4; \ | |||||
| int boundary2 = boundary1 + input_shape2.w; \ | |||||
| if (Z >= boundary1 && Z < boundary2) { \ | |||||
| int coordinate_x = Y * input_shape2.w + Z - boundary1; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define DOConcat6inputaxis1_NC4HW4 \ | |||||
| if (X < input_shape0.y) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y)) { \ | |||||
| in_postion_x = ((X - input_shape0.y) / input_shape1.y) * input_shape1.w * input_shape1.y + Z * input_shape1.y + \ | |||||
| ((X - input_shape0.y) % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y)) { \ | |||||
| in_postion_x = ((X - input_shape0.y - input_shape1.y) / input_shape2.y) * input_shape2.w * input_shape2.y + \ | |||||
| Z * input_shape2.y + ((X - input_shape0.y - input_shape1.y) % input_shape2.y); \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y + input_shape3.y)) { \ | |||||
| in_postion_x = \ | |||||
| ((X - input_shape0.y - input_shape1.y - input_shape2.y) / input_shape3.y) * input_shape3.w * input_shape3.y + \ | |||||
| Z * input_shape3.y + ((X - input_shape0.y - input_shape1.y - input_shape2.y) % input_shape3.y); \ | |||||
| result = READ_IMAGE(input3, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y + input_shape3.y + input_shape4.y)) { \ | |||||
| in_postion_x = ((X - input_shape0.y - input_shape1.y - input_shape2.y - input_shape3.y) / input_shape4.y) * \ | |||||
| input_shape4.w * input_shape4.y + \ | |||||
| Z * input_shape4.y + \ | |||||
| ((X - input_shape0.y - input_shape1.y - input_shape2.y - input_shape3.y) % input_shape4.y); \ | |||||
| result = READ_IMAGE(input4, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = \ | |||||
| ((X - input_shape0.y - input_shape1.y - input_shape2.y - input_shape3.y - input_shape4.y) / input_shape5.y) * \ | |||||
| input_shape5.w * input_shape5.y + \ | |||||
| Z * input_shape5.y + \ | |||||
| ((X - input_shape0.y - input_shape1.y - input_shape2.y - input_shape3.y - input_shape4.y) % input_shape5.y); \ | |||||
| result = READ_IMAGE(input5, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define DOConcat4inputaxis3_NHWC4 \ | |||||
| DOConcat3inputaxis3_NHWC4; \ | |||||
| int boundary3 = boundary2 + input_shape3.w; \ | |||||
| if (Z >= boundary2 && Z < boundary3) { \ | |||||
| int coordinate_x = Y * input_shape3.w + Z - boundary2; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input3, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define DOConcat6inputaxis2_NC4HW4 \ | |||||
| if (Y < input_shape0.z) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z)) { \ | |||||
| in_postion_x = (X / input_shape1.y) * input_shape1.w * input_shape1.y + Z * input_shape1.y + (X % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z), in_postion_x)); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z)) { \ | |||||
| in_postion_x = (X / input_shape2.y) * input_shape2.w * input_shape2.y + Z * input_shape2.y + (X % input_shape2.y); \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y - input_shape0.z - input_shape1.z), in_postion_x)); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z + input_shape3.z)) { \ | |||||
| in_postion_x = (X / input_shape3.y) * input_shape3.w * input_shape3.y + Z * input_shape3.y + (X % input_shape3.y); \ | |||||
| result = \ | |||||
| READ_IMAGE(input3, smp_none, (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z), in_postion_x)); \ | |||||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z + input_shape3.z + input_shape4.z)) { \ | |||||
| in_postion_x = (X / input_shape4.y) * input_shape4.w * input_shape4.y + Z * input_shape4.y + (X % input_shape4.y); \ | |||||
| result = \ | |||||
| READ_IMAGE(input4, smp_none, \ | |||||
| (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z - input_shape3.z), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = (X / input_shape5.y) * input_shape5.w * input_shape5.y + Z * input_shape5.y + (X % input_shape5.y); \ | |||||
| result = READ_IMAGE( \ | |||||
| input5, smp_none, \ | |||||
| (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z - input_shape3.z - input_shape4.z), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define DOConcat5inputaxis3_NHWC4 \ | |||||
| DOConcat4inputaxis3_NHWC4; \ | |||||
| int boundary4 = boundary3 + input_shape4.w; \ | |||||
| if (Z >= boundary3 && Z < boundary4) { \ | |||||
| int coordinate_x = Y * input_shape4.w + Z - boundary3; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input4, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define DOConcat6inputaxis3_NC4HW4 \ | |||||
| if (Z < input_shape0.w) { \ | |||||
| in_postion_x = (X / input_shape0.y) * input_shape0.w * input_shape0.y + Z * input_shape0.y + X % input_shape0.y; \ | |||||
| result = READ_IMAGE(input0, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w)) { \ | |||||
| in_postion_x = (X / input_shape1.y) * input_shape1.w * input_shape1.y + (Z - input_shape0.w) * input_shape1.y + \ | |||||
| (X % input_shape1.y); \ | |||||
| result = READ_IMAGE(input1, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w)) { \ | |||||
| in_postion_x = (X / input_shape2.y) * input_shape2.w * input_shape2.y + \ | |||||
| (Z - input_shape0.w - input_shape1.w) * input_shape2.y + (X % input_shape2.y); \ | |||||
| result = READ_IMAGE(input2, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w + input_shape3.w)) { \ | |||||
| in_postion_x = (X / input_shape3.y) * input_shape3.w * input_shape3.y + \ | |||||
| (Z - input_shape0.w - input_shape1.w - input_shape2.w) * input_shape3.y + (X % input_shape3.y); \ | |||||
| result = READ_IMAGE(input3, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w + input_shape3.w + input_shape4.w)) { \ | |||||
| in_postion_x = (X / input_shape4.y) * input_shape4.w * input_shape4.y + \ | |||||
| (Z - input_shape0.w - input_shape1.w - input_shape2.w - input_shape3.w) * input_shape4.y + \ | |||||
| (X % input_shape4.y); \ | |||||
| result = READ_IMAGE(input4, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } else { \ | |||||
| in_postion_x = \ | |||||
| (X / input_shape5.y) * input_shape5.w * input_shape5.y + \ | |||||
| (Z - input_shape0.w - input_shape1.w - input_shape2.w - input_shape3.w - input_shape4.w) * input_shape5.y + \ | |||||
| (X % input_shape5.y); \ | |||||
| result = READ_IMAGE(input5, smp_none, (int2)((Y), in_postion_x)); \ | |||||
| } \ | |||||
| WRITE_IMAGE(output, (int2)((Y), out_pos_x), result); | |||||
| #define DOConcat6inputaxis3_NHWC4 \ | |||||
| DOConcat5inputaxis3_NHWC4; \ | |||||
| int boundary5 = boundary4 + input_shape5.w; \ | |||||
| if (Z >= boundary4 && Z < boundary5) { \ | |||||
| int coordinate_x = Y * input_shape5.w + Z - boundary4; \ | |||||
| int coordinate_y = X; \ | |||||
| result = READ_IMAGE(input5, smp_none, (int2)(coordinate_x, coordinate_y)); \ | |||||
| } | |||||
| #define CONCAT6(Inputnum, Axis, ToFormat) \ | #define CONCAT6(Inputnum, Axis, ToFormat) \ | ||||
| __kernel void Concat##Inputnum##Axis##ToFormat( \ | __kernel void Concat##Inputnum##Axis##ToFormat( \ | ||||
| __read_only image2d_t input0, __read_only image2d_t input1, __read_only image2d_t input2, \ | __read_only image2d_t input0, __read_only image2d_t input1, __read_only image2d_t input2, \ | ||||
| __read_only image2d_t input3, __read_only image2d_t input4, __read_only image2d_t input5, \ | __read_only image2d_t input3, __read_only image2d_t input4, __read_only image2d_t input5, \ | ||||
| __write_only image2d_t output, int4 input_shape0, int4 input_shape1, int4 input_shape2, int4 input_shape3, \ | __write_only image2d_t output, int4 input_shape0, int4 input_shape1, int4 input_shape2, int4 input_shape3, \ | ||||
| int4 input_shape4, int4 input_shape5, int4 output_shape, const int axis) { \ | |||||
| CHECK_IDXConcat6input##ToFormat; \ | |||||
| int4 input_shape4, int4 input_shape5, int4 output_shape) { \ | |||||
| CHECK_IDX; \ | |||||
| DOConcat##Inputnum##Axis##ToFormat; \ | DOConcat##Inputnum##Axis##ToFormat; \ | ||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); \ | |||||
| } | |||||
| #define CONCAT5(Inputnum, Axis, ToFormat) \ | |||||
| __kernel void Concat##Inputnum##Axis##ToFormat( \ | |||||
| __read_only image2d_t input0, __read_only image2d_t input1, __read_only image2d_t input2, \ | |||||
| __read_only image2d_t input3, __read_only image2d_t input4, __write_only image2d_t output, int4 input_shape0, \ | |||||
| int4 input_shape1, int4 input_shape2, int4 input_shape3, int4 input_shape4, int4 output_shape) { \ | |||||
| CHECK_IDX; \ | |||||
| DOConcat##Inputnum##Axis##ToFormat; \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); \ | |||||
| } | } | ||||
| #define CONCAT4(Inputnum, Axis, ToFormat) \ | |||||
| __kernel void Concat##Inputnum##Axis##ToFormat( \ | |||||
| __read_only image2d_t input0, __read_only image2d_t input1, __read_only image2d_t input2, \ | |||||
| __read_only image2d_t input3, __write_only image2d_t output, int4 input_shape0, int4 input_shape1, \ | |||||
| int4 input_shape2, int4 input_shape3, int4 output_shape, const int axis) { \ | |||||
| CHECK_IDXConcat4input##ToFormat; \ | |||||
| DOConcat##Inputnum##Axis##ToFormat; \ | |||||
| #define CONCAT4(Inputnum, Axis, ToFormat) \ | |||||
| __kernel void Concat##Inputnum##Axis##ToFormat(__read_only image2d_t input0, __read_only image2d_t input1, \ | |||||
| __read_only image2d_t input2, __read_only image2d_t input3, \ | |||||
| __write_only image2d_t output, int4 input_shape0, int4 input_shape1, \ | |||||
| int4 input_shape2, int4 input_shape3, int4 output_shape) { \ | |||||
| CHECK_IDX \ | |||||
| DOConcat##Inputnum##Axis##ToFormat; \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); \ | |||||
| } | } | ||||
| #define CONCAT3(Inputnum, Axis, ToFormat) \ | |||||
| __kernel void Concat##Inputnum##Axis##ToFormat(__read_only image2d_t input0, __read_only image2d_t input1, \ | |||||
| __read_only image2d_t input2, __write_only image2d_t output, \ | |||||
| int4 input_shape0, int4 input_shape1, int4 input_shape2, \ | |||||
| int4 output_shape, const int axis) { \ | |||||
| CHECK_IDXConcat3input##ToFormat; \ | |||||
| DOConcat##Inputnum##Axis##ToFormat; \ | |||||
| #define CONCAT3(Inputnum, Axis, ToFormat) \ | |||||
| __kernel void Concat##Inputnum##Axis##ToFormat( \ | |||||
| __read_only image2d_t input0, __read_only image2d_t input1, __read_only image2d_t input2, \ | |||||
| __write_only image2d_t output, int4 input_shape0, int4 input_shape1, int4 input_shape2, int4 output_shape) { \ | |||||
| CHECK_IDX \ | |||||
| DOConcat##Inputnum##Axis##ToFormat; \ | |||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); \ | |||||
| } | } | ||||
| #define CONCAT2(Inputnum, Axis, ToFormat) \ | #define CONCAT2(Inputnum, Axis, ToFormat) \ | ||||
| __kernel void Concat##Inputnum##Axis##ToFormat(__read_only image2d_t input0, __read_only image2d_t input1, \ | __kernel void Concat##Inputnum##Axis##ToFormat(__read_only image2d_t input0, __read_only image2d_t input1, \ | ||||
| __write_only image2d_t output, int4 input_shape0, int4 input_shape1, \ | __write_only image2d_t output, int4 input_shape0, int4 input_shape1, \ | ||||
| int4 output_shape, const int axis) { \ | |||||
| CHECK_IDXConcat2input##ToFormat; \ | |||||
| int4 output_shape) { \ | |||||
| CHECK_IDX \ | |||||
| DOConcat##Inputnum##Axis##ToFormat; \ | DOConcat##Inputnum##Axis##ToFormat; \ | ||||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); \ | |||||
| } | } | ||||
| // nc4hw4 | |||||
| CONCAT6(6input, axis1, _NC4HW4) | |||||
| CONCAT6(6input, axis2, _NC4HW4) | |||||
| CONCAT6(6input, axis3, _NC4HW4) | |||||
| CONCAT4(4input, axis1, _NC4HW4) | |||||
| CONCAT4(4input, axis2, _NC4HW4) | |||||
| CONCAT4(4input, axis3, _NC4HW4) | |||||
| CONCAT3(3input, axis1, _NC4HW4) | |||||
| CONCAT3(3input, axis2, _NC4HW4) | |||||
| CONCAT3(3input, axis3, _NC4HW4) | |||||
| CONCAT2(2input, axis1, _NC4HW4) | |||||
| CONCAT2(2input, axis2, _NC4HW4) | |||||
| CONCAT2(2input, axis3, _NC4HW4) | |||||
| // nhwc4 | |||||
| // axis = 1 | |||||
| CONCAT6(6input, axis1, _NHWC4) | CONCAT6(6input, axis1, _NHWC4) | ||||
| CONCAT6(6input, axis2, _NHWC4) | |||||
| CONCAT6(6input, axis3, _NHWC4) | |||||
| CONCAT5(5input, axis1, _NHWC4) | |||||
| CONCAT4(4input, axis1, _NHWC4) | CONCAT4(4input, axis1, _NHWC4) | ||||
| CONCAT4(4input, axis2, _NHWC4) | |||||
| CONCAT4(4input, axis3, _NHWC4) | |||||
| CONCAT3(3input, axis1, _NHWC4) | CONCAT3(3input, axis1, _NHWC4) | ||||
| CONCAT3(3input, axis2, _NHWC4) | |||||
| CONCAT3(3input, axis3, _NHWC4) | |||||
| CONCAT2(2input, axis1, _NHWC4) | CONCAT2(2input, axis1, _NHWC4) | ||||
| // axis = 2 | |||||
| CONCAT6(6input, axis2, _NHWC4) | |||||
| CONCAT5(5input, axis2, _NHWC4) | |||||
| CONCAT4(4input, axis2, _NHWC4) | |||||
| CONCAT3(3input, axis2, _NHWC4) | |||||
| CONCAT2(2input, axis2, _NHWC4) | CONCAT2(2input, axis2, _NHWC4) | ||||
| // axis = 3 | |||||
| CONCAT6(6input, axis3, _NHWC4) | |||||
| CONCAT5(5input, axis3, _NHWC4) | |||||
| CONCAT4(4input, axis3, _NHWC4) | |||||
| CONCAT3(3input, axis3, _NHWC4) | |||||
| CONCAT2(2input, axis3, _NHWC4) | CONCAT2(2input, axis3, _NHWC4) | ||||
| // UnAlign in Axis C for concat | |||||
| #define CHECK_IDX_UNALIGN \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| if (X >= output_shape.x * output_shape.y || Y >= output_shape.z) { \ | |||||
| return; \ | |||||
| } \ | |||||
| int IN = X / output_shape.y, IH = X % output_shape.y; \ | |||||
| int IW = Y; \ | |||||
| int Align_Shape0 = UP_DIV(input_shape0.w, C4NUM), Align_Shape1 = UP_DIV(input_shape1.w, C4NUM); \ | |||||
| int Align_OutShape = output_shape.w; \ | |||||
| int index_output = (IN * output_shape.y + IH) * stride_w + IW * Align_OutShape * C4NUM; | |||||
| int doconcat(__read_only image2d_t input, __global FLT *output, int Align_Shape, int4 input_shape, int IN, int IH, | |||||
| int Y, int index_output) { | |||||
| int Remainder = input_shape.w % C4NUM; | |||||
| for (int i = 0; i < Align_Shape; ++i) { | |||||
| FLT4 result = READ_IMAGE(input, smp_none, (int2)((Y * Align_Shape + i), (IN * input_shape.y + IH))); | |||||
| FLT result_temp[4] = {result.x, result.y, result.z, result.w}; | |||||
| if ((i + 1) * C4NUM <= input_shape.w) { | |||||
| for (int j = 0; j < C4NUM; ++j) { | |||||
| output[index_output++] = result_temp[j]; | |||||
| } | |||||
| } else { | |||||
| for (int j = 0; j < Remainder; ++j) { | |||||
| output[index_output++] = result_temp[j]; | |||||
| } | |||||
| } | |||||
| } | |||||
| return index_output; | |||||
| } | |||||
| __kernel void ConcatInput2UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | |||||
| __global FLT *output, int4 input_shape0, int4 input_shape1, int stride_w, | |||||
| int4 output_shape) { | |||||
| CHECK_IDX_UNALIGN; | |||||
| index_output = doconcat(input0, output, Align_Shape0, input_shape0, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input1, output, Align_Shape1, input_shape1, IN, IH, Y, index_output); | |||||
| } | |||||
| __kernel void ConcatInput3UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | |||||
| __read_only image2d_t input2, __global FLT *output, int4 input_shape0, | |||||
| int4 input_shape1, int4 input_shape2, int stride_w, int4 output_shape) { | |||||
| CHECK_IDX_UNALIGN; | |||||
| int Align_Shape2 = UP_DIV(input_shape2.w, C4NUM); | |||||
| index_output = doconcat(input0, output, Align_Shape0, input_shape0, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input1, output, Align_Shape1, input_shape1, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input2, output, Align_Shape2, input_shape2, IN, IH, Y, index_output); | |||||
| } | |||||
| __kernel void ConcatInput4UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | |||||
| __read_only image2d_t input2, __read_only image2d_t input3, | |||||
| __global FLT *output, int4 input_shape0, int4 input_shape1, int4 input_shape2, | |||||
| int4 input_shape3, int stride_w, int4 output_shape) { | |||||
| CHECK_IDX_UNALIGN; | |||||
| int Align_Shape2 = UP_DIV(input_shape2.w, C4NUM), Align_Shape3 = UP_DIV(input_shape3.w, C4NUM); | |||||
| index_output = doconcat(input0, output, Align_Shape0, input_shape0, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input1, output, Align_Shape1, input_shape1, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input2, output, Align_Shape2, input_shape2, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input3, output, Align_Shape3, input_shape3, IN, IH, Y, index_output); | |||||
| } | |||||
| __kernel void ConcatInput5UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | |||||
| __read_only image2d_t input2, __read_only image2d_t input3, | |||||
| __read_only image2d_t input4, __global FLT *output, int4 input_shape0, | |||||
| int4 input_shape1, int4 input_shape2, int4 input_shape3, int4 input_shape4, | |||||
| int stride_w, int4 output_shape) { | |||||
| CHECK_IDX_UNALIGN; | |||||
| int Align_Shape2 = UP_DIV(input_shape2.w, C4NUM), Align_Shape3 = UP_DIV(input_shape3.w, C4NUM); | |||||
| int Align_Shape4 = UP_DIV(input_shape4.w, C4NUM); | |||||
| index_output = doconcat(input0, output, Align_Shape0, input_shape0, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input1, output, Align_Shape1, input_shape1, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input2, output, Align_Shape2, input_shape2, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input3, output, Align_Shape3, input_shape3, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input4, output, Align_Shape4, input_shape4, IN, IH, Y, index_output); | |||||
| } | |||||
| __kernel void ConcatInput6UnAlign_NHWC4(__read_only image2d_t input0, __read_only image2d_t input1, | |||||
| __read_only image2d_t input2, __read_only image2d_t input3, | |||||
| __read_only image2d_t input4, __read_only image2d_t input5, | |||||
| __global FLT *output, int4 input_shape0, int4 input_shape1, int4 input_shape2, | |||||
| int4 input_shape3, int4 input_shape4, int4 input_shape5, int stride_w, | |||||
| int4 output_shape) { | |||||
| CHECK_IDX_UNALIGN; | |||||
| int Align_Shape2 = UP_DIV(input_shape2.w, C4NUM), Align_Shape3 = UP_DIV(input_shape3.w, C4NUM); | |||||
| int Align_Shape4 = UP_DIV(input_shape4.w, C4NUM), Align_Shape5 = UP_DIV(input_shape5.w, C4NUM); | |||||
| index_output = doconcat(input0, output, Align_Shape0, input_shape0, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input1, output, Align_Shape1, input_shape1, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input2, output, Align_Shape2, input_shape2, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input3, output, Align_Shape3, input_shape3, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input4, output, Align_Shape4, input_shape4, IN, IH, Y, index_output); | |||||
| index_output = doconcat(input5, output, Align_Shape5, input_shape5, IN, IH, Y, index_output); | |||||
| } | |||||
| @@ -64,54 +64,107 @@ void ConcatGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> * | |||||
| } | } | ||||
| int ConcatOpenCLKernel::CheckSpecs() { | int ConcatOpenCLKernel::CheckSpecs() { | ||||
| if (in_tensors_[0]->shape().size() != 4) { | |||||
| MS_LOG(ERROR) << " only support dim = 4 "; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_); | auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_); | ||||
| MS_LOG(DEBUG) << " concat at axis=: " << param->axis_; | MS_LOG(DEBUG) << " concat at axis=: " << param->axis_; | ||||
| if (param->axis_ < 0) { | |||||
| param->axis_ += in_tensors_.front()->shape().size(); | |||||
| if (out_tensors_[0]->shape().size() > 4) { | |||||
| MS_LOG(ERROR) << " GPU Unsupported shape.size > 4 " | |||||
| << "your shape().size()=: " << out_tensors_[0]->shape().size(); | |||||
| return RET_ERROR; | |||||
| } | } | ||||
| if (param->axis_ < 0 || param->axis_ > 3) { | |||||
| axis_ = param->axis_; | |||||
| if (axis_ < 0) { | |||||
| axis_ += in_tensors_.front()->shape().size(); | |||||
| } | |||||
| if (axis_ < 0 || axis_ > 3) { | |||||
| MS_LOG(ERROR) << " only support axis >= 0 and axis <= 3 "; | MS_LOG(ERROR) << " only support axis >= 0 and axis <= 3 "; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (out_tensors_[0]->shape().size() < 4 && op_parameter_->type_ == PrimitiveType_Concat && axis_ != 0) { | |||||
| if (out_tensors_[0]->shape().size() == 2) { | |||||
| axis_ = axis_ + 2; | |||||
| } else if (out_tensors_[0]->shape().size() == 3) { | |||||
| axis_ = axis_ + 1; | |||||
| } else { | |||||
| MS_LOG(ERROR) << " Unsupported axis =: " << axis_ << " shape().size()=: " << out_tensors_[0]->shape().size(); | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| void ConcatOpenCLKernel::SetConstArgs() { | void ConcatOpenCLKernel::SetConstArgs() { | ||||
| auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_); | |||||
| auto output_shape = out_tensors_[0]->shape(); | |||||
| cl_int4 output_shape_ = {output_shape[0], output_shape[1], output_shape[2], UP_DIV(output_shape[3], C4NUM)}; | |||||
| int arg_cn = 2 * in_tensors_.size() + 1; | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_); | |||||
| GpuTensorInfo img_info(out_tensors_[0]); | |||||
| size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); | |||||
| stride_w = img_info.RowPitch() / dtype; | |||||
| cl_int4 output_shape_ = {}; | |||||
| for (int i = 0; i < out_tensors_[0]->shape().size(); ++i) { | |||||
| output_shape_.s[i] = out_tensors_[0]->shape()[i]; | |||||
| } | |||||
| Broadcast2GpuShape(out_shape_.s, output_shape_.s, out_tensors_[0]->shape().size(), 1); | |||||
| int arg_cn = in_tensors_.size() + 1; | |||||
| if (axis_ == 3 && !Align_) { | |||||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||||
| cl_int4 temp = {}; | |||||
| for (int j = 0; j < in_tensors_[i]->shape().size(); ++j) { | |||||
| temp.s[j] = in_tensors_[i]->shape()[j]; | |||||
| } | |||||
| Broadcast2GpuShape(in_shape_.s, temp.s, in_tensors_[i]->shape().size(), 1); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); | |||||
| } | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); | |||||
| } else { | |||||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||||
| cl_int4 temp = {}; | |||||
| for (int j = 0; j < in_tensors_[i]->shape().size(); ++j) { | |||||
| temp.s[j] = in_tensors_[i]->shape()[j]; | |||||
| } | |||||
| Broadcast2GpuShape(in_shape_.s, temp.s, in_tensors_[i]->shape().size(), 1); | |||||
| in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); | |||||
| } | |||||
| } | |||||
| out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); | |||||
| } | } | ||||
| void ConcatOpenCLKernel::SetGlobalLocal() { | void ConcatOpenCLKernel::SetGlobalLocal() { | ||||
| auto output_shape = out_tensors_[0]->shape(); | |||||
| const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize(); | const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize(); | ||||
| std::vector<size_t> local = {1, 1, 1}; | |||||
| uint32_t OH = output_shape[0] * output_shape[1]; | |||||
| uint32_t OW = output_shape[2]; | |||||
| uint32_t OC = output_shape[3]; | |||||
| std::vector<size_t> global = {OH, OW, OC}; | |||||
| if (axis_ == 3 && !Align_) { | |||||
| OH = out_shape_.s[0] * out_shape_.s[1]; | |||||
| OW = out_shape_.s[2]; | |||||
| global = {OH, OW, 1}; | |||||
| local = {1, 1, 1}; | |||||
| } else { | |||||
| OH = out_shape_.s[0] * out_shape_.s[1]; | |||||
| OW = out_shape_.s[2]; | |||||
| OC = out_shape_.s[3]; | |||||
| global = {OH, OW, OC}; | |||||
| local = {1, 1, 1}; | |||||
| } | |||||
| ConcatGetWorkGroup(global, &local, max_global[0]); | ConcatGetWorkGroup(global, &local, max_global[0]); | ||||
| OpenCLKernel::AlignGlobalLocal(global, local); | OpenCLKernel::AlignGlobalLocal(global, local); | ||||
| } | } | ||||
| int ConcatOpenCLKernel::Prepare() { | int ConcatOpenCLKernel::Prepare() { | ||||
| auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_); | |||||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||||
| int length = in_tensors_[0]->shape().size(); | |||||
| if (in_tensors_[i]->shape()[length - 1] % C4NUM != 0) { | |||||
| Align_ = false; | |||||
| } | |||||
| } | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| std::string kernel_name = "Concat"; | std::string kernel_name = "Concat"; | ||||
| if (in_tensors_.size() == 2 || in_tensors_.size() == 3 || in_tensors_.size() == 4 || in_tensors_.size() == 6) { | |||||
| kernel_name += std::to_string(in_tensors_.size()) + "inputaxis" + std::to_string(param->axis_); | |||||
| if (axis_ == 3 && !Align_) { | |||||
| kernel_name += "Input" + std::to_string(in_tensors_.size()) + "UnAlign"; | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << " input must be 2 , 3 , 4 or 6"; | |||||
| return RET_ERROR; | |||||
| if (2 <= in_tensors_.size() && in_tensors_.size() <= 6) { | |||||
| kernel_name += std::to_string(in_tensors_.size()) + "inputaxis" + std::to_string(axis_); | |||||
| } else { | |||||
| MS_LOG(ERROR) << " input must be less than 6 and more than 2 "; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | } | ||||
| kernel_name += "_NHWC4"; | kernel_name += "_NHWC4"; | ||||
| MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; | MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; | ||||
| std::string source = concat_source; | std::string source = concat_source; | ||||
| @@ -119,27 +172,25 @@ int ConcatOpenCLKernel::Prepare() { | |||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); | ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| SetGlobalLocal(); | |||||
| SetConstArgs(); | SetConstArgs(); | ||||
| SetGlobalLocal(); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int ConcatOpenCLKernel::Run() { | int ConcatOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << this->name() << " Running! "; | MS_LOG(DEBUG) << this->name() << " Running! "; | ||||
| auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_); | |||||
| if (param->axis_ == 0) { | |||||
| if (axis_ == 0) { | |||||
| return RunAxis0(); | return RunAxis0(); | ||||
| } | } | ||||
| if (in_tensors_.size() == 2 || in_tensors_.size() == 3 || in_tensors_.size() == 4 || in_tensors_.size() == 6) { | |||||
| if (2 <= in_tensors_.size() && in_tensors_.size() <= 6) { | |||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| for (int i = 0; i < in_tensors_.size(); ++i) { | for (int i = 0; i < in_tensors_.size(); ++i) { | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()); | ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()); | ||||
| } | } | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||||
| cl_int4 temp = {in_tensors_[i]->shape()[0], in_tensors_[i]->shape()[1], in_tensors_[i]->shape()[2], | |||||
| UP_DIV(in_tensors_[i]->shape()[3], C4NUM)}; | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, temp); | |||||
| if (axis_ == 3 && !Align_) { | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||||
| } else { | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| } | } | ||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "unsupported input size :" << in_tensors_.size(); | MS_LOG(ERROR) << "unsupported input size :" << in_tensors_.size(); | ||||
| @@ -39,8 +39,20 @@ class ConcatOpenCLKernel : public OpenCLKernel { | |||||
| int Run() override; | int Run() override; | ||||
| private: | private: | ||||
| int RunAxis0(); | |||||
| std::vector<size_t> local; | |||||
| uint32_t OH = {1}; | |||||
| uint32_t OW = {1}; | |||||
| uint32_t OC = {1}; | |||||
| std::vector<size_t> global; | |||||
| bool Align_{true}; | |||||
| bool enable_fp16_{false}; | |||||
| cl_int stride_w{1}; | |||||
| cl_int4 in_shape_{}; | |||||
| cl_int4 out_shape_{}; | |||||
| int axis_{0}; | |||||
| private: | |||||
| int RunAxis0(); | |||||
| cl::Kernel kernel_; | cl::Kernel kernel_; | ||||
| }; | }; | ||||
| @@ -14,7 +14,7 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_subgraph.h" | |||||
| #include <set> | #include <set> | ||||
| #include "src/runtime/opencl/opencl_executor.h" | #include "src/runtime/opencl/opencl_executor.h" | ||||
| #include "src/runtime/kernel/opencl/utils.h" | #include "src/runtime/kernel/opencl/utils.h" | ||||
| @@ -26,11 +26,11 @@ using mindspore::lite::RET_ERROR; | |||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| using mindspore::lite::opencl::MemType; | using mindspore::lite::opencl::MemType; | ||||
| SubGraphOpenCLKernel::~SubGraphOpenCLKernel() { UnInit(); } | |||||
| OpenCLSubGraph::~OpenCLSubGraph() { UnInit(); } | |||||
| void SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToNull( | |||||
| const std::vector<lite::Tensor *> &in_tensors, const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels, | |||||
| MemType mem_type) { | |||||
| void OpenCLSubGraph::ReplaceOutTensorAndKernelToNull(const std::vector<lite::Tensor *> &in_tensors, | |||||
| const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels, | |||||
| MemType mem_type) { | |||||
| for (size_t i = 0; i < in_tensors.size(); ++i) { | for (size_t i = 0; i < in_tensors.size(); ++i) { | ||||
| for (auto &jv : in_kernels.at(i)) { | for (auto &jv : in_kernels.at(i)) { | ||||
| MS_ASSERT(jv); | MS_ASSERT(jv); | ||||
| @@ -62,10 +62,10 @@ void SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToNull( | |||||
| } | } | ||||
| } | } | ||||
| void SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor, | |||||
| const std::vector<kernel::LiteKernel *> &in_kernels, | |||||
| lite::Tensor *new_tensor, | |||||
| kernel::LiteKernel *in_convert_op, MemType mem_type) { | |||||
| void OpenCLSubGraph::ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor, | |||||
| const std::vector<kernel::LiteKernel *> &in_kernels, | |||||
| lite::Tensor *new_tensor, kernel::LiteKernel *in_convert_op, | |||||
| MemType mem_type) { | |||||
| MS_ASSERT(in_convert_op); | MS_ASSERT(in_convert_op); | ||||
| auto in_opencl_op = reinterpret_cast<OpenCLKernel *>(in_convert_op); | auto in_opencl_op = reinterpret_cast<OpenCLKernel *>(in_convert_op); | ||||
| for (auto &iv : in_kernels) { | for (auto &iv : in_kernels) { | ||||
| @@ -96,11 +96,11 @@ void SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToConvert(const lite::Tensor | |||||
| } | } | ||||
| } | } | ||||
| int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors, | |||||
| const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels, | |||||
| std::vector<lite::Tensor *> *out_tensors, | |||||
| std::vector<OpenCLToFormatParameter *> *out_parameters, | |||||
| std::vector<LiteKernel *> *out_convert_ops, MemType mem_type) { | |||||
| int OpenCLSubGraph::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors, | |||||
| const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels, | |||||
| std::vector<lite::Tensor *> *out_tensors, | |||||
| std::vector<OpenCLToFormatParameter *> *out_parameters, | |||||
| std::vector<LiteKernel *> *out_convert_ops, MemType mem_type) { | |||||
| MS_ASSERT(out_tensors); | MS_ASSERT(out_tensors); | ||||
| MS_ASSERT(out_parameters); | MS_ASSERT(out_parameters); | ||||
| MS_ASSERT(out_convert_ops); | MS_ASSERT(out_convert_ops); | ||||
| @@ -120,7 +120,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te | |||||
| auto *new_tensor = new (std::nothrow) lite::Tensor(); | auto *new_tensor = new (std::nothrow) lite::Tensor(); | ||||
| MS_ASSERT(new_tensor); | MS_ASSERT(new_tensor); | ||||
| if (new_tensor == nullptr) { | if (new_tensor == nullptr) { | ||||
| MS_LOG(ERROR) << "SubGraphOpenCLKernel new tensor failed!"; | |||||
| MS_LOG(ERROR) << "OpenCLSubGraph new tensor failed!"; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| new_tensor->CopyTensor(*in_tensors[i]); | new_tensor->CopyTensor(*in_tensors[i]); | ||||
| @@ -141,7 +141,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te | |||||
| auto *parameter = static_cast<OpenCLToFormatParameter *>(malloc(sizeof(OpenCLToFormatParameter))); | auto *parameter = static_cast<OpenCLToFormatParameter *>(malloc(sizeof(OpenCLToFormatParameter))); | ||||
| MS_ASSERT(parameter); | MS_ASSERT(parameter); | ||||
| if (parameter == nullptr) { | if (parameter == nullptr) { | ||||
| MS_LOG(ERROR) << "SubGraphOpenCLKernel new parameter failed!"; | |||||
| MS_LOG(ERROR) << "OpenCLSubGraph new parameter failed!"; | |||||
| delete new_tensor; | delete new_tensor; | ||||
| new_tensor = nullptr; | new_tensor = nullptr; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -161,7 +161,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te | |||||
| } | } | ||||
| MS_ASSERT(in_convert_op); | MS_ASSERT(in_convert_op); | ||||
| if (in_convert_op == nullptr) { | if (in_convert_op == nullptr) { | ||||
| MS_LOG(ERROR) << "SubGraphOpenCLKernel create op failed!"; | |||||
| MS_LOG(ERROR) << "OpenCLSubGraph create op failed!"; | |||||
| delete new_tensor; | delete new_tensor; | ||||
| new_tensor = nullptr; | new_tensor = nullptr; | ||||
| free(parameter); | free(parameter); | ||||
| @@ -189,7 +189,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int SubGraphOpenCLKernel::Init() { | |||||
| int OpenCLSubGraph::Init() { | |||||
| allocator_ = ocl_runtime_->GetAllocator(); | allocator_ = ocl_runtime_->GetAllocator(); | ||||
| MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size(); | MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size(); | ||||
| for (const auto tensor : in_tensors_) { | for (const auto tensor : in_tensors_) { | ||||
| @@ -233,7 +233,7 @@ int SubGraphOpenCLKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| void SubGraphOpenCLKernel::UpdateTensorDataType() { | |||||
| void OpenCLSubGraph::UpdateTensorDataType() { | |||||
| bool is_fp16 = ocl_runtime_->GetFp16Enable(); | bool is_fp16 = ocl_runtime_->GetFp16Enable(); | ||||
| MS_ASSERT(in_tensors_[0]); | MS_ASSERT(in_tensors_[0]); | ||||
| if (is_fp16 && (in_tensors_[0]->data_type() == kNumberTypeFloat32)) { | if (is_fp16 && (in_tensors_[0]->data_type() == kNumberTypeFloat32)) { | ||||
| @@ -253,7 +253,7 @@ void SubGraphOpenCLKernel::UpdateTensorDataType() { | |||||
| } | } | ||||
| } | } | ||||
| int SubGraphOpenCLKernel::MallocTensorWithReuse() { | |||||
| int OpenCLSubGraph::MallocTensorWithReuse() { | |||||
| int ret; | int ret; | ||||
| kernel::LiteKernelUtil::InitTensorRefCount(nodes_); | kernel::LiteKernelUtil::InitTensorRefCount(nodes_); | ||||
| for (auto *kernel : nodes_) { | for (auto *kernel : nodes_) { | ||||
| @@ -297,10 +297,9 @@ int SubGraphOpenCLKernel::MallocTensorWithReuse() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| void SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::Tensor *> &in_tensors, | |||||
| const std::vector<kernel::LiteKernel *> &in_kernels, | |||||
| std::vector<std::vector<kernel::LiteKernel *>> *out_kernels, | |||||
| bool is_from) { | |||||
| void OpenCLSubGraph::GetKernelFromToTensor(const std::vector<lite::Tensor *> &in_tensors, | |||||
| const std::vector<kernel::LiteKernel *> &in_kernels, | |||||
| std::vector<std::vector<kernel::LiteKernel *>> *out_kernels, bool is_from) { | |||||
| std::vector<std::set<lite::Tensor *>> ksets; | std::vector<std::set<lite::Tensor *>> ksets; | ||||
| for (auto jv : in_kernels) { | for (auto jv : in_kernels) { | ||||
| MS_ASSERT(jv); | MS_ASSERT(jv); | ||||
| @@ -321,32 +320,24 @@ void SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::Tensor | |||||
| } | } | ||||
| } | } | ||||
| void SubGraphOpenCLKernel::GetInOutNodes() { | |||||
| std::vector<std::set<lite::Tensor *>> ksets_in; | |||||
| std::vector<std::set<lite::Tensor *>> ksets_out; | |||||
| for (auto jv : nodes_) { | |||||
| MS_ASSERT(jv); | |||||
| std::set<lite::Tensor *> kset; | |||||
| kset.insert(jv->in_tensors().begin(), jv->in_tensors().end()); | |||||
| ksets_in.emplace_back(kset); | |||||
| kset.clear(); | |||||
| kset.insert(jv->out_tensors().begin(), jv->out_tensors().end()); | |||||
| ksets_out.emplace_back(kset); | |||||
| } | |||||
| for (size_t j = 0; j < nodes_.size(); ++j) { | |||||
| if (std::find_if(in_tensors_.begin(), in_tensors_.end(), | |||||
| [&ksets_in, &j](lite::Tensor *val) { return ksets_in[j].count(val); }) != in_tensors_.end()) { | |||||
| in_nodes_.emplace_back(nodes_.at(j)); | |||||
| void OpenCLSubGraph::GetInOutNodes() { | |||||
| for (auto *node : nodes_) { | |||||
| for (auto *tensor : node->in_tensors()) { | |||||
| if (std::find(in_tensors_.begin(), in_tensors_.end(), tensor) != in_tensors_.end()) { | |||||
| in_nodes_.emplace_back(node); | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| if (std::find_if(out_tensors_.begin(), out_tensors_.end(), | |||||
| [&ksets_out, &j](lite::Tensor *val) { return ksets_out[j].count(val); }) != out_tensors_.end()) { | |||||
| out_nodes_.emplace_back(nodes_.at(j)); | |||||
| for (auto *tensor : node->out_tensors()) { | |||||
| if (std::find(out_tensors_.begin(), out_tensors_.end(), tensor) != out_tensors_.end()) { | |||||
| out_nodes_.emplace_back(node); | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| int SubGraphOpenCLKernel::Prepare() { | |||||
| int OpenCLSubGraph::Prepare() { | |||||
| executor_ = new (std::nothrow) lite::opencl::OpenCLExecutor(); | executor_ = new (std::nothrow) lite::opencl::OpenCLExecutor(); | ||||
| if (executor_ == nullptr) { | if (executor_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Create OpenCLExecutor fail"; | MS_LOG(ERROR) << "Create OpenCLExecutor fail"; | ||||
| @@ -361,7 +352,7 @@ int SubGraphOpenCLKernel::Prepare() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| void SubGraphOpenCLKernel::UnInit() { | |||||
| void OpenCLSubGraph::UnInit() { | |||||
| for (const auto &tensor : in_convert_tensors_) { | for (const auto &tensor : in_convert_tensors_) { | ||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| @@ -379,11 +370,11 @@ void SubGraphOpenCLKernel::UnInit() { | |||||
| delete this->executor_; | delete this->executor_; | ||||
| } | } | ||||
| int SubGraphOpenCLKernel::InferShape() { return RET_OK; } | |||||
| int OpenCLSubGraph::InferShape() { return RET_OK; } | |||||
| int SubGraphOpenCLKernel::ReSize() { return RET_OK; } | |||||
| int OpenCLSubGraph::ReSize() { return RET_OK; } | |||||
| int SubGraphOpenCLKernel::Run() { | |||||
| int OpenCLSubGraph::Run() { | |||||
| if (executor_ == nullptr) { | if (executor_ == nullptr) { | ||||
| MS_LOG(ERROR) << "executor is nullptr"; | MS_LOG(ERROR) << "executor is nullptr"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KENEL_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KENEL_H_ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KERNEL_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KERNEL_H_ | |||||
| #include <set> | #include <set> | ||||
| #include <vector> | #include <vector> | ||||
| @@ -25,25 +25,19 @@ | |||||
| #include "src/sub_graph_kernel.h" | #include "src/sub_graph_kernel.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| struct SubGraphOpenCLParameter { | |||||
| OpParameter op_parameter; | |||||
| int input_size; | |||||
| int output_size; | |||||
| }; | |||||
| class SubGraphOpenCLKernel : public SubGraphKernel { | |||||
| class OpenCLSubGraph : public SubGraphKernel { | |||||
| public: | public: | ||||
| SubGraphOpenCLKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||||
| const std::vector<kernel::LiteKernel *> &inKernels, | |||||
| const std::vector<kernel::LiteKernel *> &outKernels, | |||||
| const std::vector<kernel::LiteKernel *> &nodes, const lite::InnerContext *ctx = nullptr) | |||||
| OpenCLSubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||||
| const std::vector<kernel::LiteKernel *> &inKernels, | |||||
| const std::vector<kernel::LiteKernel *> &outKernels, const std::vector<kernel::LiteKernel *> &nodes, | |||||
| const lite::InnerContext *ctx = nullptr) | |||||
| : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) { | : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) { | ||||
| ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); | ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); | ||||
| subgraph_type_ = kGpuSubGraph; | subgraph_type_ = kGpuSubGraph; | ||||
| this->name_ = "GpuSubGraph"; | this->name_ = "GpuSubGraph"; | ||||
| nodes_set_.insert(nodes.begin(), nodes.end()); | nodes_set_.insert(nodes.begin(), nodes.end()); | ||||
| } | } | ||||
| ~SubGraphOpenCLKernel() override; | |||||
| ~OpenCLSubGraph() override; | |||||
| int PreProcess() override { return mindspore::lite::RET_OK; } | int PreProcess() override { return mindspore::lite::RET_OK; } | ||||
| int PostProcess() override { return mindspore::lite::RET_OK; } | int PostProcess() override { return mindspore::lite::RET_OK; } | ||||
| @@ -25,7 +25,7 @@ | |||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/sub_graph_kernel.h" | #include "src/sub_graph_kernel.h" | ||||
| #if SUPPORT_GPU | #if SUPPORT_GPU | ||||
| #include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_subgraph.h" | |||||
| #include "src/runtime/opencl/opencl_runtime.h" | #include "src/runtime/opencl/opencl_runtime.h" | ||||
| #endif | #endif | ||||
| @@ -241,7 +241,7 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel | |||||
| if (type == kernel::kGpuSubGraph) { | if (type == kernel::kGpuSubGraph) { | ||||
| #if SUPPORT_GPU | #if SUPPORT_GPU | ||||
| auto sub_kernel = new (std::nothrow) | auto sub_kernel = new (std::nothrow) | ||||
| kernel::SubGraphOpenCLKernel(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_); | |||||
| kernel::OpenCLSubGraph(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_); | |||||
| return sub_kernel; | return sub_kernel; | ||||
| #else | #else | ||||
| return nullptr; | return nullptr; | ||||
| @@ -83,7 +83,7 @@ if (SUPPORT_GPU) | |||||
| set(KERNEL_OP_SRC | set(KERNEL_OP_SRC | ||||
| ${KERNEL_OP_SRC} | ${KERNEL_OP_SRC} | ||||
| ${GPU_KERNEL_OP_SRC} | ${GPU_KERNEL_OP_SRC} | ||||
| ${LITE_DIR}/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc | |||||
| ${LITE_DIR}/src/runtime/kernel/opencl/opencl_subgraph.cc | |||||
| ${LITE_DIR}/src/runtime/kernel/opencl/utils.cc | ${LITE_DIR}/src/runtime/kernel/opencl/utils.cc | ||||
| ) | ) | ||||
| endif() | endif() | ||||
| @@ -19,12 +19,12 @@ | |||||
| #include "common/common_test.h" | #include "common/common_test.h" | ||||
| #include "mindspore/lite/src/common/file_utils.h" | #include "mindspore/lite/src/common/file_utils.h" | ||||
| #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | ||||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h" | #include "mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h" | ||||
| using mindspore::kernel::BiasAddOpenCLKernel; | using mindspore::kernel::BiasAddOpenCLKernel; | ||||
| using mindspore::kernel::LiteKernel; | using mindspore::kernel::LiteKernel; | ||||
| using mindspore::kernel::SubGraphOpenCLKernel; | |||||
| using mindspore::kernel::OpenCLSubGraph; | |||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| @@ -157,7 +157,7 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { | |||||
| MS_LOG(INFO) << "initialize sub_graph"; | MS_LOG(INFO) << "initialize sub_graph"; | ||||
| std::vector<kernel::LiteKernel *> kernels{biasadd_kernel}; | std::vector<kernel::LiteKernel *> kernels{biasadd_kernel}; | ||||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({input_tensor}, outputs, kernels, kernels, kernels); | |||||
| auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph({input_tensor}, outputs, kernels, kernels, kernels); | |||||
| if (sub_graph == nullptr) { | if (sub_graph == nullptr) { | ||||
| MS_LOG(ERROR) << "Create sub_graph kernel error."; | MS_LOG(ERROR) << "Create sub_graph kernel error."; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| @@ -19,7 +19,7 @@ | |||||
| #include "common/common_test.h" | #include "common/common_test.h" | ||||
| #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | ||||
| #include "mindspore/lite/src/common/file_utils.h" | #include "mindspore/lite/src/common/file_utils.h" | ||||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h" | #include "mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h" | ||||
| // PrimitiveType_Cast: src/ops/populate/cast_populate.cc | // PrimitiveType_Cast: src/ops/populate/cast_populate.cc | ||||
| @@ -93,9 +93,9 @@ TEST_F(TestCastSelfOpenCL, Castfp32tofp16) { | |||||
| } | } | ||||
| MS_LOG(INFO) << " initialize sub_graph "; | MS_LOG(INFO) << " initialize sub_graph "; | ||||
| std::vector<kernel::LiteKernel *> kernels{cast_kernel}; | std::vector<kernel::LiteKernel *> kernels{cast_kernel}; | ||||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||||
| auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph(inputs, outputs, kernels, kernels, kernels); | |||||
| if (sub_graph == nullptr) { | if (sub_graph == nullptr) { | ||||
| MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; | |||||
| MS_LOG(INFO) << " new kernel::OpenCLSubGraph failed "; | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| @@ -179,9 +179,9 @@ TEST_F(TestCastSelfOpenCL, Castfp16tofp32) { | |||||
| } | } | ||||
| MS_LOG(INFO) << " initialize sub_graph "; | MS_LOG(INFO) << " initialize sub_graph "; | ||||
| std::vector<kernel::LiteKernel *> kernels{cast_kernel}; | std::vector<kernel::LiteKernel *> kernels{cast_kernel}; | ||||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||||
| auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph(inputs, outputs, kernels, kernels, kernels); | |||||
| if (sub_graph == nullptr) { | if (sub_graph == nullptr) { | ||||
| MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; | |||||
| MS_LOG(INFO) << " new kernel::OpenCLSubGraph failed "; | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| @@ -17,11 +17,11 @@ | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "ut/src/runtime/kernel/opencl/common.h" | #include "ut/src/runtime/kernel/opencl/common.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_subgraph.h" | |||||
| #include "nnacl/conv_parameter.h" | #include "nnacl/conv_parameter.h" | ||||
| using mindspore::kernel::LiteKernel; | using mindspore::kernel::LiteKernel; | ||||
| using mindspore::kernel::SubGraphOpenCLKernel; | |||||
| using mindspore::kernel::OpenCLSubGraph; | |||||
| using mindspore::lite::KernelRegistry; | using mindspore::lite::KernelRegistry; | ||||
| using mindspore::schema::Format::Format_NHWC; | using mindspore::schema::Format::Format_NHWC; | ||||
| @@ -99,12 +99,12 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std | |||||
| // simulating benchmark: session_->CompileGraph() -> scheduler.Schedule() -> ConstructSubGraphs() | // simulating benchmark: session_->CompileGraph() -> scheduler.Schedule() -> ConstructSubGraphs() | ||||
| MS_LOG(DEBUG) << "create SubGraph"; | MS_LOG(DEBUG) << "create SubGraph"; | ||||
| std::vector<LiteKernel *> kernels{kernel}; | std::vector<LiteKernel *> kernels{kernel}; | ||||
| auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel(subgraph_inputs, {&output}, kernels, kernels, kernels); | |||||
| auto sub_graph = new (std::nothrow) OpenCLSubGraph(subgraph_inputs, {&output}, kernels, kernels, kernels); | |||||
| if (sub_graph == nullptr) { | if (sub_graph == nullptr) { | ||||
| return; | return; | ||||
| } | } | ||||
| // simulating benchmark: session_->CompileGraph() -> PrepareKernels() -> SubGraphOpenCLKernel.Prepare() | |||||
| // simulating benchmark: session_->CompileGraph() -> PrepareKernels() -> OpenCLSubGraph.Prepare() | |||||
| MS_LOG(DEBUG) << "call sub_graph->Prepare()"; | MS_LOG(DEBUG) << "call sub_graph->Prepare()"; | ||||
| EXPECT_TRUE(sub_graph->Prepare() == RET_OK); // will set Tensor's allocator be OpenCLAllocator | EXPECT_TRUE(sub_graph->Prepare() == RET_OK); // will set Tensor's allocator be OpenCLAllocator | ||||
| @@ -128,7 +128,7 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std | |||||
| memcpy(input->data_c(), subgraph_inputs_data[input], input->Size()); | memcpy(input->data_c(), subgraph_inputs_data[input], input->Size()); | ||||
| } | } | ||||
| // simulating benchmark: MarkAccuracy() -> session_->RunGraph() -> executor_->Run() -> SubGraphOpenCLKernel->Run() | |||||
| // simulating benchmark: MarkAccuracy() -> session_->RunGraph() -> executor_->Run() -> OpenCLSubGraph->Run() | |||||
| MS_LOG(DEBUG) << "run SubGraph & compare result"; | MS_LOG(DEBUG) << "run SubGraph & compare result"; | ||||
| EXPECT_TRUE(sub_graph->Run() == RET_OK); // will call UnmapBuffer() for input | EXPECT_TRUE(sub_graph->Run() == RET_OK); // will call UnmapBuffer() for input | ||||
| @@ -44,4 +44,288 @@ TEST_F(TestOpenCL_Concat, input2_axis0) { | |||||
| } | } | ||||
| } | } | ||||
| TEST_F(TestOpenCL_Concat, input2_axis1_Align) { | |||||
| std::vector<int> input0_shape = {2, 2, 2, 8}; | |||||
| std::vector<int> input1_shape = {2, 2, 2, 8}; | |||||
| std::vector<int> output_shape = {2, 4, 2, 8}; | |||||
| int axis = 1; | |||||
| float input0_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39}; | |||||
| float input1_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41}; | |||||
| float output_data[] = { | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, | |||||
| 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, | |||||
| 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, | |||||
| 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, | |||||
| 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, | |||||
| 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, | |||||
| 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41}; | |||||
| for (auto fp16_enable : {false, true}) { | |||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input0_shape, input0_data, VAR}, {input1_shape, input1_data, VAR}}, {output_shape, output_data}, param, | |||||
| fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||||
| } | |||||
| } | |||||
| TEST_F(TestOpenCL_Concat, input6_axis1_Align) { | |||||
| std::vector<int> input0_shape = {2, 3, 2, 8}; | |||||
| std::vector<int> input1_shape = {2, 3, 2, 8}; | |||||
| std::vector<int> input2_shape = {2, 3, 2, 8}; | |||||
| std::vector<int> input3_shape = {2, 3, 2, 8}; | |||||
| std::vector<int> input4_shape = {2, 3, 2, 8}; | |||||
| std::vector<int> input5_shape = {2, 3, 2, 8}; | |||||
| std::vector<int> output_shape = {2, 18, 2, 8}; | |||||
| int axis = 1; | |||||
| float input0_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39}; | |||||
| float input1_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41}; | |||||
| float input2_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39}; | |||||
| float input3_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41}; | |||||
| float input4_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39}; | |||||
| float input5_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41}; | |||||
| float output_data[] = { | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, | |||||
| 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, | |||||
| 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, | |||||
| 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, | |||||
| 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, | |||||
| 0.41, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, | |||||
| 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, | |||||
| 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, | |||||
| 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, | |||||
| 0.13, 0.41, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, | |||||
| 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, | |||||
| 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, | |||||
| 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, | |||||
| 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, | |||||
| 0.69, 0.13, 0.41, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, | |||||
| 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, | |||||
| 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, | |||||
| 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, | |||||
| 0.46, 0.69, 0.13, 0.41, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, | |||||
| 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, | |||||
| 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, | |||||
| 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, | |||||
| 0.23, 0.46, 0.69, 0.13, 0.41, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, | |||||
| 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, | |||||
| 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, | |||||
| 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, | |||||
| 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.5, 0.6, | |||||
| 0.74, 0.23, 0.46, 0.69, 0.13, 0.41}; | |||||
| for (auto fp16_enable : {false, true}) { | |||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input0_shape, input0_data, VAR}, | |||||
| {input1_shape, input1_data, VAR}, | |||||
| {input2_shape, input2_data, VAR}, | |||||
| {input3_shape, input3_data, VAR}, | |||||
| {input4_shape, input4_data, VAR}, | |||||
| {input5_shape, input5_data, VAR}}, | |||||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||||
| } | |||||
| } | |||||
| TEST_F(TestOpenCL_Concat, input6_axis2_Align) { | |||||
| std::vector<int> input0_shape = {1, 1, 8}; | |||||
| std::vector<int> input1_shape = {1, 1, 8}; | |||||
| std::vector<int> input2_shape = {1, 1, 8}; | |||||
| std::vector<int> input3_shape = {1, 1, 8}; | |||||
| std::vector<int> input4_shape = {1, 1, 8}; | |||||
| std::vector<int> input5_shape = {1, 1, 8}; | |||||
| std::vector<int> output_shape = {1, 1, 48}; | |||||
| int axis = 2; | |||||
| float input0_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.13, 0.16}; | |||||
| float input1_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.47, 0.16}; | |||||
| float input2_data[] = {0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.16}; | |||||
| float input3_data[] = {0.52, 0.63, 0.78, 0.43, 0.56, 0.69, 0.87, 0.16}; | |||||
| float input4_data[] = {0.5, 0.6, 0.74, 0.30, 0.9, 0.59, 0.13, 0.16}; | |||||
| float input5_data[] = {0.75, 0.06, 0.74, 0.23, 0.46, 0.69, 0.47, 0.16}; | |||||
| float output_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.13, 0.16, 0.5, 0.6, 0.74, 0.23, | |||||
| 0.46, 0.69, 0.47, 0.16, 0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.16, | |||||
| 0.52, 0.63, 0.78, 0.43, 0.56, 0.69, 0.87, 0.16, 0.5, 0.6, 0.74, 0.30, | |||||
| 0.9, 0.59, 0.13, 0.16, 0.75, 0.06, 0.74, 0.23, 0.46, 0.69, 0.47, 0.16}; | |||||
| for (auto fp16_enable : {false, true}) { | |||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input0_shape, input0_data, VAR}, | |||||
| {input1_shape, input1_data, VAR}, | |||||
| {input2_shape, input2_data, VAR}, | |||||
| {input3_shape, input3_data, VAR}, | |||||
| {input4_shape, input4_data, VAR}, | |||||
| {input5_shape, input5_data, VAR}}, | |||||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||||
| } | |||||
| } | |||||
| TEST_F(TestOpenCL_Concat, input2_axis3_UnAlign) { | |||||
| std::vector<int> input0_shape = {2, 2, 2, 8}; | |||||
| std::vector<int> input1_shape = {2, 2, 2, 9}; | |||||
| std::vector<int> output_shape = {2, 2, 2, 17}; | |||||
| int axis = 3; | |||||
| float input0_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39}; | |||||
| float input1_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, | |||||
| 0.13, 0.41, 0.52, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, 0.5, 0.6, 0.74, | |||||
| 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, | |||||
| 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, | |||||
| 0.13, 0.41, 0.52, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52}; | |||||
| float output_data[] = { | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, | |||||
| 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.25, 0.39, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.41, 0.52, | |||||
| }; | |||||
| for (auto fp16_enable : {false, true}) { | |||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input0_shape, input0_data, VAR}, {input1_shape, input1_data, VAR}}, {output_shape, output_data}, param, | |||||
| fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||||
| } | |||||
| } | |||||
| TEST_F(TestOpenCL_Concat, input3_axis1_UnAlign) { | |||||
| std::vector<int> input0_shape = {1, 6}; | |||||
| std::vector<int> input1_shape = {1, 7}; | |||||
| std::vector<int> input2_shape = {1, 8}; | |||||
| std::vector<int> output_shape = {1, 21}; | |||||
| int axis = 1; | |||||
| float input0_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59}; | |||||
| float input1_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.47}; | |||||
| float input2_data[] = {0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13}; | |||||
| float output_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.5, 0.6, 0.74, 0.23, 0.46, | |||||
| 0.69, 0.47, 0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13}; | |||||
| for (auto fp16_enable : {false, true}) { | |||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input0_shape, input0_data, VAR}, {input1_shape, input1_data, VAR}, {input2_shape, input2_data, VAR}}, | |||||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||||
| } | |||||
| } | |||||
| TEST_F(TestOpenCL_Concat, input4_axis3_UnAlign) { | |||||
| std::vector<int> input0_shape = {1, 1, 1, 6}; | |||||
| std::vector<int> input1_shape = {1, 1, 1, 7}; | |||||
| std::vector<int> input2_shape = {1, 1, 1, 8}; | |||||
| std::vector<int> input3_shape = {1, 1, 1, 9}; | |||||
| std::vector<int> output_shape = {1, 1, 1, 30}; | |||||
| int axis = -1; | |||||
| float input0_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59}; | |||||
| float input1_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.47}; | |||||
| float input2_data[] = {0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13}; | |||||
| float input3_data[] = {0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13, 0.26}; | |||||
| float output_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.47, 0.03, 0.37, | |||||
| 0.74, 0.23, 0.46, 0.69, 0.13, 0.13, 0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13, 0.26}; | |||||
| for (auto fp16_enable : {false, true}) { | |||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input0_shape, input0_data, VAR}, | |||||
| {input1_shape, input1_data, VAR}, | |||||
| {input2_shape, input2_data, VAR}, | |||||
| {input3_shape, input3_data, VAR}}, | |||||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||||
| } | |||||
| } | |||||
| TEST_F(TestOpenCL_Concat, input5_axis3_UnAlign) { | |||||
| std::vector<int> input0_shape = {1, 1, 1, 6}; | |||||
| std::vector<int> input1_shape = {1, 1, 1, 7}; | |||||
| std::vector<int> input2_shape = {1, 1, 1, 8}; | |||||
| std::vector<int> input3_shape = {1, 1, 1, 9}; | |||||
| std::vector<int> input4_shape = {1, 1, 1, 10}; | |||||
| std::vector<int> output_shape = {1, 1, 1, 40}; | |||||
| int axis = 3; | |||||
| float input0_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59}; | |||||
| float input1_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.47}; | |||||
| float input2_data[] = {0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13}; | |||||
| float input3_data[] = {0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13, 0.26}; | |||||
| float input4_data[] = {0.06, 0.47, 0.74, 0.23, 0.56, 0.69, 0.73, 0.13, 0.96, 0.78}; | |||||
| float output_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.47, 0.03, | |||||
| 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13, 0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, | |||||
| 0.13, 0.26, 0.06, 0.47, 0.74, 0.23, 0.56, 0.69, 0.73, 0.13, 0.96, 0.78}; | |||||
| for (auto fp16_enable : {false, true}) { | |||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input0_shape, input0_data, VAR}, | |||||
| {input1_shape, input1_data, VAR}, | |||||
| {input2_shape, input2_data, VAR}, | |||||
| {input3_shape, input3_data, VAR}, | |||||
| {input4_shape, input4_data, VAR}}, | |||||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||||
| } | |||||
| } | |||||
| TEST_F(TestOpenCL_Concat, input6_axis3_UnAlign) { | |||||
| std::vector<int> input0_shape = {1, 1, 1, 6}; | |||||
| std::vector<int> input1_shape = {1, 1, 1, 7}; | |||||
| std::vector<int> input2_shape = {1, 1, 1, 8}; | |||||
| std::vector<int> input3_shape = {1, 1, 1, 9}; | |||||
| std::vector<int> input4_shape = {1, 1, 1, 10}; | |||||
| std::vector<int> input5_shape = {1, 1, 1, 11}; | |||||
| std::vector<int> output_shape = {1, 1, 1, 51}; | |||||
| int axis = 3; | |||||
| float input0_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59}; | |||||
| float input1_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.47}; | |||||
| float input2_data[] = {0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13}; | |||||
| float input3_data[] = {0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13, 0.26}; | |||||
| float input4_data[] = {0.06, 0.47, 0.74, 0.23, 0.56, 0.69, 0.73, 0.13, 0.96, 0.78}; | |||||
| float input5_data[] = {0.16, 0.77, 0.84, 0.53, 0.36, 0.29, 0.53, 0.23, 0.86, 0.48, 0.36}; | |||||
| float output_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.47, | |||||
| 0.03, 0.37, 0.74, 0.23, 0.46, 0.69, 0.13, 0.13, 0.03, 0.37, 0.74, 0.23, 0.46, | |||||
| 0.69, 0.13, 0.13, 0.26, 0.06, 0.47, 0.74, 0.23, 0.56, 0.69, 0.73, 0.13, 0.96, | |||||
| 0.78, 0.16, 0.77, 0.84, 0.53, 0.36, 0.29, 0.53, 0.23, 0.86, 0.48, 0.36}; | |||||
| for (auto fp16_enable : {false, true}) { | |||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input0_shape, input0_data, VAR}, | |||||
| {input1_shape, input1_data, VAR}, | |||||
| {input2_shape, input2_data, VAR}, | |||||
| {input3_shape, input3_data, VAR}, | |||||
| {input4_shape, input4_data, VAR}, | |||||
| {input5_shape, input5_data, VAR}}, | |||||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||||
| } | |||||
| } | |||||
| } // namespace mindspore::lite::opencl::test | } // namespace mindspore::lite::opencl::test | ||||
| @@ -21,7 +21,7 @@ namespace mindspore::lite::opencl::test { | |||||
| class TestOpenCL_Conv2D : public CommonTest {}; | class TestOpenCL_Conv2D : public CommonTest {}; | ||||
| namespace { | namespace { | ||||
| // PrimitiveType_Concat: src/ops/populate/conv2d_populate.cc | |||||
| // PrimitiveType_Conv2D: src/ops/populate/conv2d_populate.cc | |||||
| ConvParameter *CreateParameter(const std::string &attr, ActType act_type) { | ConvParameter *CreateParameter(const std::string &attr, ActType act_type) { | ||||
| auto *param = test::CreateParameter<ConvParameter>(schema::PrimitiveType_Conv2D); | auto *param = test::CreateParameter<ConvParameter>(schema::PrimitiveType_Conv2D); | ||||
| param->act_type_ = act_type; | param->act_type_ = act_type; | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include "src/common/log_adapter.h" | #include "src/common/log_adapter.h" | ||||
| #include "common/common_test.h" | #include "common/common_test.h" | ||||
| #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | ||||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h" | #include "mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h" | ||||
| using mindspore::lite::Tensor; | using mindspore::lite::Tensor; | ||||
| using mindspore::schema::PrimitiveType_Fill; | using mindspore::schema::PrimitiveType_Fill; | ||||
| @@ -70,9 +70,9 @@ TEST_F(TestFillOpenCLCI, Fp32testfill) { | |||||
| fill_kernel->Init(); | fill_kernel->Init(); | ||||
| MS_LOG(INFO) << " initialize sub_graph "; | MS_LOG(INFO) << " initialize sub_graph "; | ||||
| std::vector<kernel::LiteKernel *> kernels{fill_kernel}; | std::vector<kernel::LiteKernel *> kernels{fill_kernel}; | ||||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels); | |||||
| auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph({&in_tensor1}, outputs, kernels, kernels, kernels); | |||||
| if (sub_graph == nullptr) { | if (sub_graph == nullptr) { | ||||
| MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; | |||||
| MS_LOG(INFO) << " new kernel::OpenCLSubGraph failed "; | |||||
| delete param; | delete param; | ||||
| delete fill_kernel; | delete fill_kernel; | ||||
| return; | return; | ||||
| @@ -126,9 +126,9 @@ TEST_F(TestFillOpenCLCI, Fp32testshape) { | |||||
| fill_kernel->Init(); | fill_kernel->Init(); | ||||
| MS_LOG(INFO) << " initialize sub_graph "; | MS_LOG(INFO) << " initialize sub_graph "; | ||||
| std::vector<kernel::LiteKernel *> kernels{fill_kernel}; | std::vector<kernel::LiteKernel *> kernels{fill_kernel}; | ||||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels); | |||||
| auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph({&in_tensor1}, outputs, kernels, kernels, kernels); | |||||
| if (sub_graph == nullptr) { | if (sub_graph == nullptr) { | ||||
| MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; | |||||
| MS_LOG(INFO) << " new kernel::OpenCLSubGraph failed "; | |||||
| delete param; | delete param; | ||||
| delete fill_kernel; | delete fill_kernel; | ||||
| return; | return; | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include "src/common/log_adapter.h" | #include "src/common/log_adapter.h" | ||||
| #include "common/common_test.h" | #include "common/common_test.h" | ||||
| #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | ||||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/kernel/power.h" | #include "mindspore/lite/src/runtime/kernel/opencl/kernel/power.h" | ||||
| // PrimitiveType_Power: src/ops/populate/power_populate.cc | // PrimitiveType_Power: src/ops/populate/power_populate.cc | ||||
| @@ -88,9 +88,9 @@ void TEST_MAIN(const T *input_data1, const T *input_data2, const T *expect_data, | |||||
| MS_LOG(INFO) << " initialize sub_graph "; | MS_LOG(INFO) << " initialize sub_graph "; | ||||
| std::vector<kernel::LiteKernel *> kernels{power_kernel}; | std::vector<kernel::LiteKernel *> kernels{power_kernel}; | ||||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||||
| auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph(inputs, outputs, kernels, kernels, kernels); | |||||
| if (sub_graph == nullptr) { | if (sub_graph == nullptr) { | ||||
| MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; | |||||
| MS_LOG(INFO) << " new kernel::OpenCLSubGraph failed "; | |||||
| delete param; | delete param; | ||||
| delete power_kernel; | delete power_kernel; | ||||
| return; | return; | ||||
| @@ -20,13 +20,13 @@ | |||||
| #include "common/common_test.h" | #include "common/common_test.h" | ||||
| #include "mindspore/lite/src/common/file_utils.h" | #include "mindspore/lite/src/common/file_utils.h" | ||||
| #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | ||||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h" | #include "mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h" | ||||
| #include "mindspore/lite/nnacl/prelu_parameter.h" | #include "mindspore/lite/nnacl/prelu_parameter.h" | ||||
| using mindspore::kernel::LiteKernel; | using mindspore::kernel::LiteKernel; | ||||
| using mindspore::kernel::OpenCLSubGraph; | |||||
| using mindspore::kernel::PReluOpenCLKernel; | using mindspore::kernel::PReluOpenCLKernel; | ||||
| using mindspore::kernel::SubGraphOpenCLKernel; | |||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| @@ -150,7 +150,7 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { | |||||
| MS_LOG(INFO) << "initialize sub_graph"; | MS_LOG(INFO) << "initialize sub_graph"; | ||||
| std::vector<kernel::LiteKernel *> kernels{prelu_kernel}; | std::vector<kernel::LiteKernel *> kernels{prelu_kernel}; | ||||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({input_tensor}, outputs, kernels, kernels, kernels); | |||||
| auto *sub_graph = new (std::nothrow) kernel::OpenCLSubGraph({input_tensor}, outputs, kernels, kernels, kernels); | |||||
| if (sub_graph == nullptr) { | if (sub_graph == nullptr) { | ||||
| MS_LOG(ERROR) << "Create kernel sub_graph error"; | MS_LOG(ERROR) << "Create kernel sub_graph error"; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| @@ -19,7 +19,7 @@ | |||||
| #include "common/common_test.h" | #include "common/common_test.h" | ||||
| #include "mindspore/lite/src/common/file_utils.h" | #include "mindspore/lite/src/common/file_utils.h" | ||||
| #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | ||||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h" | |||||
| #include "mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h" | #include "mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h" | ||||
| namespace mindspore::lite::opencl::test { | namespace mindspore::lite::opencl::test { | ||||
| @@ -69,7 +69,7 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) { | |||||
| inputs[0]->MallocData(allocator); | inputs[0]->MallocData(allocator); | ||||
| std::vector<kernel::LiteKernel *> kernels{arith_kernel}; | std::vector<kernel::LiteKernel *> kernels{arith_kernel}; | ||||
| auto pGraph_ptr = std::make_unique<kernel::SubGraphOpenCLKernel>(inputs, outputs, kernels, kernels, kernels); | |||||
| auto pGraph_ptr = std::make_unique<kernel::OpenCLSubGraph>(inputs, outputs, kernels, kernels, kernels); | |||||
| auto pGraph = pGraph_ptr.get(); | auto pGraph = pGraph_ptr.get(); | ||||
| if (pGraph == nullptr) { | if (pGraph == nullptr) { | ||||
| MS_LOG(ERROR) << "pGraph create error."; | MS_LOG(ERROR) << "pGraph create error."; | ||||