| @@ -36,11 +36,19 @@ file(GLOB_RECURSE C_OPS_SRC ${CMAKE_CURRENT_SOURCE_DIR}/ops/*.cc) | |||||
| add_library(mindspore-lite SHARED ${LITE_SRC} ${C_OPS_SRC}) | add_library(mindspore-lite SHARED ${LITE_SRC} ${C_OPS_SRC}) | ||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field") | ||||
| add_library(anf OBJECT ${ANF_SRC}) | add_library(anf OBJECT ${ANF_SRC}) | ||||
| target_link_libraries(mindspore-lite | |||||
| anf | |||||
| cpu_kernel_mid_ | |||||
| ) | |||||
| if (SUPPORT_GPU) | |||||
| add_subdirectory(runtime/kernel/opencl) | |||||
| target_link_libraries(mindspore-lite | |||||
| anf | |||||
| cpu_kernel_mid_ | |||||
| opencl_kernel_lib_ | |||||
| ) | |||||
| else () | |||||
| target_link_libraries(mindspore-lite | |||||
| anf | |||||
| cpu_kernel_mid_ | |||||
| ) | |||||
| endif () | |||||
| add_subdirectory(runtime/kernel/arm) | add_subdirectory(runtime/kernel/arm) | ||||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | if (PLATFORM_ARM32 OR PLATFORM_ARM64) | ||||
| target_link_libraries(mindspore-lite log) | target_link_libraries(mindspore-lite log) | ||||
| @@ -1,16 +1,2 @@ | |||||
| set(OPENCL_KERNEL_SRC | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_opencl_kernel.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/utils.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/arithmetic.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/convolution.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/depthwise_conv2d.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/pooling2d.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/matmul.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/softmax.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/concat.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/conv2d_transpose.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/transpose.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/reshape.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/activation.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/kernel/caffe_prelu.cc | |||||
| ) | |||||
| file(GLOB_RECURSE OPENCL_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/kernel/*.cc) | |||||
| add_library(opencl_kernel_lib_ OBJECT ${KERNEL_SRC} ${OPENCL_KERNEL_SRC}) | |||||
| @@ -131,7 +131,6 @@ int ArithmeticOpenCLKernel::Run() { | |||||
| MS_LOG(DEBUG) << this->name() << " Running!"; | MS_LOG(DEBUG) << this->name() << " Running!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | ||||
| uint32_t element_num = out_tensors_[0]->ElementsC4Num(); | |||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data()); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data()); | ||||
| @@ -40,7 +40,6 @@ class Conv2dTransposeOpenCLKernel : public OpenCLKernel { | |||||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | ||||
| private: | private: | ||||
| ConvParameter *parameter_; | |||||
| cl::Kernel kernel_; | cl::Kernel kernel_; | ||||
| void *padWeight_; | void *padWeight_; | ||||
| void *bias_; | void *bias_; | ||||
| @@ -126,6 +126,7 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size | |||||
| im_dst_y = h; | im_dst_y = h; | ||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "Unsupported format. " << out_tensors_[0]->GetFormat(); | MS_LOG(ERROR) << "Unsupported format. " << out_tensors_[0]->GetFormat(); | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| img_size->clear(); | img_size->clear(); | ||||
| auto enable_fp16_ = lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable(); | auto enable_fp16_ = lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable(); | ||||
| @@ -129,7 +129,6 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) { | |||||
| printf("%.3f ", output_data[i]); | printf("%.3f ", output_data[i]); | ||||
| } | } | ||||
| printf("\n"); | printf("\n"); | ||||
| size_t output_size = tensor_out->Size(); | |||||
| float expect[4] = {2.0f, 3.0f, 4.0f, 5.0f}; | float expect[4] = {2.0f, 3.0f, 4.0f, 5.0f}; | ||||
| for (int i = 0; i < tensor_out->ElementsNum(); ++i) | for (int i = 0; i < tensor_out->ElementsNum(); ++i) | ||||
| @@ -54,7 +54,6 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat | |||||
| } | } | ||||
| // pack weight | // pack weight | ||||
| int OC4 = UP_DIV(conv_param->output_channel_, C4NUM); | |||||
| int pack_weight_size = conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_; | int pack_weight_size = conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_; | ||||
| T1 *packed_weight = weight_data; | T1 *packed_weight = weight_data; | ||||
| @@ -103,7 +102,6 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat | |||||
| std::vector<kernel::LiteKernel *> kernels{pKernel.get()}; | std::vector<kernel::LiteKernel *> kernels{pKernel.get()}; | ||||
| std::vector<lite::tensor::Tensor *> inputs_{&tensor_a}; | std::vector<lite::tensor::Tensor *> inputs_{&tensor_a}; | ||||
| size_t C4 = UP_DIV(inputs[0]->Channel(), C4NUM); | |||||
| auto pGraph = std::make_unique<kernel::SubGraphOpenCLKernel>(inputs_, outputs, kernels, kernels, kernels); | auto pGraph = std::make_unique<kernel::SubGraphOpenCLKernel>(inputs_, outputs, kernels, kernels, kernels); | ||||
| if (pGraph.get() == nullptr) { | if (pGraph.get() == nullptr) { | ||||
| delete[] packed_input; | delete[] packed_input; | ||||