| @@ -1,9 +1,11 @@ | |||
| set(TEST_DIR ${TOP_DIR}/tests/ut/cpp) | |||
| set(TEST_DIR ${TOP_DIR}/mindspore/lite/test) | |||
| set(LITE_DIR ${TOP_DIR}/mindspore/lite) | |||
| include_directories(${TOP_DIR}) | |||
| include_directories(${TEST_DIR}) | |||
| include_directories(${LITE_DIR}) | |||
| include_directories(${LITE_DIR}/tools) | |||
| include_directories(${LITE_DIR}/lite) | |||
| include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/dependency_gtest.cmake) | |||
| include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/external_libs/gtest.cmake) | |||
| ### anf src | |||
| set(ANF_SRC | |||
| @@ -158,7 +160,7 @@ set(TEST_LITE_SRC | |||
| ${LITE_DIR}/tools/common/flag_parser.cc | |||
| ${LITE_DIR}/tools/common/storage.cc | |||
| ${LITE_DIR}/tools/benchmark/benchmark.cc | |||
| ${LITE_DIR}/test/benchmark_test.cc | |||
| ${LITE_DIR}/test/st/benchmark_test.cc | |||
| ) | |||
| ### gpu runtime | |||
| if (SUPPORT_GPU) | |||
| @@ -179,6 +181,7 @@ endif() | |||
| if(BUILD_CONVERTER) | |||
| set(TEST_LITE_SRC | |||
| ${TEST_LITE_SRC} | |||
| ${TOP_DIR}/mindspore/core/utils/flags.cc | |||
| ${LITE_DIR}/tools/converter/optimizer.cc | |||
| ${LITE_DIR}/src/common/anf_importer/anf_importer.cc | |||
| ${LITE_DIR}/src/common/anf_importer/import_from_meta_graphT.cc | |||
| @@ -188,7 +191,7 @@ if(BUILD_CONVERTER) | |||
| ${LITE_DIR}/tools/converter/converter_flags.cc | |||
| ${LITE_DIR}/tools/converter/converter.cc | |||
| ${LITE_DIR}/tools/converter/parser/onnx/onnx.pb.cc | |||
| ${LITE_DIR}/test/converter_test.cc | |||
| ${LITE_DIR}/test/st/converter_test.cc | |||
| ${LITE_DIR}/src/gllo/common/node_pass.cc | |||
| ${LITE_DIR}/src/gllo/common/optimizer.cc | |||
| ${LITE_DIR}/src/gllo/common/pass_manager.cc | |||
| @@ -233,59 +236,50 @@ else() | |||
| endif() | |||
| ### test src | |||
| file(GLOB_RECURSE TEST_CASE_KERNEL_SRC | |||
| ${TEST_DIR}/kernel/cpu/arm/fp32/*.cc | |||
| ${TEST_DIR}/kernel/cpu/arm/int8/*.cc | |||
| ${TEST_DIR}/ut/src/runtime/kernel/arm/fp32/*.cc | |||
| ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc | |||
| ) | |||
| set(TEST_SRC | |||
| ${TEST_LITE_SRC} | |||
| ${TEST_CASE_KERNEL_SRC} | |||
| ${TEST_DIR}/common/common_test.cc | |||
| ${TEST_DIR}/common/test_lite_main.cc | |||
| ${TEST_DIR}/kernel/cpu/arm/common/pack_tests.cc | |||
| ${TEST_DIR}/device/cpu/arm/infer_test.cc | |||
| ${TEST_DIR}/main.cc | |||
| ${TEST_DIR}/ut/src/runtime/kernel/arm/common/pack_tests.cc | |||
| ${TEST_DIR}/ut/src/infer_test.cc | |||
| # ${TEST_DIR}/device/cpu/arm/graph_test.cc | |||
| ) | |||
| if (SUPPORT_TRAIN) | |||
| set(TEST_SRC | |||
| ${TEST_SRC} | |||
| ${TEST_DIR}/device/cpu/arm/train_test.cc | |||
| ${TEST_DIR}/ut/src/train_test.cc | |||
| ) | |||
| else() | |||
| set(TEST_SRC | |||
| ${TEST_SRC} | |||
| ${TEST_DIR}/device/cpu/arm/infer_test.cc | |||
| ${TEST_DIR}/ut/src/infer_test.cc | |||
| ) | |||
| endif() | |||
| if (SUPPORT_GPU) | |||
| set(TEST_SRC | |||
| ${TEST_SRC} | |||
| ${TEST_DIR}/device/opencl/opencl_infer_tests.cc | |||
| ${TEST_DIR}/kernel/opencl/utils_cl_tests.cc | |||
| ${TEST_DIR}/kernel/opencl/arithmetic_tests.cc | |||
| ${TEST_DIR}/kernel/opencl/convolution_tests.cc | |||
| ${TEST_DIR}/kernel/opencl/depthwise_conv2d_tests.cc | |||
| ${TEST_DIR}/kernel/opencl/matmul_tests.cc | |||
| ${TEST_DIR}/kernel/opencl/max_pooling_cl_tests.cc | |||
| ${TEST_DIR}/kernel/opencl/avg_pooling_cl_tests.cc | |||
| ${TEST_DIR}/kernel/opencl/softmax_cl_tests.cc | |||
| ${TEST_DIR}/kernel/opencl/concat_tests.cc | |||
| ${TEST_DIR}/kernel/opencl/conv2d_transpose_tests.cc | |||
| ${TEST_DIR}/ut/stc/runtime/kernel/opencl/matmul_tests.cc | |||
| ${TEST_DIR}/ut/stc/runtime/kernel/opencl/softmax_cl_tests.cc | |||
| ) | |||
| endif() | |||
| if (ENABLE_FP16) | |||
| set(TEST_SRC | |||
| ${TEST_SRC} | |||
| ${TEST_DIR}/kernel/cpu/arm/fp16/convolution_fp16_tests.cc) | |||
| ${TEST_DIR}/ut/src/runtime/kernel/arm/fp16/convolution_fp16_tests.cc) | |||
| endif () | |||
| add_executable(lite-test ${TEST_SRC}) | |||
| target_link_libraries(lite-test dl ${SECUREC_LIBRARY} ${GTEST_LIBRARY} mindspore::json) | |||
| target_link_libraries(lite-test dl ${SECUREC_LIBRARY} ${GTEST_LIBRARY} mindspore::json mindspore::gtest) | |||
| if (BUILD_CONVERTER) | |||
| target_link_libraries(lite-test | |||
| anf_exporter_mid | |||
| @@ -0,0 +1,41 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/common_test.h" | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif
#endif
namespace mindspore {
// Default no-op implementations of the shared gtest fixture declared in
// common/common_test.h. Individual test suites that need real setup
// override these in their own fixtures.
// NOTE(review): the surrounding extern "C" braces have no effect on C++
// member-function definitions (members cannot take C linkage) — presumably
// copied from a C-style template; confirm whether they can be removed.
void Common::SetUpTestCase() {}    // runs once before the whole suite
void Common::TearDownTestCase() {} // runs once after the whole suite
void Common::SetUp() {}            // runs before every TEST_F
void Common::TearDown() {}         // runs after every TEST_F
}  // namespace mindspore
#ifdef __cplusplus
#if __cplusplus
}
#endif
#endif
| @@ -0,0 +1,78 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef TESTS_UT_COMMON_UT_COMMON_H_ | |||
| #define TESTS_UT_COMMON_UT_COMMON_H_ | |||
| #include <cmath> | |||
| #include <fstream> | |||
| #include <iostream> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include "gtest/gtest.h" | |||
| namespace mindspore { | |||
| class Common : public testing::Test { | |||
| public: | |||
| // TestCase only enter once | |||
| static void SetUpTestCase(); | |||
| static void TearDownTestCase(); | |||
| // every TEST_F macro will enter one | |||
| virtual void SetUp(); | |||
| virtual void TearDown(); | |||
| template <typename T> | |||
| void PrintData(std::string name, T *output_data, int size) { | |||
| std::cout << "The " << name << " is as follows:" << std::endl; | |||
| if (typeid(output_data[0]) == typeid(uint8_t) || typeid(output_data[0]) == typeid(int8_t)) { | |||
| for (size_t i = 0; i < std::min(size, 100); i++) { | |||
| std::cout << static_cast<int>(output_data[i]) << " "; | |||
| } | |||
| } else { | |||
| for (size_t i = 0; i < std::min(size, 100); i++) { | |||
| std::cout << output_data[i] << " "; | |||
| } | |||
| } | |||
| std::cout << std::endl; | |||
| } | |||
| template <typename T> | |||
| static void CompareOutputData(T *output_data, T *correct_data, int size, float err_bound) { | |||
| for (size_t i = 0; i < size; i++) { | |||
| T abs = fabs(output_data[i] - correct_data[i]); | |||
| ASSERT_LE(abs, err_bound); | |||
| } | |||
| } | |||
| void ReadFile(const char *file, size_t *size, char **buf) { | |||
| ASSERT_NE(nullptr, file); | |||
| ASSERT_NE(nullptr, size); | |||
| ASSERT_NE(nullptr, buf); | |||
| std::string path = std::string(file); | |||
| std::ifstream ifs(path); | |||
| ASSERT_EQ(true, ifs.good()); | |||
| ASSERT_EQ(true, ifs.is_open()); | |||
| ifs.seekg(0, std::ios::end); | |||
| *size = ifs.tellg(); | |||
| *buf = new char[*size]; | |||
| ifs.seekg(0, std::ios::beg); | |||
| ifs.read(*buf, *size); | |||
| ifs.close(); | |||
| } | |||
| }; | |||
| } // namespace mindspore | |||
| #endif // TESTS_UT_COMMON_UT_COMMON_H_ | |||
| @@ -0,0 +1,29 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include "gtest/gtest.h" | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| namespace mindspore { | |||
| extern void InitSubModulesLogLevel(); | |||
| } | |||
| GTEST_API_ int main(int argc, char** argv) { | |||
| mindspore::InitSubModulesLogLevel(); | |||
| testing::InitGoogleTest(&argc, argv); | |||
| int ret = RUN_ALL_TESTS(); | |||
| return ret; | |||
| } | |||
| @@ -15,12 +15,12 @@ | |||
| */ | |||
| #include <gtest/gtest.h> | |||
| #include <string> | |||
| #include "tests/ut/cpp/common/common_test.h" | |||
| #include "common/common_test.h" | |||
| #include "benchmark/benchmark.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| class BenchmarkTest : public UT::Common { | |||
| class BenchmarkTest : public mindspore::Common { | |||
| public: | |||
| BenchmarkTest() {} | |||
| }; | |||
| @@ -16,11 +16,11 @@ | |||
| #include <gtest/gtest.h> | |||
| #include <string> | |||
| #include "converter/converter.h" | |||
| #include "tests/ut/cpp/common/common_test.h" | |||
| #include "common/common_test.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| class ConverterTest : public UT::Common { | |||
| class ConverterTest : public mindspore::Common { | |||
| public: | |||
| ConverterTest() {} | |||
| }; | |||
| @@ -0,0 +1,246 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include <time.h> | |||
| #include <climits> | |||
| #include <string> | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <fstream> | |||
| #include "common/common_test.h" | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "mindspore/lite/include/lite_session.h" | |||
| #include "mindspore/lite/src/executor.h" | |||
| #include "mindspore/lite/schema/inner/anf_ir_generated.h" | |||
| namespace mindspore { | |||
// Fixture for lite inference tests; all concrete tests in this file are
// currently commented out, so only the helpers below are live code.
class TestLiteInference : public mindspore::Common {
 public:
  TestLiteInference() {}
};
// Resolve `path` to an absolute canonical path via POSIX realpath().
// Returns "" on any failure: null input, over-long input, allocation
// failure, or a path that does not resolve.
std::string RealPath(const char *path) {
  if (path == nullptr) {
    return "";
  }
  if ((strlen(path)) >= PATH_MAX) {
    return "";
  }
  // Fix: the original held `new char[PATH_MAX]` in a std::shared_ptr<char>,
  // whose deleter calls scalar `delete` on array storage — undefined
  // behavior. unique_ptr<char[]> uses delete[] as required.
  std::unique_ptr<char[]> resolvedPath(new (std::nothrow) char[PATH_MAX]{0});
  if (resolvedPath == nullptr) {
    return "";
  }
  auto ret = realpath(path, resolvedPath.get());
  if (ret == nullptr) {
    return "";
  }
  return std::string(resolvedPath.get());
}
| char *ReadModelFile(const char *file, size_t *size) { | |||
| if (file == nullptr) { | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(size != nullptr); | |||
| std::ifstream ifs(RealPath(file)); | |||
| if (!ifs.good()) { | |||
| return nullptr; | |||
| } | |||
| if (!ifs.is_open()) { | |||
| return nullptr; | |||
| } | |||
| ifs.seekg(0, std::ios::end); | |||
| *size = ifs.tellg(); | |||
| std::unique_ptr<char> buf(new (std::nothrow) char[*size]); | |||
| if (buf == nullptr) { | |||
| ifs.close(); | |||
| return nullptr; | |||
| } | |||
| ifs.seekg(0, std::ios::beg); | |||
| ifs.read(buf.get(), *size); | |||
| ifs.close(); | |||
| return buf.release(); | |||
| } | |||
| // TEST_F(TestLiteInference, Net) { | |||
| // auto msGraph = std::make_shared<lite::GraphDefT>(); | |||
| // msGraph->name = "graph"; | |||
| // auto msSubgraph = std::make_unique<lite::SubGraphDefT>(); | |||
| // msSubgraph->name = "subGraph"; | |||
| // | |||
| // auto node = std::make_unique<lite::OpDefT>(); | |||
| // node->inputIndex = {0, 1}; | |||
| // node->outputIndex = {2}; | |||
| // node->attr.type = lite::OpT_Add; | |||
| // node->attr.value = new lite::AddT; | |||
| // node->name = "Add"; | |||
| // node->fmkType = lite::FmkType_CAFFE; | |||
| // msSubgraph->nodes.emplace_back(std::move(node)); | |||
| // | |||
| // msSubgraph->inputIndex = {0}; | |||
| // msSubgraph->outputIndex = {2}; | |||
| // | |||
| // auto input0 = std::make_unique<lite::TensorDefT>(); | |||
| // input0->refCount = lite::MSCONST_WEIGHT_REFCOUNT; | |||
| // input0->format = lite::Format_NCHW; | |||
| // input0->dataType = TypeId::kNumberTypeFloat; | |||
| // input0->dims = {1, 1, 2, 2}; | |||
| // input0->offset = -1; | |||
| // msSubgraph->allTensors.emplace_back(std::move(input0)); | |||
| // | |||
| // auto input1 = std::make_unique<lite::TensorDefT>(); | |||
| // input1->refCount = lite::MSCONST_WEIGHT_REFCOUNT; | |||
| // input1->format = lite::Format_NCHW; | |||
| // input1->dataType = TypeId::kNumberTypeFloat; | |||
| // input1->dims = {1, 1, 2, 2}; | |||
| // input1->offset = -1; | |||
| // input1->data.resize(16); | |||
| // msSubgraph->allTensors.emplace_back(std::move(input1)); | |||
| // | |||
| // auto output = std::make_unique<lite::TensorDefT>(); | |||
| // output->refCount = 0; | |||
| // output->format = lite::Format_NCHW; | |||
| // output->dims = {1, 1, 2, 2}; | |||
| // output->offset = -1; | |||
| // msSubgraph->allTensors.emplace_back(std::move(output)); | |||
| // msGraph->subgraphs.emplace_back(std::move(msSubgraph)); | |||
| // | |||
| // flatbuffers::FlatBufferBuilder builder(1024); | |||
| // auto offset = lite::GraphDef::Pack(builder, msGraph.get()); | |||
| // builder.Finish(offset); | |||
| // int size = builder.GetSize(); | |||
| // auto *content = builder.GetBufferPointer(); | |||
| // mindspore::lite::Context context; | |||
| // context.allocator = nullptr; | |||
| // context.deviceCtx.type = mindspore::lite::DeviceType::DT_CPU; | |||
| // #if 0 | |||
| // auto graph = mindspore::lite::inference::LoadModel((char *)content, size); | |||
| // | |||
| // auto session = mindspore::lite::inference::Session::CreateSession(&context); | |||
| // | |||
| // std::vector<float> z1 = {1.1, 2.1, 3.1, 4.1}; | |||
| // std::vector<inference::MSTensor *> inputs; | |||
| // auto t1 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector<int>({1, 1, 2, 2})); | |||
| // memcpy_s(t1->MutableData(), z1.size() * sizeof(float), z1.data(), z1.size() * sizeof(float)); | |||
| // | |||
| // auto t2 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector<int>({1, 1, 2, 2})); | |||
| // memcpy_s(t2->MutableData(), z1.size() * sizeof(float), z1.data(), z1.size() * sizeof(float)); | |||
| // | |||
| // inputs.push_back(t1); | |||
| // inputs.push_back(t1); | |||
| // // VectorRef *outputs = new VectorRef(); | |||
| // auto outputs = session->RunGraph(inputs); | |||
| // #else | |||
| // auto file = "./efficientnet_b0.ms"; | |||
| // size_t model_size; | |||
| // | |||
| // char *modelbuf = ReadModelFile(file, &model_size); | |||
| // auto graph = mindspore::lite::inference::LoadModel(modelbuf, model_size); | |||
| // auto session = mindspore::lite::inference::Session::CreateSession(&context); | |||
| // session->CompileGraph(graph); | |||
| // std::vector<inference::MSTensor *> inputs; | |||
| // auto t1 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector<int>({1, 244, 244, 3})); | |||
| // | |||
| // inputs.push_back(t1); | |||
| // auto outputs = session->RunGraph(inputs); | |||
| // #endif | |||
| // } | |||
| // TEST_F(TestLiteInference, Conv) { | |||
| // auto msGraph = std::make_shared<lite::GraphDefT>(); | |||
| // msGraph->name = "graph"; | |||
| // auto msSubgraph = std::make_unique<lite::SubGraphDefT>(); | |||
| // msSubgraph->name = "subGraph"; | |||
| // | |||
| // auto node = std::make_unique<lite::OpDefT>(); | |||
| // node->inputIndex = {0, 1}; | |||
| // node->outputIndex = {2}; | |||
| // node->attr.type = lite::OpT_Conv2D; | |||
| // auto attr = new lite::Conv2DT; | |||
| // attr->padMode = lite::PadMode_SAME; | |||
| // attr->channelIn = 1; | |||
| // attr->channelOut = 1; | |||
| // attr->format = lite::Format_NHWC; | |||
| // attr->strideH = 1; | |||
| // attr->strideW = 1; | |||
| // attr->kernelH = 2; | |||
| // attr->kernelW = 2; | |||
| // | |||
| // node->attr.value = attr; | |||
| // node->name = "Conv2D"; | |||
| // node->fmkType = lite::FmkType_CAFFE; | |||
| // msSubgraph->nodes.emplace_back(std::move(node)); | |||
| // | |||
| // msSubgraph->inputIndex = {0}; | |||
| // msSubgraph->outputIndex = {2}; | |||
| // // MS_LOG(ERROR) << "OutData"; | |||
| // | |||
| // auto input0 = std::make_unique<lite::TensorDefT>(); | |||
| // input0->refCount = lite::MSCONST_WEIGHT_REFCOUNT; | |||
| // input0->format = lite::Format_NCHW; | |||
| // input0->dataType = TypeId::kNumberTypeFloat; | |||
| // input0->dims = {1, 1, 5, 5}; | |||
| // // input0->data.resize(sizeof(float) * 25); | |||
| // // std::vector<float> input_data = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}; | |||
| // // memcpy(input0->data.data(), input_data.data(), sizeof(int) * 25); | |||
| // input0->offset = -1; | |||
| // msSubgraph->allTensors.emplace_back(std::move(input0)); | |||
| // | |||
| // auto weight = std::make_unique<lite::TensorDefT>(); | |||
| // weight->refCount = lite::MSCONST_WEIGHT_REFCOUNT; | |||
| // weight->format = lite::Format_KHWC; | |||
| // weight->dataType = TypeId::kNumberTypeFloat; | |||
| // weight->dims = {1, 2, 2, 1}; | |||
| // weight->data.resize(sizeof(float) * 4); | |||
| // std::vector<float> weight_data = {1, 2, 3, 4}; | |||
| // memcpy(weight->data.data(), weight_data.data(), sizeof(int) * 4); | |||
| // weight->offset = -1; | |||
| // msSubgraph->allTensors.emplace_back(std::move(weight)); | |||
| // | |||
| // auto output = std::make_unique<lite::TensorDefT>(); | |||
| // output->refCount = 0; | |||
| // output->format = lite::Format_NCHW; | |||
| // output->dims = {1, 1, 5, 5}; | |||
| // output->offset = -1; | |||
| // msSubgraph->allTensors.emplace_back(std::move(output)); | |||
| // msGraph->subgraphs.emplace_back(std::move(msSubgraph)); | |||
| // | |||
| // flatbuffers::FlatBufferBuilder builder(1024); | |||
| // auto offset = lite::GraphDef::Pack(builder, msGraph.get()); | |||
| // builder.Finish(offset); | |||
| // int size = builder.GetSize(); | |||
| // auto *content = builder.GetBufferPointer(); | |||
| // mindspore::lite::Context context; | |||
| // context.allocator = nullptr; | |||
| // context.deviceCtx.type = mindspore::lite::DeviceType::DT_CPU; | |||
| // auto graph = mindspore::lite::inference::LoadModel((char *)content, size); | |||
| // auto session = mindspore::lite::inference::Session::CreateSession(&context); | |||
| // session->CompileGraph(graph); | |||
| // std::vector<inference::MSTensor *> inputs; | |||
| // auto t1 = inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, std::vector<int>({1, 3, 244, 244})); | |||
| // | |||
| // inputs.push_back(t1); | |||
| // auto outputs = session->RunGraph(inputs); | |||
| // } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,409 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <cmath> | |||
| #include <memory> | |||
| #include "mindspore/lite/schema/inner/model_generated.h" | |||
| #include "mindspore/lite/include/model.h" | |||
| #include "common/common_test.h" | |||
| #include "include/lite_session.h" | |||
| #include "include/context.h" | |||
| #include "include/errorcode.h" | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| namespace mindspore { | |||
// Fixture for end-to-end lite inference tests; inherits the default
// no-op setup/teardown hooks from mindspore::Common.
class InferTest : public mindspore::Common {
 public:
  InferTest() {}
};
// End-to-end test of a single Conv2D node: build a MetaGraph in memory,
// serialize it with flatbuffers, import it as a lite::Model, run it on CPU
// with weights/input loaded from .bin fixture files, and compare the output
// against a golden .bin file element-for-element.
TEST_F(InferTest, TestConvNode) {
  // --- build the graph definition: one Conv2D node, tensors 0(in) 1(weight) 2(out)
  auto meta_graph = std::make_shared<schema::MetaGraphT>();
  meta_graph->name = "graph";
  auto node = std::make_unique<schema::CNodeT>();
  node->inputIndex = {0, 1};
  node->outputIndex = {2};
  node->primitive = std::make_unique<schema::PrimitiveT>();
  node->primitive->value.type = schema::PrimitiveType_Conv2D;
  // Ownership of `primitive` passes to node->primitive->value below.
  auto primitive = new schema::Conv2DT;
  primitive->padMode = schema::PadMode_SAME;
  primitive->channelIn = 3;
  primitive->channelOut = 32;
  primitive->format = schema::Format_NHWC;
  primitive->strideH = 1;
  primitive->strideW = 1;
  primitive->kernelH = 3;
  primitive->kernelW = 3;
  primitive->dilateH = 1;
  primitive->dilateW = 1;
  node->primitive->value.value = primitive;
  node->name = "Conv2D";
  meta_graph->nodes.emplace_back(std::move(node));
  meta_graph->inputIndex = {0};
  meta_graph->outputIndex = {2};
  // Tensor 0: network input, 1x28x28x3 float32 NHWC.
  auto input0 = std::make_unique<schema::TensorT>();
  input0->nodeType = schema::NodeType::NodeType_ValueNode;
  input0->format = schema::Format_NHWC;
  input0->dataType = TypeId::kNumberTypeFloat32;
  input0->dims = {1, 28, 28, 3};
  input0->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(input0));
  // Tensor 1: conv weights 32x3x3x3, loaded from a binary fixture file.
  auto weight = std::make_unique<schema::TensorT>();
  weight->nodeType = schema::NodeType::NodeType_ValueNode;
  weight->format = schema::Format_KHWC;
  weight->dataType = TypeId::kNumberTypeFloat32;
  weight->dims = {32, 3, 3, 3};
  // Single-slot pointer array reused for all three ReadFile calls.
  // NOTE(review): buf and the buffers ReadFile allocates are never freed —
  // tolerable in a test, but worth cleaning up.
  auto buf = new char *[1];
  //================================================================
  size_t weight_size;
  std::string weight_path = "./convfp32_weight_32_3_3_3.bin";
  ReadFile(weight_path.c_str(), &weight_size, buf);
  ASSERT_NE(nullptr, buf[0]);
  auto weight_data_temp = reinterpret_cast<float *>(buf[0]);
  ASSERT_NE(nullptr, weight_data_temp);
  weight->data.resize(sizeof(float) * 32 * 3 * 3 * 3);
  //================================================================
  // NOTE(review): copies weight_size bytes into a buffer sized for exactly
  // 32*3*3*3 floats — assumes the fixture file has that size; confirm.
  memcpy(weight->data.data(), weight_data_temp, weight_size);
  weight->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(weight));
  // Tensor 2: network output, 1x28x28x32 float32.
  auto output = std::make_unique<schema::TensorT>();
  output->nodeType = schema::NodeType::NodeType_Parameter;
  output->format = schema::Format_NHWC;
  output->dataType = TypeId::kNumberTypeFloat32;
  output->dims = {1, 28, 28, 32};
  output->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(output));
  // --- serialize the graph and import it as a runnable model
  flatbuffers::FlatBufferBuilder builder(1024);
  auto offset = schema::MetaGraph::Pack(builder, meta_graph.get());
  builder.Finish(offset);
  size_t size = builder.GetSize();
  const char *content = reinterpret_cast<char *>(builder.GetBufferPointer());
  auto model = lite::Model::Import(content, size);
  ASSERT_NE(nullptr, model);
  meta_graph.reset();
  content = nullptr;
  // --- CPU session, 4 threads, no core binding
  // NOTE(review): context is heap-allocated and never deleted; confirm
  // whether the session takes ownership.
  auto context = new lite::Context;
  context->cpuBindMode = lite::NO_BIND;
  context->deviceCtx.type = lite::DT_CPU;
  context->threadNum = 4;
  auto session = session::LiteSession::CreateSession(context);
  ASSERT_NE(nullptr, session);
  auto ret = session->CompileGraph(model.get());
  ASSERT_EQ(lite::RET_OK, ret);
  auto inputs = session->GetInputs();
  ASSERT_EQ(inputs.size(), 1);
  auto inTensor = inputs.front();
  ASSERT_NE(nullptr, inTensor);
  auto data = inTensor->MutableData();
  // --- feed the input fixture
  //===================================================
  size_t input_size;
  std::string input_path = "./convfp32_input_1_28_28_3.bin";
  ReadFile(input_path.c_str(), &input_size, buf);
  ASSERT_NE(nullptr, buf[0]);
  auto input_data = reinterpret_cast<float *>(buf[0]);
  ASSERT_NE(nullptr, input_data);
  //===================================================
  ASSERT_EQ(input_size, inTensor->Size());
  memcpy(data, input_data, input_size);
  ret = session->RunGraph();
  ASSERT_EQ(lite::RET_OK, ret);
  // --- compare against the golden output fixture
  auto outputs = session->GetOutputs();
  ASSERT_EQ(outputs.size(), 1);
  auto outTensor = outputs.front();
  ASSERT_NE(nullptr, outTensor);
  ASSERT_EQ(28 * 28 * 32, outTensor->ElementsNum());
  ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
  auto *outData = reinterpret_cast<float *>(outTensor->MutableData());
  ASSERT_NE(nullptr, outData);
  //===================================================
  size_t output_size;
  std::string output_path = "./convfp32_out_1_28_28_32.bin";
  ReadFile(output_path.c_str(), &output_size, buf);
  ASSERT_NE(nullptr, buf[0]);
  auto output_data = reinterpret_cast<float *>(buf[0]);
  ASSERT_NE(nullptr, output_data);
  //===================================================
  ASSERT_EQ(output_size, outTensor->Size());
  // NOTE(review): exact float equality against the golden file — assumes
  // bit-identical CPU kernels; a tolerance compare may be more robust.
  for (size_t i = 0; i < outTensor->ElementsNum(); i++) {
    ASSERT_EQ(output_data[i], outData[i]);
  }
  MS_LOG(INFO) << "Passed";
}
// End-to-end test of a single element-wise Add node with two graph inputs.
// Only checks that the graph compiles and runs and that the output tensor
// has the expected shape/type; the value comparison is commented out.
TEST_F(InferTest, TestAddNode) {
  // --- build the graph: one Add node, tensors 0 and 1 as inputs, 2 as output
  auto meta_graph = std::make_shared<schema::MetaGraphT>();
  meta_graph->name = "graph";
  auto node = std::make_unique<schema::CNodeT>();
  node->inputIndex = {0, 1};
  node->outputIndex = {2};
  node->primitive = std::make_unique<schema::PrimitiveT>();
  node->primitive->value.type = schema::PrimitiveType_Add;
  // Ownership of `primitive` passes to node->primitive->value.
  auto primitive = new schema::AddT;
  node->primitive->value.value = primitive;
  node->name = "Add";
  meta_graph->nodes.emplace_back(std::move(node));
  meta_graph->inputIndex = {0, 1};
  meta_graph->outputIndex = {2};
  // Input tensor 0: 1x28x28x3 float32 NHWC.
  auto input0 = std::make_unique<schema::TensorT>();
  input0->nodeType = schema::NodeType::NodeType_ValueNode;
  input0->format = schema::Format_NHWC;
  input0->dataType = TypeId::kNumberTypeFloat32;
  input0->dims = {1, 28, 28, 3};
  input0->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(input0));
  // Input tensor 1: same shape as tensor 0 (element-wise add operand).
  auto weight = std::make_unique<schema::TensorT>();
  weight->nodeType = schema::NodeType::NodeType_ValueNode;
  weight->format = schema::Format_KHWC;
  weight->dataType = TypeId::kNumberTypeFloat32;
  weight->dims = {1, 28, 28, 3};
  weight->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(weight));
  // Output tensor 2: dims intentionally left empty — presumably inferred
  // during CompileGraph; confirm shape inference fills them.
  auto output = std::make_unique<schema::TensorT>();
  output->nodeType = schema::NodeType::NodeType_Parameter;
  output->format = schema::Format_NHWC;
  output->dataType = TypeId::kNumberTypeFloat32;
  output->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(output));
  // --- serialize and import
  flatbuffers::FlatBufferBuilder builder(1024);
  auto offset = schema::MetaGraph::Pack(builder, meta_graph.get());
  builder.Finish(offset);
  size_t size = builder.GetSize();
  const char *content = reinterpret_cast<char *>(builder.GetBufferPointer());
  auto model = lite::Model::Import(content, size);
  ASSERT_NE(nullptr, model);
  meta_graph.reset();
  content = nullptr;
  // --- session on GPU device
  // NOTE(review): uses DT_GPU — presumably requires a SUPPORT_GPU build to
  // pass; confirm, and note `context` is never deleted.
  auto context = new lite::Context;
  context->cpuBindMode = lite::NO_BIND;
  context->deviceCtx.type = lite::DT_GPU;
  context->threadNum = 4;
  auto session = session::LiteSession::CreateSession(context);
  ASSERT_NE(nullptr, session);
  auto ret = session->CompileGraph(model.get());
  ASSERT_EQ(lite::RET_OK, ret);
  auto inputs = session->GetInputs();
  ASSERT_EQ(inputs.size(), 2);
  // Inputs are touched (MutableData allocates/returns the buffers) but not
  // filled with specific values — the run is a smoke test.
  auto inTensor = inputs.front();
  ASSERT_NE(nullptr, inTensor);
  (void)inTensor->MutableData();
  auto inTensor1 = inputs.back();
  ASSERT_NE(nullptr, inTensor1);
  (void)inTensor1->MutableData();
  ret = session->RunGraph();
  ASSERT_EQ(lite::RET_OK, ret);
  // --- shape/type checks only
  auto outputs = session->GetOutputs();
  ASSERT_EQ(outputs.size(), 1);
  auto outTensor = outputs.front();
  ASSERT_NE(nullptr, outTensor);
  ASSERT_EQ(28 * 28 * 3, outTensor->ElementsNum());
  ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
  auto *outData = reinterpret_cast<float *>(outTensor->MutableData());
  ASSERT_NE(nullptr, outData);
  // Golden-value comparison kept for reference but disabled (the fixture
  // path belongs to the conv test).
  // //===================================================
  // size_t output_size;
  // std::string output_path = "./convfp32_out_1_28_28_32.bin";
  // ReadFile(output_path.c_str(), &output_size, buf);
  // ASSERT_NE(nullptr, buf[0]);
  // auto output_data = reinterpret_cast<float *>(buf[0]);
  // ASSERT_NE(nullptr, output_data);
  // //===================================================
  // ASSERT_EQ(output_size, outTensor->Size());
  // for (size_t i = 0; i < outTensor->ElementsNum(); i++) {
  //   ASSERT_EQ(output_data[i], outData[i]);
  // }
  MS_LOG(INFO) << "Passed";
}
// Smoke test: load a pre-converted model file (./model.ms) from disk,
// compile it, and run it once on CPU. No output values are checked.
TEST_F(InferTest, TestModel) {
  // Single-slot pointer array filled by Common::ReadFile.
  auto buf = new char *[1];
  size_t model_size;
  std::string model_path = "./model.ms";
  ReadFile(model_path.c_str(), &model_size, buf);
  ASSERT_NE(nullptr, buf[0]);
  auto model = lite::Model::Import(buf[0], model_size);
  ASSERT_NE(nullptr, model);
  // The model owns its own copy after Import, so the raw file buffer
  // can be released here. NOTE(review): `buf` itself (the char*[1]) and
  // `context` below are never freed.
  delete[] buf[0];
  auto context = new lite::Context;
  context->cpuBindMode = lite::NO_BIND;
  context->deviceCtx.type = lite::DT_CPU;
  context->threadNum = 4;
  auto session = session::LiteSession::CreateSession(context);
  ASSERT_NE(nullptr, session);
  auto ret = session->CompileGraph(model.get());
  ASSERT_EQ(lite::RET_OK, ret);
  auto inputs = session->GetInputs();
  ASSERT_EQ(inputs.size(), 1);
  auto inTensor = inputs.front();
  ASSERT_NE(nullptr, inTensor);
  // Allocate the input buffer; contents are left uninitialized — this is
  // a does-it-run test only.
  (void)inTensor->MutableData();
  ret = session->RunGraph();
  ASSERT_EQ(lite::RET_OK, ret);
  auto outputs = session->GetOutputs();
  MS_LOG(INFO) << "Passed";
}
| // TEST_F(TrainTest, TestMultiNode) { | |||
| // auto msGraph = std::make_shared<schema::GraphDefT>(); | |||
| // msGraph->name = "graph"; | |||
| // auto msSubgraph = std::make_unique<schema::SubGraphDefT>(); | |||
| // msSubgraph->name = "subGraph"; | |||
| // | |||
| // auto conv = std::make_unique<schema::OpDefT>(); | |||
| // conv->inputIndex = {0, 1}; | |||
| // conv->outputIndex = {2}; | |||
| // conv->attr.type = schema::OpT_Conv2D; | |||
| // auto conv_attr = new schema::Conv2DT; | |||
| // conv_attr->padMode = schema::PadMode_SAME; | |||
| // conv_attr->format = schema::Format_NHWC; | |||
| // conv_attr->strideH = 1; | |||
| // conv_attr->strideW = 1; | |||
| // conv_attr->kernelH = 3; | |||
| // conv_attr->kernelW = 3; | |||
| // conv_attr->dilateH = 1; | |||
| // conv_attr->dilateW = 1; | |||
| // | |||
| // conv->attr.value = conv_attr; | |||
| // conv->name = "Conv2D"; | |||
| // conv->fmkType = schema::FmkType_CAFFE; | |||
| // msSubgraph->nodes.emplace_back(std::move(conv)); | |||
| // | |||
| // auto matMul1 = std::make_unique<schema::OpDefT>(); | |||
| // matMul1->inputIndex = {2, 3}; | |||
| // matMul1->outputIndex = {4}; | |||
| // matMul1->attr.type = schema::OpT_MatMul; | |||
| // auto matMul_attr1 = new schema::MatMulT; | |||
| // matMul_attr1->transposeA = false; | |||
| // matMul_attr1->transposeB = true; | |||
| // matMul1->attr.value = matMul_attr1; | |||
| // matMul1->name = "matmul1"; | |||
| // matMul1->fmkType = schema::FmkType_CAFFE; | |||
| // msSubgraph->nodes.emplace_back(std::move(matMul1)); | |||
| // | |||
| // auto matMul2 = std::make_unique<schema::OpDefT>(); | |||
| // matMul2->inputIndex = {4, 5}; | |||
| // matMul2->outputIndex = {6}; | |||
| // matMul2->attr.type = schema::OpT_MatMul; | |||
| // auto matMul_attr2 = new schema::MatMulT; | |||
| // matMul_attr2->transposeA = false; | |||
| // matMul_attr2->transposeB = true; | |||
| // matMul2->attr.value = matMul_attr2; | |||
| // matMul2->name = "matmul2"; | |||
| // matMul2->fmkType = schema::FmkType_CAFFE; | |||
| // msSubgraph->nodes.emplace_back(std::move(matMul2)); | |||
| // | |||
| // msSubgraph->inputIndex = {0}; | |||
| // msSubgraph->outputIndex = {6}; | |||
| // | |||
| // auto input0 = std::make_unique<schema::TensorDefT>(); | |||
| // input0->refCount = schema::MSCONST_WEIGHT_REFCOUNT; | |||
| // input0->format = schema::Format_NHWC; | |||
| // input0->dataType = TypeId::kNumberTypeFloat32; | |||
| // input0->dims = {1, 5, 5, 3}; | |||
| // input0->offset = -1; | |||
| // msSubgraph->allTensors.emplace_back(std::move(input0)); | |||
| // | |||
| // auto conv_weight = std::make_unique<schema::TensorDefT>(); | |||
| // conv_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT; | |||
| // conv_weight->format = schema::Format_KHWC; | |||
| // conv_weight->dataType = TypeId::kNumberTypeFloat32; | |||
| // conv_weight->dims = {8, 3, 3, 3}; | |||
| // conv_weight->data.resize(8*3*3*3*sizeof(float)); | |||
| // msSubgraph->allTensors.emplace_back(std::move(conv_weight)); | |||
| // | |||
| // auto conv_output = std::make_unique<schema::TensorDefT>(); | |||
| // conv_output->refCount = 0; | |||
| // conv_output->format = schema::Format_NHWC; | |||
| // conv_output->dataType = TypeId::kNumberTypeFloat32; | |||
| // conv_output->dims = {1, 5, 5, 8}; | |||
| // msSubgraph->allTensors.emplace_back(std::move(conv_output)); | |||
| // | |||
| // auto add_weight = std::make_unique<schema::TensorDefT>(); | |||
| // add_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT; | |||
| // add_weight->format = schema::Format_NHWC; | |||
| // add_weight->dataType = TypeId::kNumberTypeFloat32; | |||
| // add_weight->dims = {1, 5, 5, 8}; | |||
| // add_weight->data.resize(5*5*8*sizeof(float)); | |||
| // msSubgraph->allTensors.emplace_back(std::move(add_weight)); | |||
| // | |||
| // auto add_output = std::make_unique<schema::TensorDefT>(); | |||
| // add_output->refCount = 0; | |||
| // add_output->format = schema::Format_NHWC; | |||
| // add_output->dataType = TypeId::kNumberTypeFloat32; | |||
| // add_output->dims = {1, 5, 5, 8}; | |||
| // msSubgraph->allTensors.emplace_back(std::move(add_output)); | |||
| // | |||
| // auto mul_weight = std::make_unique<schema::TensorDefT>(); | |||
| // mul_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT; | |||
| // mul_weight->format = schema::Format_NHWC; | |||
| // mul_weight->dataType = TypeId::kNumberTypeFloat32; | |||
| // mul_weight->dims = {1, 5, 5, 8}; | |||
| // mul_weight->data.resize(5*5*8*sizeof(float)); | |||
| // msSubgraph->allTensors.emplace_back(std::move(mul_weight)); | |||
| // | |||
| // auto mul_output = std::make_unique<schema::TensorDefT>(); | |||
| // mul_output->refCount = 0; | |||
| // mul_output->format = schema::Format_NHWC; | |||
| // mul_output->dataType = TypeId::kNumberTypeFloat32; | |||
| // mul_output->dims = {1, 5, 5, 8}; | |||
| // msSubgraph->allTensors.emplace_back(std::move(mul_output)); | |||
| // msGraph->subgraphs.emplace_back(std::move(msSubgraph)); | |||
| // | |||
| // flatbuffers::FlatBufferBuilder builder(1024); | |||
| // auto offset = schema::GraphDef::Pack(builder, msGraph.get()); | |||
| // builder.Finish(offset); | |||
| // size_t size = builder.GetSize(); | |||
| // const char *content = (char *)builder.GetBufferPointer(); | |||
| // const std::string strstub = ""; | |||
| // | |||
| // auto func_graph = inference::LoadModel(content, size, strstub); | |||
| // ASSERT_NE(nullptr, func_graph); | |||
| // auto session = inference::MSSession::CreateSession(kCPUDevice, 0); | |||
| // ASSERT_NE(nullptr, session); | |||
| // auto graphId = session->CompileGraph(func_graph); | |||
| // | |||
| // auto inTensor = | |||
| // std::shared_ptr<inference::MSTensor>(inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, {1, 5, 5, 3})); | |||
| // ASSERT_NE(nullptr, inTensor); | |||
| // ASSERT_EQ(sizeof(float) * (5 * 5 * 3), inTensor->Size()); | |||
| // (void)inTensor->MutableData(); | |||
| // | |||
| // std::vector<std::shared_ptr<inference::MSTensor>> inputs; | |||
| // inputs.emplace_back(inTensor); | |||
| // auto outputs = session->RunGraph(graphId, inputs); | |||
| // ASSERT_EQ(1, outputs.size()); | |||
| // ASSERT_EQ(1, outputs.front().size()); | |||
| // auto runOutput = outputs.front().front(); | |||
| // ASSERT_NE(nullptr, runOutput); | |||
| // ASSERT_EQ(5 * 5 * 8, runOutput->ElementsNum()); | |||
| // ASSERT_EQ(TypeId::kNumberTypeFloat32, runOutput->data_type()); | |||
| // MS_LOG(INFO) << "Passed"; | |||
| //} | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,303 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/common/file_utils.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/pack.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h" | |||
| namespace mindspore { | |||
| class TestPack : public mindspore::Common { | |||
| public: | |||
| TestPack() {} | |||
| }; | |||
// Fills conv_param with the fixed convolution shape shared by every pack test
// in this file: a 1x28x28x3 input, 3x3 kernel, stride 1, dilation 1 and
// padding 1, producing a 1x28x28x32 output.
void InitConvParamPack(ConvParameter *conv_param) {
  // Input tensor: NHWC 1x28x28x3.
  conv_param->input_batch_ = 1;
  conv_param->input_h_ = 28;
  conv_param->input_w_ = 28;
  conv_param->input_channel_ = 3;
  // Output tensor: NHWC 1x28x28x32 (same spatial size — "same" padding).
  conv_param->output_batch_ = 1;
  conv_param->output_h_ = 28;
  conv_param->output_w_ = 28;
  conv_param->output_channel_ = 32;
  // 3x3 kernel, unit stride and dilation, one pixel of padding on each side.
  conv_param->kernel_h_ = 3;
  conv_param->kernel_w_ = 3;
  conv_param->stride_h_ = 1;
  conv_param->stride_w_ = 1;
  conv_param->dilation_h_ = 1;
  conv_param->dilation_w_ = 1;
  conv_param->pad_h_ = 1;
  conv_param->pad_w_ = 1;
}
| TEST_F(TestPack, PackInputFp32) { | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/conv/convfp32_input_1_28_28_3.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto conv_param = new ConvParameter; | |||
| InitConvParamPack(conv_param); | |||
| int kernel_h = conv_param->kernel_h_; | |||
| int kernel_w = conv_param->kernel_w_; | |||
| int in_batch = conv_param->input_batch_; | |||
| int in_channel = conv_param->input_channel_; | |||
| int in_h = conv_param->input_h_; | |||
| int in_w = conv_param->input_w_; | |||
| int out_h = conv_param->output_h_; | |||
| int out_w = conv_param->output_w_; | |||
| int thread_count = 1; | |||
| int tile_n = 8; | |||
| int output_count = out_h * out_w; | |||
| int output_tile_count = UP_DIV(output_count, tile_n); | |||
| int inchannel_block = 4; | |||
| int channel_block = UP_DIV(in_channel, inchannel_block); | |||
| int kernel_plane = kernel_h * kernel_w; | |||
| int unit_size = kernel_plane * channel_block * inchannel_block; | |||
| int packed_input_size = output_tile_count * tile_n * unit_size; | |||
| auto packed_input = reinterpret_cast<float *>(malloc(in_batch * packed_input_size * sizeof(float))); | |||
| memset(packed_input, 0, in_batch * packed_input_size * sizeof(float)); | |||
| for (int b = 0; b < in_batch; b++) { | |||
| int in_batch_offset = b * in_channel * in_h * in_w; | |||
| int gemm_in_batch_offset = b * packed_input_size; | |||
| for (int thread_id = 0; thread_id < output_tile_count; thread_id += thread_count) { | |||
| int start_index = thread_id * tile_n; | |||
| int real_cal_num = (output_count - start_index) < tile_n ? (output_count - tile_n) : tile_n; | |||
| float *gemm_input = | |||
| reinterpret_cast<float *>(packed_input) + thread_id * unit_size * tile_n + gemm_in_batch_offset; | |||
| Im2ColPackUnitFp32(input_data + in_batch_offset, conv_param, gemm_input, real_cal_num, start_index); | |||
| } | |||
| } | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << packed_input[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string file_path = "./test_data/conv/convfp32_packinput.txt"; | |||
| // mindspore::lite::WriteToTxt<float>(file_path, packed_data, in_batch * packed_input_size); | |||
| delete input_data; | |||
| delete conv_param; | |||
| free(packed_input); | |||
| MS_LOG(INFO) << "TestPackInputFp32 passed"; | |||
| } | |||
| TEST_F(TestPack, PackWeightFp32) { | |||
| auto conv_param = new ConvParameter; | |||
| InitConvParamPack(conv_param); | |||
| int k_h = conv_param->kernel_h_; | |||
| int k_w = conv_param->kernel_w_; | |||
| int in_channel = conv_param->input_channel_; | |||
| int out_channel = conv_param->output_channel_; | |||
| int ic4 = UP_DIV(in_channel, C4NUM); | |||
| int oc8 = UP_DIV(out_channel, C8NUM); | |||
| size_t weight_size; | |||
| std::string weight_path = "./test_data/conv/convfp32_weight_32_3_3_3.bin"; | |||
| auto weight_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size)); | |||
| auto packed_weight = reinterpret_cast<float *>(malloc(k_h * k_w * ic4 * C4NUM * oc8 * C8NUM * sizeof(float))); | |||
| PackWeightFp32(weight_data, conv_param, packed_weight); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << packed_weight[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| free(packed_weight); | |||
| delete conv_param; | |||
| MS_LOG(INFO) << "TestPackWeightFp32 passed"; | |||
| } | |||
#ifdef ENABLE_FP16
TEST_F(TestPack, PackInputFp16) {
  // Converts the fp32 conv input fixture to fp16 element-by-element, then
  // im2col-packs it with the fp16 tiling (tile 16, channel block 8) and
  // prints the first 20 packed values.
  size_t input_size;
  std::string input_path = "./test_data/conv/convfp32_input_1_28_28_3.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  ASSERT_NE(nullptr, input_data);  // fail fast if the fixture file is missing
  int input_ele_size = input_size / sizeof(float);
  auto fp16_input_data = new float16_t[input_ele_size];
  for (int i = 0; i < input_ele_size; i++) {
    fp16_input_data[i] = (float16_t)input_data[i];
  }
  auto conv_param = new ConvParameter;
  InitConvParamPack(conv_param);
  int kernel_h = conv_param->kernel_h_;
  int kernel_w = conv_param->kernel_w_;
  int in_batch = conv_param->input_batch_;
  int in_channel = conv_param->input_channel_;
  int in_h = conv_param->input_h_;
  int in_w = conv_param->input_w_;
  int out_h = conv_param->output_h_;
  int out_w = conv_param->output_w_;
  int thread_count = 1;
  int tile_n = 16;
  int output_count = out_h * out_w;
  int output_tile_count = UP_DIV(output_count, tile_n);
  int inchannel_block = 8;
  int channel_block = UP_DIV(in_channel, inchannel_block);
  int kernel_plane = kernel_h * kernel_w;
  int unit_size = kernel_plane * channel_block * inchannel_block;
  int packed_input_size = output_tile_count * tile_n * unit_size;
  auto packed_input = reinterpret_cast<float *>(malloc(in_batch * packed_input_size * sizeof(float16_t)));
  ASSERT_NE(nullptr, packed_input);
  memset(packed_input, 0, in_batch * packed_input_size * sizeof(float16_t));
  for (int b = 0; b < in_batch; b++) {
    int in_batch_offset = b * in_channel * in_h * in_w;
    int gemm_in_batch_offset = b * packed_input_size;
    for (int thread_id = 0; thread_id < output_tile_count; thread_id += thread_count) {
      int start_index = thread_id * tile_n;
      // fix: a partial last tile must process the REMAINING elements
      // (output_count - start_index), not "output_count - tile_n".
      int real_cal_num = (output_count - start_index) < tile_n ? (output_count - start_index) : tile_n;
      float16_t *gemm_input =
        reinterpret_cast<float16_t *>(packed_input) + thread_id * unit_size * tile_n + gemm_in_batch_offset;
      Im2ColPackUnitFp16(fp16_input_data + in_batch_offset, conv_param, gemm_input, real_cal_num, start_index);
    }
  }
  printf("==================output data=================\n");
  for (int i = 0; i < 20; i++) {
    std::cout << packed_input[i] << " ,";
  }
  std::cout << std::endl;
  delete input_data;
  delete[] fp16_input_data;
  delete conv_param;
  // fix: packed_input comes from malloc, so releasing it with "delete" was
  // undefined behavior; it must be paired with free().
  free(packed_input);
  MS_LOG(INFO) << "TestPackInputFp16 passed";
}
#endif
| TEST_F(TestPack, PackInputUint8) { | |||
| auto conv_param = new ConvParameter; | |||
| InitConvParamPack(conv_param); | |||
| int kernel_h = conv_param->kernel_h_; | |||
| int kernel_w = conv_param->kernel_w_; | |||
| int in_batch = conv_param->input_batch_; | |||
| int in_channel = conv_param->input_channel_; | |||
| int in_h = conv_param->input_h_; | |||
| int in_w = conv_param->input_w_; | |||
| int out_h = conv_param->output_h_; | |||
| int out_w = conv_param->output_w_; | |||
| int thread_count = 1; | |||
| int tile_n = 8; | |||
| int output_count = out_h * out_w; | |||
| int output_tile_count = UP_DIV(output_count, tile_n); | |||
| int inchannel_block = 4; | |||
| int channel_block = UP_DIV(in_channel, inchannel_block); | |||
| int kernel_plane = kernel_h * kernel_w; | |||
| int unit_size = kernel_plane * channel_block * inchannel_block; | |||
| int packed_input_size = output_tile_count * tile_n * unit_size; | |||
| // input | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/conv/convuint8_input_1_28_28_3.bin"; | |||
| auto input_data = reinterpret_cast<uint8_t *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto int8_input = reinterpret_cast<int8_t *>(malloc(input_size)); | |||
| for (int i = 0; i < input_size; i++) { | |||
| int8_input[i] = (int8_t)(input_data[i] - 128); | |||
| } | |||
| auto packed_input = reinterpret_cast<int8_t *>(malloc(in_batch * packed_input_size)); | |||
| memset(packed_input, 0, in_batch * packed_input_size); | |||
| int32_t *input_sum = reinterpret_cast<int32_t *>(malloc(tile_n * thread_count * sizeof(int32_t))); | |||
| for (int b = 0; b < in_batch; b++) { | |||
| int in_batch_offset = b * in_channel * in_h * in_w; | |||
| int gemm_in_batch_offset = b * packed_input_size; | |||
| for (int thread_id = 0; thread_id < output_tile_count; thread_id += thread_count) { | |||
| int start_index = thread_id * tile_n; | |||
| int real_cal_num = (output_count - start_index) < tile_n ? (output_count - tile_n) : tile_n; | |||
| int8_t *gemm_input = | |||
| reinterpret_cast<int8_t *>(packed_input) + thread_id * unit_size * tile_n + gemm_in_batch_offset; | |||
| memset(input_sum, 0, tile_n * thread_count * sizeof(int32_t)); | |||
| Im2ColPackUnitInt8(int8_input + in_batch_offset, gemm_input, real_cal_num, start_index, input_sum, conv_param); | |||
| } | |||
| } | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << static_cast<int>(packed_input[i]) << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| delete input_data; | |||
| delete conv_param; | |||
| free(int8_input); | |||
| free(packed_input); | |||
| free(input_sum); | |||
| MS_LOG(INFO) << "TestPackInputUint8 passed"; | |||
| } | |||
| TEST_F(TestPack, PackWeightUint8) { | |||
| auto conv_param = new ConvParameter; | |||
| InitConvParamPack(conv_param); | |||
| int k_h = conv_param->kernel_h_; | |||
| int k_w = conv_param->kernel_w_; | |||
| int in_channel = conv_param->input_channel_; | |||
| int out_channel = conv_param->output_channel_; | |||
| int ic4 = UP_DIV(in_channel, C4NUM); | |||
| int oc4 = UP_DIV(out_channel, C4NUM); | |||
| size_t weight_size; | |||
| std::string weight_path = "./test_data/conv/convuint8_weight_32_3_3_3.bin"; | |||
| auto weight_data = reinterpret_cast<uint8_t *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size)); | |||
| auto int8_weight = reinterpret_cast<int8_t *>(malloc(weight_size)); | |||
| for (int i = 0; i < weight_size; i++) { | |||
| int8_weight[i] = (int8_t)(weight_data[i] - 128); | |||
| } | |||
| int32_t filter_zp = 20; | |||
| int32_t *weight_sum = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t) * out_channel)); | |||
| for (int i = 0; i < out_channel; i++) weight_sum[i] = filter_zp * ic4 * C4NUM * k_h * k_w; | |||
| auto packed_weight = reinterpret_cast<int8_t *>(malloc(k_h * k_w * ic4 * C4NUM * oc4 * C4NUM)); | |||
| PackWeightInt8(int8_weight, conv_param, packed_weight, weight_sum); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << static_cast<int>(packed_weight[i]) << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| free(weight_sum); | |||
| free(int8_weight); | |||
| free(packed_weight); | |||
| delete conv_param; | |||
| MS_LOG(INFO) << "TestPackWeightUint8 passed"; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,593 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/common/utils.h" | |||
| #include "src/common/file_utils.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.h" | |||
| #include "src/runtime/kernel/arm/opclib/fp16/conv_fp16.h" | |||
| namespace mindspore { | |||
| class TestConvolutionFp16 : public mindspore::Common { | |||
| public: | |||
| TestConvolutionFp16() {} | |||
| }; | |||
// Fills conv_param with the "group 1" fp16 test shape: single-threaded
// 1x28x28x3 -> 1x28x28x32 convolution with a 3x3 kernel, stride 1,
// dilation 1 and padding 1 ("same" spatial size).
void InitConvParamGroup1Fp16(ConvParameter *conv_param) {
  // Input tensor: NHWC 1x28x28x3.
  conv_param->input_batch_ = 1;
  conv_param->input_h_ = 28;
  conv_param->input_w_ = 28;
  conv_param->input_channel_ = 3;
  // Output tensor: NHWC 1x28x28x32.
  conv_param->output_batch_ = 1;
  conv_param->output_h_ = 28;
  conv_param->output_w_ = 28;
  conv_param->output_channel_ = 32;
  // 3x3 kernel, unit stride/dilation, one pixel of padding per side.
  conv_param->kernel_h_ = 3;
  conv_param->kernel_w_ = 3;
  conv_param->stride_h_ = 1;
  conv_param->stride_w_ = 1;
  conv_param->dilation_h_ = 1;
  conv_param->dilation_w_ = 1;
  conv_param->pad_h_ = 1;
  conv_param->pad_w_ = 1;
  // All tests in this file run the kernel on a single thread.
  conv_param->thread_num_ = 1;
}
// Fills conv_param with the "group 2" fp16 test shape: single-threaded
// 1x128x128x32 -> 1x128x128x32 convolution with a 3x3 kernel, stride 1,
// dilation 1 and padding 1 — a larger workload used for timing in ConvTest2.
void InitConvParamGroup2Fp16(ConvParameter *conv_param) {
  // Input tensor: NHWC 1x128x128x32.
  conv_param->input_batch_ = 1;
  conv_param->input_h_ = 128;
  conv_param->input_w_ = 128;
  conv_param->input_channel_ = 32;
  // Output tensor: NHWC 1x128x128x32 (same spatial size).
  conv_param->output_batch_ = 1;
  conv_param->output_h_ = 128;
  conv_param->output_w_ = 128;
  conv_param->output_channel_ = 32;
  // 3x3 kernel, unit stride/dilation, one pixel of padding per side.
  conv_param->kernel_h_ = 3;
  conv_param->kernel_w_ = 3;
  conv_param->stride_h_ = 1;
  conv_param->stride_w_ = 1;
  conv_param->dilation_h_ = 1;
  conv_param->dilation_w_ = 1;
  conv_param->pad_h_ = 1;
  conv_param->pad_w_ = 1;
  // Single-threaded execution.
  conv_param->thread_num_ = 1;
}
TEST_F(TestConvolutionFp16, ConvTest1) {
  // End-to-end fp16 convolution for the 1x28x28x3 -> 1x28x28x32 shape:
  // converts fp32 fixtures to fp16, packs weight and input, times 100
  // ConvFp16 runs, converts the fp16 output back to fp32 and compares it
  // against a reference binary via lite::CompareOutput.
  // prepare stage
  auto conv_param = new ConvParameter();
  InitConvParamGroup1Fp16(conv_param);
  int tile_num = 16;
  int k_h = conv_param->kernel_h_;
  int k_w = conv_param->kernel_w_;
  int kernel_plane = k_h * k_w;
  int in_batch = conv_param->input_batch_;
  int in_channel = conv_param->input_channel_;
  int i_h = conv_param->input_h_;
  int i_w = conv_param->input_w_;
  int out_channel = conv_param->output_channel_;
  // Channel blocking: input rounded up to blocks of 4, output to blocks of 8.
  int ic4 = UP_DIV(in_channel, C4NUM);
  int oc8 = UP_DIV(out_channel, C8NUM);
  size_t weight_size;
  std::string weight_path = "./test_data/conv/convfp32_weight_32_3_3_3.bin";
  auto weight_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size));
  std::cout << "==============fp32 weight data===========" << std::endl;
  for (int i = 0; i < 20; i++) {
    std::cout << weight_data[i] << ", ";
  }
  std::cout << std::endl;
  std::cout << "weight data size: " << weight_size / sizeof(float) << std::endl;
  // Element-wise narrowing of the fp32 weights to fp16.
  int weight_ele_size = weight_size / sizeof(float);
  auto fp16_weight_data = new float16_t[weight_ele_size];
  for (int i = 0; i < weight_ele_size; i++) {
    fp16_weight_data[i] = static_cast<float16_t>(weight_data[i]);
  }
  std::cout << "==============fp16 weight data===========" << std::endl;
  for (int i = 0; i < 20; i++) {
    std::cout << fp16_weight_data[i] << ", ";
  }
  std::cout << std::endl;
  auto packed_weight = reinterpret_cast<float16_t *>(malloc(k_h * k_w * ic4 * C4NUM * oc8 * C8NUM * sizeof(float16_t)));
  PackWeightFp16(fp16_weight_data, conv_param, packed_weight);
  std::cout << "==============fp16 packed weight data===========" << std::endl;
  for (int i = 0; i < 20; i++) {
    std::cout << packed_weight[i] << ", ";
  }
  std::cout << std::endl;
  size_t input_size;
  std::string input_path = "./test_data/conv/convfp32_input_1_28_28_3.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  std::cout << "==============fp32 input data===========" << std::endl;
  for (int i = 0; i < 20; i++) {
    std::cout << input_data[i] << ", ";
  }
  std::cout << std::endl;
  // Element-wise narrowing of the fp32 input to fp16.
  int input_ele_size = input_size / sizeof(float);
  auto fp16_input_data = new float16_t[input_ele_size];
  for (int i = 0; i < input_ele_size; i++) {
    fp16_input_data[i] = static_cast<float16_t>(input_data[i]);
  }
  auto nhwc4_input_data = reinterpret_cast<float16_t *>(malloc(i_h * i_w * ic4 * C4NUM* sizeof(float16_t)));
  // NOTE(review): this passes float16_t buffers to a *Fp32* pack routine —
  // if PackNHWCToNHWC4Fp32 assumes 4-byte elements internally, the packed
  // layout here is wrong; confirm the routine is element-size agnostic or
  // switch to an fp16 variant.
  PackNHWCToNHWC4Fp32(fp16_input_data, nhwc4_input_data, 1, i_h * i_w, in_channel);
  std::cout << "==============fp16 input data===========" << std::endl;
  for (int i = 0; i < 20; i++) {
    std::cout << fp16_input_data[i] << ", ";
  }
  std::cout << std::endl;
  // Scratch buffers for the tiled im2col + GEMM inside ConvFp16.
  int output_count = conv_param->output_h_ * conv_param->output_w_;
  int output_tile_count = UP_DIV(output_count, tile_num);
  int unit_size = kernel_plane * ic4 * C4NUM;
  int packed_input_size = output_tile_count * tile_num * unit_size;
  auto packed_input = reinterpret_cast<float16_t *>(malloc(in_batch * packed_input_size * sizeof(float16_t)));
  memset(packed_input, 0, in_batch * packed_input_size * sizeof(float16_t));
  // Zero bias: this test checks only the convolution itself.
  auto bias_data = reinterpret_cast<float16_t *>(malloc(conv_param->output_channel_ * sizeof(float16_t)));
  memset(bias_data, 0, conv_param->output_channel_ * sizeof(float16_t));
  size_t output_data_size =
    conv_param->output_batch_ * conv_param->output_channel_ * conv_param->output_h_ * conv_param->output_w_;
  auto output_data = new float16_t[output_data_size];
  auto tmp_output_block = reinterpret_cast<float16_t *>(malloc(tile_num * out_channel * sizeof(float16_t)));
  // runtime part
  printf("Calculating runtime cost...\n");
  uint64_t time_avg = 0;
  // warmup
  for (int i = 0; i < 3; i++) {
    ConvFp16(nhwc4_input_data, packed_input, packed_weight, bias_data, tmp_output_block, output_data, 0, conv_param);
  }
  int loop_count = 100;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    ConvFp16(nhwc4_input_data, packed_input, packed_weight, bias_data, tmp_output_block, output_data, 0, conv_param);
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  time_avg = cost / loop_count;
  printf("single thread running time : %f ms\n", time_avg / 1000.0f);
  std::cout << "==============fp16 output data===========" << std::endl;
  for (int i = 0; i < 20; i++) {
    std::cout << output_data[i] << ", ";
  }
  std::cout << std::endl;
  // Widen the fp16 result back to fp32 for comparison with the reference.
  auto fp32_output_data = new float[output_data_size];
  for (int i = 0; i < output_data_size; i++) {
    fp32_output_data[i] = static_cast<float>(output_data[i]);
  }
  printf("==================output data=================\n");
  for (int i = 0; i < 20; i++) {
    std::cout << fp32_output_data[i] << " ,";
  }
  std::cout << std::endl;
  std::string output_path = "./test_data/conv/convfp32_out_1_28_28_32.bin";
  lite::CompareOutput(fp32_output_data, output_path);
  free(nhwc4_input_data);
  free(packed_input);
  free(bias_data);
  free(packed_weight);
  free(tmp_output_block);
  delete conv_param;
  // NOTE(review): input_data / weight_data come from ReadFile; scalar delete
  // is used here — confirm ReadFile's allocation matches (delete[] may be
  // required if it uses new char[]).
  delete input_data;
  delete weight_data;
  delete[] fp16_weight_data;
  delete[] fp16_input_data;
  delete[] fp32_output_data;
  delete[] output_data;
  MS_LOG(INFO) << "TestConvolutionFp16 passed";
}
TEST_F(TestConvolutionFp16, ConvTest2) {
  // Larger fp16 convolution (1x128x128x32 -> 1x128x128x32): converts fp32
  // fixtures to fp16, packs the weight, times 100 ConvFp16 runs and compares
  // the fp32-converted output against a reference binary.
  // prepare stage
  auto conv_param = new ConvParameter();
  InitConvParamGroup2Fp16(conv_param);
  // parameter
  int tile_num = 16;
  int k_h = conv_param->kernel_h_;
  int k_w = conv_param->kernel_w_;
  int kernel_plane = k_h * k_w;
  int in_batch = conv_param->input_batch_;
  int in_channel = conv_param->input_channel_;
  int out_channel = conv_param->output_channel_;
  // Channel blocking: input rounded up to blocks of 4, output to blocks of 8.
  int ic4 = UP_DIV(in_channel, C4NUM);
  int oc8 = UP_DIV(out_channel, C8NUM);
  // weight
  size_t weight_size;
  std::string weight_path = "./test_data/conv/convfp32_weight_32_3_3_32.bin";
  auto weight_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size));
  int weight_ele_size = weight_size / sizeof(float);
  auto fp16_weight_data = new float16_t[weight_ele_size];
  for (int i = 0; i < weight_ele_size; i++) {
    fp16_weight_data[i] = static_cast<float16_t>(weight_data[i]);
  }
  auto packed_weight = reinterpret_cast<float16_t *>(malloc(k_h * k_w * ic4 * C4NUM * oc8 * C8NUM * sizeof(float16_t)));
  PackWeightFp16(fp16_weight_data, conv_param, packed_weight);
  // input
  size_t input_size;
  std::string input_path = "./test_data/conv/convfp32_input_1_128_128_32.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  int input_ele_size = input_size / sizeof(float);
  auto fp16_input_data = new float16_t[input_ele_size];
  for (int i = 0; i < input_ele_size; i++) {
    fp16_input_data[i] = static_cast<float16_t>(input_data[i]);
  }
  // Scratch buffers for the tiled im2col + GEMM inside ConvFp16.
  int output_count = conv_param->output_h_ * conv_param->output_w_;
  int output_tile_count = UP_DIV(output_count, tile_num);
  int unit_size = kernel_plane * ic4 * C4NUM;
  int packed_input_size = output_tile_count * tile_num * unit_size;
  auto packed_input = reinterpret_cast<float16_t *>(malloc(in_batch * packed_input_size * sizeof(float16_t)));
  memset(packed_input, 0, in_batch * packed_input_size * sizeof(float16_t));
  // bias
  // Zero bias: this test checks only the convolution itself.
  auto bias_data = reinterpret_cast<float16_t *>(malloc(conv_param->output_channel_ * sizeof(float16_t)));
  memset(bias_data, 0, conv_param->output_channel_ * sizeof(float16_t));
  // output
  auto tmp_output_block = reinterpret_cast<float16_t *>(malloc(tile_num * out_channel * sizeof(float16_t)));
  size_t output_data_size =
    conv_param->output_batch_ * conv_param->output_channel_ * conv_param->output_h_ * conv_param->output_w_;
  auto output_data = new float16_t[output_data_size];
  // runtime part
  printf("Calculating runtime cost...\n");
  uint64_t time_avg = 0;
  // warmup
  // NOTE(review): unlike ConvTest1, the raw NHWC fp16 input is fed to
  // ConvFp16 directly without the NHWC4 pre-pack step — confirm which layout
  // ConvFp16 expects for its first argument.
  for (int i = 0; i < 3; i++) {
    ConvFp16(fp16_input_data, packed_input, packed_weight, bias_data, tmp_output_block, output_data, 0, conv_param);
  }
  int loop_count = 100;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    ConvFp16(fp16_input_data, packed_input, packed_weight, bias_data, tmp_output_block, output_data, 0, conv_param);
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  time_avg = cost / loop_count;
  printf("single thread running time : %f ms\n", time_avg / 1000.0f);
  std::cout << "==============fp16 output data===========" << std::endl;
  for (int i = 0; i < 20; i++) {
    std::cout << output_data[i] << ", ";
  }
  std::cout << std::endl;
  // Widen the fp16 result back to fp32 for comparison with the reference.
  auto fp32_output_data = new float[output_data_size];
  for (int i = 0; i < output_data_size; i++) {
    fp32_output_data[i] = static_cast<float>(output_data[i]);
  }
  printf("==================output data=================\n");
  for (int i = 0; i < 20; i++) {
    std::cout << fp32_output_data[i] << " ,";
  }
  std::cout << std::endl;
  std::string output_path = "./test_data/conv/convfp32_out_1_128_128_32.bin";
  lite::CompareOutput(fp32_output_data, output_path);
  free(packed_input);
  free(bias_data);
  free(packed_weight);
  free(tmp_output_block);
  delete conv_param;
  // NOTE(review): input_data / weight_data come from ReadFile; scalar delete
  // is used here — confirm ReadFile's allocation matches (delete[] may be
  // required if it uses new char[]).
  delete input_data;
  delete weight_data;
  delete[] fp16_weight_data;
  delete[] fp16_input_data;
  delete[] fp32_output_data;
  delete[] output_data;
  MS_LOG(INFO) << "TestConvolutionFp16 passed";
}
| TEST_F(TestConvolutionFp16, Conv3x3Test1) { | |||
| auto conv_param = new ConvParameter(); | |||
| InitConvParamGroup1Fp16(conv_param); | |||
| // todo | |||
| int thread_count = 1; | |||
| int tile_num = 16; | |||
| int output_batch = conv_param->output_batch_; | |||
| int output_h = conv_param->output_h_; | |||
| int output_w = conv_param->output_w_; | |||
| int ic4 = UP_DIV(conv_param->input_channel_, C4NUM); | |||
| int oc8 = UP_DIV(conv_param->output_channel_, C8NUM); | |||
| // tmp buffer | |||
| int k_plane = 36; | |||
| size_t tile_buffer_size = thread_count * tile_num * k_plane * ic4 * C4NUM * sizeof(float16_t); | |||
| float16_t *tile_buffer = reinterpret_cast<float16_t *>(malloc(tile_buffer_size)); | |||
| memset(tile_buffer, 0, tile_buffer_size); | |||
| size_t block_unit_buffer_size = thread_count * k_plane * C4NUM * sizeof(float16_t); | |||
| float16_t *block_unit_buffer = reinterpret_cast<float16_t *>(malloc(block_unit_buffer_size)); | |||
| memset(block_unit_buffer, 0, block_unit_buffer_size); | |||
| size_t tmp_dst_buffer_size = thread_count * tile_num * k_plane * oc8 * C8NUM * sizeof(float16_t); | |||
| float16_t *tmp_dst_buffer = reinterpret_cast<float16_t *>(malloc(tmp_dst_buffer_size)); | |||
| memset(tmp_dst_buffer, 0, tmp_dst_buffer_size); | |||
| size_t tmp_out_size = oc8 * C8NUM * output_batch * output_h * output_w * tile_num * sizeof(float16_t); | |||
| float16_t *tmp_out = reinterpret_cast<float16_t *>(malloc(tmp_out_size)); | |||
| memset(tmp_out, 0, tmp_out_size); | |||
| // weight | |||
| size_t weight_size; | |||
| std::string weight_path = "./test_data/conv/convfp32_weight_32_3_3_3.bin"; | |||
| auto weight_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size)); | |||
| std::cout << "==============fp32 weight data===========" << std::endl; | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << weight_data[i] << ", "; | |||
| } | |||
| std::cout << std::endl; | |||
| std::cout << "weight data size: " << weight_size / sizeof(float) << std::endl; | |||
| int weight_ele_size = weight_size / sizeof(float); | |||
| auto fp16_weight_data = new float16_t[weight_ele_size]; | |||
| for (int i = 0; i < weight_ele_size; i++) { | |||
| fp16_weight_data[i] = (float16_t)weight_data[i]; | |||
| } | |||
| std::cout << "==============fp16 weight data===========" << std::endl; | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << fp16_weight_data[i] << ", "; | |||
| } | |||
| std::cout << std::endl; | |||
| size_t transformed_size = ic4 * C4NUM * oc8 * C8NUM * 36; | |||
| auto transformed_weight_data = new float16_t[transformed_size]; | |||
| memset(transformed_weight_data, 0, transformed_size * sizeof(float16_t)); | |||
| kernel::ProcessFilterFp16(fp16_weight_data, transformed_weight_data, conv_param); | |||
| // bias | |||
| auto bias_data = | |||
| reinterpret_cast<float16_t *>(malloc(UP_DIV(conv_param->output_channel_, 8) * 8 * sizeof(float16_t))); | |||
| memset(bias_data, 0, UP_DIV(conv_param->output_channel_, 8) * 8 * sizeof(float16_t)); | |||
| // input | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/conv/convfp32_input_1_28_28_3.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| std::cout << "==============fp32 input data===========" << std::endl; | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << input_data[i] << ", "; | |||
| } | |||
| std::cout << std::endl; | |||
| int input_ele_size = input_size / sizeof(float); | |||
| auto fp16_input_data = new float16_t[input_ele_size]; | |||
| for (int i = 0; i < input_ele_size; i++) { | |||
| fp16_input_data[i] = static_cast<float16_t>(input_data[i]); | |||
| } | |||
| std::cout << "==============fp16 input data===========" << std::endl; | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << fp16_input_data[i] << ", "; | |||
| } | |||
| std::cout << std::endl; | |||
| // output | |||
| size_t output_data_size = | |||
| conv_param->output_batch_ * conv_param->output_channel_ * conv_param->output_h_ * conv_param->output_w_; | |||
| auto output_data = new float16_t[output_data_size]; | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| // warmup | |||
| for (int i = 0; i < 3; i++) { | |||
| Conv3x3Fp16(fp16_input_data, transformed_weight_data, bias_data, output_data, tile_buffer, block_unit_buffer, | |||
| tmp_dst_buffer, tmp_out, 0, conv_param); | |||
| } | |||
| int loop_count = 100; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| Conv3x3Fp16(fp16_input_data, transformed_weight_data, bias_data, output_data, tile_buffer, block_unit_buffer, | |||
| tmp_dst_buffer, tmp_out, 0, conv_param); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::cout << "==============fp16 output data===========" << std::endl; | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << output_data[i] << ", "; | |||
| } | |||
| std::cout << std::endl; | |||
| auto fp32_output_data = new float[output_data_size]; | |||
| for (int i = 0; i < output_data_size; i++) { | |||
| fp32_output_data[i] = static_cast<float>(output_data[i]); | |||
| } | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << fp32_output_data[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/conv/convfp32_out_1_28_28_32.bin"; | |||
| lite::CompareOutput(fp32_output_data, output_path); | |||
| free(bias_data); | |||
| free(tile_buffer); | |||
| free(block_unit_buffer); | |||
| free(tmp_dst_buffer); | |||
| free(tmp_out); | |||
| delete input_data; | |||
| delete weight_data; | |||
| delete conv_param; | |||
| delete[] fp16_weight_data; | |||
| delete[] fp16_input_data; | |||
| delete[] fp32_output_data; | |||
| delete[] output_data; | |||
| delete[] transformed_weight_data; | |||
| MS_LOG(INFO) << "TestConvolutionFp16 Conv3x3 passed"; | |||
| } | |||
| TEST_F(TestConvolutionFp16, Conv3x3Test2) { | |||
| auto conv_param = new ConvParameter(); | |||
| InitConvParamGroup2Fp16(conv_param); | |||
| // todo | |||
| int thread_count = 1; | |||
| int tile_num = 16; | |||
| int output_batch = conv_param->output_batch_; | |||
| int output_h = conv_param->output_h_; | |||
| int output_w = conv_param->output_w_; | |||
| int ic4 = UP_DIV(conv_param->input_channel_, C4NUM); | |||
| int oc8 = UP_DIV(conv_param->output_channel_, C8NUM); | |||
| // tmp buffer | |||
| int k_plane = 36; | |||
| size_t tile_buffer_size = thread_count * tile_num * k_plane * ic4 * C4NUM * sizeof(float16_t); | |||
| float16_t *tile_buffer = reinterpret_cast<float16_t *>(malloc(tile_buffer_size)); | |||
| memset(tile_buffer, 0, tile_buffer_size); | |||
| size_t block_unit_buffer_size = thread_count * k_plane * C4NUM * sizeof(float16_t); | |||
| float16_t *block_unit_buffer = reinterpret_cast<float16_t *>(malloc(block_unit_buffer_size)); | |||
| memset(block_unit_buffer, 0, block_unit_buffer_size); | |||
| size_t tmp_dst_buffer_size = thread_count * tile_num * k_plane * oc8 * C8NUM * sizeof(float16_t); | |||
| float16_t *tmp_dst_buffer = reinterpret_cast<float16_t *>(malloc(tmp_dst_buffer_size)); | |||
| memset(tmp_dst_buffer, 0, tmp_dst_buffer_size); | |||
| size_t tmp_out_size = oc8 * C8NUM * output_batch * output_h * output_w * tile_num * sizeof(float16_t); | |||
| float16_t *tmp_out = reinterpret_cast<float16_t *>(malloc(tmp_out_size)); | |||
| memset(tmp_out, 0, tmp_out_size); | |||
| // weight | |||
| size_t weight_size; | |||
| std::string weight_path = "./test_data/conv/convfp32_weight_32_3_3_32.bin"; | |||
| auto weight_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size)); | |||
| int weight_ele_size = weight_size / sizeof(float); | |||
| auto fp16_weight_data = new float16_t[weight_ele_size]; | |||
| for (int i = 0; i < weight_ele_size; i++) { | |||
| fp16_weight_data[i] = static_cast<float16_t>(weight_data[i]); | |||
| } | |||
| size_t transformed_size = ic4 * C4NUM * oc8 * C8NUM * 36; | |||
| auto transformed_weight_data = new float16_t[transformed_size]; | |||
| memset(transformed_weight_data, 0, transformed_size * sizeof(float16_t)); | |||
| kernel::ProcessFilterFp16(fp16_weight_data, transformed_weight_data, conv_param); | |||
| // bias | |||
| auto bias_data = | |||
| reinterpret_cast<float16_t *>(malloc(UP_DIV(conv_param->output_channel_, 8) * 8 * sizeof(float16_t))); | |||
| memset(bias_data, 0, UP_DIV(conv_param->output_channel_, 8) * 8 * sizeof(float16_t)); | |||
| // input | |||
| size_t input_size; | |||
| std::string input_path = "./test_data/conv/convfp32_input_1_128_128_32.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| int input_ele_size = input_size / sizeof(float); | |||
| auto fp16_input_data = new float16_t[input_ele_size]; | |||
| for (int i = 0; i < input_ele_size; i++) { | |||
| fp16_input_data[i] = static_cast<float16_t>(input_data[i]); | |||
| } | |||
| // output | |||
| size_t output_data_size = | |||
| conv_param->output_batch_ * conv_param->output_channel_ * conv_param->output_h_ * conv_param->output_w_; | |||
| auto output_data = new float16_t[output_data_size]; | |||
| // runtime part | |||
| printf("Calculating runtime cost...\n"); | |||
| uint64_t time_avg = 0; | |||
| // warmup | |||
| for (int i = 0; i < 3; i++) { | |||
| Conv3x3Fp16(fp16_input_data, transformed_weight_data, bias_data, output_data, tile_buffer, block_unit_buffer, | |||
| tmp_dst_buffer, tmp_out, 0, conv_param); | |||
| } | |||
| int loop_count = 100; | |||
| auto time_start = mindspore::lite::GetTimeUs(); | |||
| for (int i = 0; i < loop_count; i++) { | |||
| Conv3x3Fp16(fp16_input_data, transformed_weight_data, bias_data, output_data, tile_buffer, block_unit_buffer, | |||
| tmp_dst_buffer, tmp_out, 0, conv_param); | |||
| } | |||
| auto time_end = mindspore::lite::GetTimeUs(); | |||
| auto cost = time_end - time_start; | |||
| time_avg = cost / loop_count; | |||
| printf("single thread running time : %f ms\n", time_avg / 1000.0f); | |||
| std::cout << "==============fp16 output data===========" << std::endl; | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << output_data[i] << ", "; | |||
| } | |||
| std::cout << std::endl; | |||
| auto fp32_output_data = new float[output_data_size]; | |||
| for (int i = 0; i < output_data_size; i++) { | |||
| fp32_output_data[i] = static_cast<float>(output_data[i]); | |||
| } | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 20; i++) { | |||
| std::cout << fp32_output_data[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| std::string output_path = "./test_data/conv/convfp32_out_1_128_128_32.bin"; | |||
| lite::CompareOutput(fp32_output_data, output_path); | |||
| free(bias_data); | |||
| free(tile_buffer); | |||
| free(block_unit_buffer); | |||
| free(tmp_dst_buffer); | |||
| free(tmp_out); | |||
| delete input_data; | |||
| delete weight_data; | |||
| delete conv_param; | |||
| delete[] fp16_weight_data; | |||
| delete[] fp16_input_data; | |||
| delete[] fp32_output_data; | |||
| delete[] output_data; | |||
| delete[] transformed_weight_data; | |||
| MS_LOG(INFO) << "TestConvolutionFp16 Conv3x3 passed"; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,128 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/activation.h" | |||
| #include "mindspore/lite/src/kernel_registry.h" | |||
| #include "mindspore/lite/src/lite_kernel.h" | |||
| namespace mindspore { | |||
| class TestActivationFp32 : public mindspore::Common { | |||
| public: | |||
| TestActivationFp32() {} | |||
| }; | |||
| TEST_F(TestActivationFp32, ReluFp32) { | |||
| float input[8] = {-3, -2, -1, 0, 1, 5, 6, 7}; | |||
| float output[8] = {0}; | |||
| Relu(input, 8, output); | |||
| float expect[8] = {0, 0, 0, 0, 1, 5, 6, 7}; | |||
| for (int i = 0; i < 8; ++i) { | |||
| ASSERT_EQ(output[i], expect[i]); | |||
| } | |||
| } | |||
| TEST_F(TestActivationFp32, Relu6Fp32) { | |||
| float input[8] = {-3, -2, -1, 0, 1, 5, 6, 7}; | |||
| float output[8] = {0}; | |||
| Relu6(input, 8, output); | |||
| float expect[8] = {0, 0, 0, 0, 1, 5, 6, 6}; | |||
| for (int i = 0; i < 8; ++i) { | |||
| ASSERT_EQ(output[i], expect[i]); | |||
| } | |||
| MS_LOG(INFO) << "TestActivationFp32 passed"; | |||
| } | |||
| TEST_F(TestActivationFp32, LReluFp32) { | |||
| float input[8] = {-3, -2, -1, 0, 1, 5, 6, 7}; | |||
| float output[8] = {0}; | |||
| LRelu(input, 8, output, 0.01); | |||
| float expect[8] = {-0.03, -0.02, -0.01, 0, 1, 5, 6, 7}; | |||
| for (int i = 0; i < 8; ++i) { | |||
| ASSERT_EQ(output[i], expect[i]); | |||
| } | |||
| MS_LOG(INFO) << "TestActivationFp32 passed"; | |||
| } | |||
| TEST_F(TestActivationFp32, SigmoidFp32) { | |||
| float input[8] = {0, 1, 2, 3, 4, 5, 6, 7}; | |||
| float output[8] = {0}; | |||
| Sigmoid(input, 8, output); | |||
| // expect output {0.5, 0.731059, 0.880797, 0.952574, 0.982014, 0.993307, 0.997527, 0.999089}; | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 8; ++i) { | |||
| std::cout << output[i] << " "; | |||
| } | |||
| std::cout << std::endl; | |||
| MS_LOG(INFO) << "TestSigmoidFp32 passed"; | |||
| } | |||
| TEST_F(TestActivationFp32, TanhFp32) { | |||
| float input[7] = {-3, -2, -1, 0, 1, 2, 3}; | |||
| float output[7] = {0}; | |||
| Tanh(input, 7, output); | |||
| float expect[8] = {-0.995055, -0.964028, -0.761594, 0.000000, 0.761594, 0.964028, 0.995055}; | |||
| for (int i = 0; i < 8; ++i) { | |||
| EXPECT_NEAR(output[i], expect[i], 0.00001); | |||
| } | |||
| MS_LOG(INFO) << "TanhFp32 passed"; | |||
| } | |||
| TEST_F(TestActivationFp32, HSwishFp32) { | |||
| std::vector<lite::tensor::Tensor *> inputs_tensor; | |||
| std::vector<lite::tensor::Tensor *> outputs_tensor; | |||
| ActivationParameter op_param; | |||
| op_param.op_parameter_.type_ = schema::PrimitiveType_Activation; | |||
| op_param.type_ = schema::ActivationType_HSWISH; | |||
| op_param.alpha_ = 0.01; | |||
| std::vector<float> input = {-3.0, -2.0, -1.0, 0.0, 1.0, 5.0, 6.0, 7.0}; | |||
| std::vector<int> in_shape = {8}; | |||
| lite::tensor::Tensor input0_tensor; | |||
| inputs_tensor.push_back(&input0_tensor); | |||
| input0_tensor.SetData(input.data()); | |||
| input0_tensor.set_shape(in_shape); | |||
| std::vector<float> output(8); | |||
| std::vector<int> output_shape = {8}; | |||
| lite::tensor::Tensor output0_tensor; | |||
| outputs_tensor.push_back(&output0_tensor); | |||
| output0_tensor.SetData(output.data()); | |||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, schema::PrimitiveType_Activation}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetKernelCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| lite::Context ctx; | |||
| ctx.threadNum = 7; | |||
| kernel::LiteKernel *kernel = | |||
| creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc); | |||
| ASSERT_NE(kernel, nullptr); | |||
| auto output_tensor_shape = output0_tensor.shape(); | |||
| kernel->Run(); | |||
| std::vector<float> expect_output = {-0, -0.33333334, -0.33333334, 0, 0.6666667, 5, 6, 7}; | |||
| CompareOutputData(output.data(), expect_output.data(), 8, 0.00001); | |||
| input0_tensor.SetData(nullptr); | |||
| output0_tensor.SetData(nullptr); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,74 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h" | |||
| #include "mindspore/lite/src/kernel_registry.h" | |||
| #include "mindspore/lite/include/context.h" | |||
| namespace mindspore { | |||
| class TestQuantizedAdd : public mindspore::Common { | |||
| public: | |||
| TestQuantizedAdd() {} | |||
| }; | |||
| TEST_F(TestQuantizedAdd, Add) { | |||
| lite::tensor::Tensor in_tensor0(kNumberTypeInt8, {1, 1, 2, 5}); | |||
| lite::tensor::Tensor in_tensor1(kNumberTypeInt8, {1, 1, 2, 5}); | |||
| lite::tensor::Tensor out_tensor(kNumberTypeInt8, {1, 1, 2, 5}); | |||
| int8_t input_data0[] = {-102, 25, -51, 89, -102, 25, -51, 89, -102, 25}; // -0.8 0.2 -0.4 0.7 | |||
| int8_t input_data1[] = {38, 51, 64, -102, 38, 51, 64, -102, 38, 51}; // 0.3 0.4 0.5 -0.8 | |||
| int8_t output_data[10] = {0}; | |||
| in_tensor0.SetData(input_data0); | |||
| in_tensor1.SetData(input_data1); | |||
| out_tensor.SetData(output_data); | |||
| const lite::tensor::QuantArg quant_in0 = {0.00784314f, 0}; // -1.0--1.0 -> 0--255 | |||
| const lite::tensor::QuantArg quant_in1 = {0.00784314f, 0}; | |||
| const lite::tensor::QuantArg quant_out = {0.00784314f, 0}; | |||
| in_tensor0.AddQuantParam(quant_in0); | |||
| in_tensor1.AddQuantParam(quant_in1); | |||
| out_tensor.AddQuantParam(quant_out); | |||
| std::vector<lite::tensor::Tensor *> inputs = {&in_tensor0, &in_tensor1}; | |||
| std::vector<lite::tensor::Tensor *> outputs = {&out_tensor}; | |||
| OpParameter parameter = {}; | |||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, schema::PrimitiveType_Add}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetKernelCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| auto ctx = std::make_shared<lite::Context>(); | |||
| auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc); | |||
| ASSERT_NE(kernel, nullptr); | |||
| auto ret = kernel->Run(); | |||
| EXPECT_EQ(0, ret); | |||
| int8_t expect0[10] = {-64, 76, 13, -13, -64, 76, 13, -13, -64, 76}; // -0.5 0.6 0.1 -0.1 | |||
| for (int i = 0; i < 10; ++i) { | |||
| EXPECT_EQ(output_data[i], expect0[i]); | |||
| } | |||
| in_tensor0.SetData(nullptr); | |||
| in_tensor1.SetData(nullptr); | |||
| out_tensor.SetData(nullptr); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1 @@ | |||
| ýL[?-"R>‰qƒ>{B¸>´?yx?ó×_>JSD>Gº0? | |||
| @@ -0,0 +1 @@ | |||
| J[q? §P?¾ŸŒ>gý?õA?>oo?7G?x¸<¿”"? | |||
| @@ -0,0 +1 @@ | |||
| WÚU>X™8?*Á?!—v>›žF>0î?.ť<�C?Čd? | |||
| @@ -0,0 +1 @@ | |||
| ÜR?Ü]?žÎ>†c~?um?z1->í??Ø'?—U? | |||
| @@ -0,0 +1,2 @@ | |||
| „ë:¿eQÝ¿²c?pº@ÞE(Àoéï=ű*¿Î¢ñ=Í•†¿^C½°ç?Æþ-?ú»=@$Á?ò(ÀW!=à+> æ¿êó@•@§? -¿JP Àµï?€k¿ýüÁ¿þ“?M | |||
| ¾wq‘>3Û=RïÀ¢j @¿E%@!H￸lÀþ�¾=•©=\j/½m2¶>bâ@òB‡¾ | |||
| @@ -0,0 +1 @@ | |||
| ¦êœ¿´Xã>+Î?6Å@?Ü•�¿Çe¥?š;¿–ˆÜ¿Úþ?Æ„R?�ÇÓ?ñýèÀí¾P�¿`Ú„=¼»?æ?¨¥Ž¿ñ�C¿mÄÇ?òßH?õ¤¾<±ž¿ÀŸ€<›º?ÀŸâ<Cû»?À’�=ŽúN?Ü턾�±¿¹ÎÏ¿êà׿°ýB>ŠÚˆ¿�3‘?þ:v¿� ½?�”¾¿-Œ¨? | |||
| @@ -0,0 +1 @@ | |||
| 3:П�х?iпОМ�te?Йй?6\XОМ`^Оj6@h'П>жнМуьд>ЈЌ6П�юђ?в%РсFm?|C)@ешрПџlѕ>FЊЋ@vCП(и*ПМn6@{Ф?�@ТЙ�О,~@d6>@(R@g�?�8@ | |||
| @@ -0,0 +1,3 @@ | |||
| Model: mobilenet_v1_1.0_224_quant | |||
| Input: input | |||
| Output: MobilenetV1/Predictions/Reshape_1 | |||
| @@ -0,0 +1,134 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <vector> | |||
| #include "common/common_test.h" | |||
| #include "backend/kernel_compiler/common_utils.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
// Fixture for backend/kernel_compiler common_utils tests
// (sparse-gradient bucket reduction).
class CommonUtilTest : public mindspore::Common {
 public:
  CommonUtilTest() = default;
};
| TEST_F(CommonUtilTest, BucketReduceSparseGradient1) { | |||
| // The indices is a vector and the grad is a tensor with shape (6, 2) | |||
| /* 0 | |||
| * 0 | |||
| * 1 | |||
| * 1 | |||
| * 0 | |||
| * 3 | |||
| */ | |||
| std::vector<int> indices{0, 0, 1, 1, 0, 3}; | |||
| /* 0 1 | |||
| * 2 3 | |||
| * 4 5 | |||
| * 6 7 | |||
| * 8 9 | |||
| * 10 11 | |||
| */ | |||
| std::vector<float> grad; | |||
| for (int i = 0; i < 6 * 2; i++) { | |||
| grad.push_back(i); | |||
| } | |||
| std::vector<int> unique_indices(6); | |||
| std::vector<float> summed_grad(12); | |||
| std::vector<int> tmp_indices(6); | |||
| std::vector<float> tmp_grad(12); | |||
| SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 6}); | |||
| SparseGradient workspace_grad({tmp_grad.data(), tmp_indices.data(), 6}); | |||
| SparseGradient input_grad({grad.data(), indices.data(), 6}); | |||
| ReduceSparseGradientParam param; | |||
| param.input_grad_ = &input_grad; | |||
| param.workspace_grad_ = &workspace_grad; | |||
| param.output_grad_ = &unique_grad; | |||
| param.max_index_ = 6; | |||
| param.value_stride_ = 2; | |||
| BucketReduceSparseGradient(param); | |||
| EXPECT_EQ(unique_grad.indices_size_, 3); | |||
| std::vector<int> expect_indices({0, 1, 3}); | |||
| for (size_t i = 0; i < unique_grad.indices_size_; ++i) { | |||
| EXPECT_EQ(unique_grad.indices_[i], expect_indices[i]); | |||
| } | |||
| /* 10 13 | |||
| * 10 12 | |||
| * 10 11 | |||
| */ | |||
| std::vector<int> expect_value({10, 13, 10, 12, 10, 11}); | |||
| for (size_t i = 0; i < unique_grad.indices_size_ * 2; ++i) { | |||
| EXPECT_EQ(unique_grad.value_[i], expect_value[i]); | |||
| } | |||
| } | |||
| TEST_F(CommonUtilTest, BucketReduceSparseGradient2) { | |||
| // The indices is a vector and the grad is a tensor with shape (6, 2) | |||
| /* 0 | |||
| * 0 | |||
| * 1 | |||
| * 1 | |||
| * 0 | |||
| * 6 | |||
| */ | |||
| std::vector<int> indices{0, 0, 1, 1, 0, 6}; | |||
| /* 0 1 | |||
| * 2 3 | |||
| * 4 5 | |||
| * 6 7 | |||
| * 8 9 | |||
| * 10 11 | |||
| */ | |||
| std::vector<float> grad; | |||
| for (int i = 0; i < 6 * 2; i++) { | |||
| grad.push_back(i); | |||
| } | |||
| std::vector<int> unique_indices(6); | |||
| std::vector<float> summed_grad(12); | |||
| std::vector<int> tmp_indices(6); | |||
| std::vector<float> tmp_grad(12); | |||
| SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 6}); | |||
| SparseGradient workspace_grad({tmp_grad.data(), tmp_indices.data(), 6}); | |||
| SparseGradient input_grad({grad.data(), indices.data(), 6}); | |||
| ReduceSparseGradientParam param; | |||
| param.input_grad_ = &input_grad; | |||
| param.workspace_grad_ = &workspace_grad; | |||
| param.output_grad_ = &unique_grad; | |||
| param.max_index_ = 6; | |||
| param.value_stride_ = 2; | |||
| BucketReduceSparseGradient(param); | |||
| EXPECT_EQ(unique_grad.indices_size_, 2); | |||
| std::vector<int> expect_indices({0, 1}); | |||
| for (size_t i = 0; i < unique_grad.indices_size_; ++i) { | |||
| EXPECT_EQ(unique_grad.indices_[i], expect_indices[i]); | |||
| } | |||
| /* 10 13 | |||
| * 10 12 | |||
| */ | |||
| std::vector<int> expect_value({10, 13, 10, 12}); | |||
| for (size_t i = 0; i < unique_grad.indices_size_ * 2; ++i) { | |||
| EXPECT_EQ(unique_grad.value_[i], expect_value[i]); | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,89 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/common/file_utils.h" | |||
| #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | |||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||
| #include "mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h" | |||
| // using namespace mindspore::kernel; | |||
| // using namespace mindspore::lite; | |||
| // using namespace mindspore; | |||
| namespace mindspore { | |||
| class TestMatMulOpenCL : public mindspore::Common { | |||
| public: | |||
| TestMatMulOpenCL() {} | |||
| }; | |||
| TEST_F(TestMatMulOpenCL, MatMulFp32) { | |||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||
| ocl_runtime->Init(); | |||
| size_t input_size; | |||
| int ci = 1280; | |||
| int co = 1001; | |||
| std::string input_path = "./test_data/matmul/matmul_fp32_input.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| size_t weight_size; | |||
| std::string weight_path = "./test_data/matmul/matmul_fp32_weight.bin"; | |||
| auto weight_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size)); | |||
| lite::tensor::Tensor *tensor_x = new lite::tensor::Tensor(TypeId(kNumberTypeFloat32), {1, ci}); | |||
| lite::tensor::Tensor *tensor_w = new lite::tensor::Tensor(TypeId(kNumberTypeFloat32), {co, ci}); | |||
| tensor_w->SetData(weight_data); | |||
| lite::tensor::Tensor *tensor_out = new lite::tensor::Tensor(TypeId(kNumberTypeFloat32), {1, co}); | |||
| std::vector<lite::tensor::Tensor *> inputs{tensor_x, tensor_w}; | |||
| std::vector<lite::tensor::Tensor *> outputs{tensor_out}; | |||
| auto *arith_kernel = new MatMulOpenCLKernel(nullptr, inputs, outputs, false); | |||
| arith_kernel->Init(); | |||
| std::vector<LiteKernel *> kernels{arith_kernel}; | |||
| auto *pGraph = new SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||
| pGraph->Init(); | |||
| memcpy(inputs[0]->Data(), input_data, sizeof(float) * ci); | |||
| pGraph->Run(); | |||
| printf("==================output data=================\n"); | |||
| float *output_data = reinterpret_cast<float *>(tensor_out->Data()); | |||
| std::cout << std::endl; | |||
| for (int i = 0; i < co; i++) { | |||
| std::cout << output_data[i] << ", "; | |||
| } | |||
| std::cout << std::endl; | |||
| size_t output_size; | |||
| std::string output_path = "./test_data/matmul/matmul_fp32_output.bin"; | |||
| auto correct_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); | |||
| // compare | |||
| CompareOutputData(output_data, correct_data, co * sizeof(float), 0.00001); | |||
| delete input_data; | |||
| delete weight_data; | |||
| delete tensor_x; | |||
| delete tensor_w; | |||
| delete tensor_out; | |||
| delete correct_data; | |||
| MS_LOG(INFO) << "TestMatMulFp32 passed"; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,35 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include "common/common_test.h" | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "mindspore/lite/src/common/file_utils.h" | |||
| #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | |||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||
| #ifndef TESTS_UT_OPENCL_KERNLE_TESTS_H | |||
| #define TESTS_UT_OPENCL_KERNLE_TESTS_H | |||
| namespace mindspore { | |||
| class TestOpenCLKernel : public mindspore::Common { | |||
| public: | |||
| TestOpenCLKernel() {} | |||
| }; | |||
| } // namespace mindspore | |||
| #endif // TESTS_UT_OPENCL_KERNLE_TESTS_H | |||
| @@ -0,0 +1,96 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#include <cmath>
#include <iostream>
#include <memory>
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h"
| // using namespace mindspore::kernel; | |||
| // using namespace mindspore::lite; | |||
| // using namespace mindspore; | |||
| namespace mindspore { | |||
| class TestSoftmaxOpenCL : public mindspore::Common {}; | |||
| void InitSoftaxParam(SoftmaxParameter *param) { param->axis_ = -1; } | |||
// End-to-end test of the OpenCL softmax kernel: builds a one-kernel subgraph,
// feeds log(1..4) as input, runs on the GPU, and compares against an expected
// output. Requires a working OpenCL device at test time.
TEST_F(TestSoftmaxOpenCL, SoftmaxFp32) {
  std::cout << "======" << std::endl;
  MS_LOG(INFO) << "start TEST_F TestSoftmaxOpenCL";
  // Acquire the process-wide OpenCL runtime and initialize the device/context.
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
  ocl_runtime->Init();
  MS_LOG(INFO) << "create SoftmaxParameter";
  // NOTE(review): param and the tensors/kernels below are never freed; ownership
  // may pass to the subgraph — confirm before adding deletes.
  auto param = new SoftmaxParameter();
  InitSoftaxParam(param);
  MS_LOG(INFO) << "create Tensors";
  // NHWC-shaped 1x2x2x1 input and output (4 elements each).
  std::vector<int> shape_in = {1, 2, 2, 1};
  std::vector<int> shape_out = {1, 2, 2, 1};
  auto data_type = kNumberTypeFloat32;
  auto tensorType = schema::NodeType_ValueNode;
  lite::tensor::Tensor *tensor_in = new lite::tensor::Tensor(data_type, shape_in, schema::Format_NCHW, tensorType);
  lite::tensor::Tensor *tensor_out = new lite::tensor::Tensor(data_type, shape_out, schema::Format_NCHW, tensorType);
  std::vector<lite::tensor::Tensor *> inputs{tensor_in};
  std::vector<lite::tensor::Tensor *> outputs{tensor_out};
  MS_LOG(INFO) << "create OpenCL Kernel";
  auto *Softmax_kernel = new SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
  Softmax_kernel->Init();
  std::vector<LiteKernel *> kernels{Softmax_kernel};
  MS_LOG(INFO) << "create SubGraphOpenCLKernel";
  // The single kernel serves as the subgraph's input, output, and node list.
  auto *pGraph = new SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
  pGraph->Init();
  MS_LOG(INFO) << "initialize data";
  std::vector<lite::tensor::Tensor *> tensor_map = {tensor_in};
  for (auto &tensor_file : tensor_map) {
    auto tensor = tensor_file;
    size_t size = tensor->Size();
    // log(1..4) so that exp() inside softmax recovers the integers 1..4.
    // NOTE(review): `size` (tensor bytes) must equal sizeof(data) = 16 bytes for
    // this memcpy to be safe; holds for the 1x2x2x1 fp32 shape above.
    const float data[4] = {std::log(1.0f), std::log(2.0f), std::log(3.0f), std::log(4.0f)};
    memcpy(tensor->Data(), data, size);
  }
  MS_LOG(INFO) << "pGraph->Run()";
  pGraph->Run();
  MS_LOG(INFO) << "==================output data=================";
  float *output_data = reinterpret_cast<float *>(tensor_out->Data());
  size_t output_size = tensor_out->Size();
  printf("output:");
  for (int i = 0; i < 4; i++) {
    printf("%.3f ", output_data[i]);
  }
  printf("\n");
  // NOTE(review): with axis_=-1 and the last dimension of size 1, a standard
  // softmax would produce all 1.0 — these expectations assume the kernel
  // normalizes over a different/flattened axis; verify against the kernel impl.
  float expect[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  for (int i = 0; i < tensor_out->ElementsNum(); ++i) {
    if (std::fabs(output_data[i] - expect[i]) > 1e-5) {
      printf("idx[%d] except=%.3f output=%.3f .", i, expect[i], output_data[i]);
    }
  }
  printf("\nTest all close OK for %zu!\n", output_size);
  lite::CompareOutputData(output_data, expect, 4);
}
| } // namespace mindspore | |||
// ===== begin new file: training session test (diff hunk boundary) =====
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <cmath> | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include "utils/base_ref_utils.h" | |||
| #include "mindspore/lite/schema/inner/model_generated.h" | |||
| #include "mindspore/lite/src/train/model_impl.h" | |||
| #include "mindspore/lite/include/model.h" | |||
| #include "mindspore/lite/src/train/train_session.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| namespace mindspore { | |||
| class TrainTest : public mindspore::Common { | |||
| public: | |||
| TrainTest() {} | |||
| }; | |||
| TEST_F(TrainTest, TestConvNode) { | |||
| auto meta_graph = std::make_shared<schema::MetaGraphT>(); | |||
| meta_graph->name = "graph"; | |||
| auto node = std::make_unique<schema::CNodeT>(); | |||
| node->inputIndex = {0, 1}; | |||
| node->outputIndex = {2}; | |||
| node->primitive = std::make_unique<schema::PrimitiveT>(); | |||
| node->primitive->value.type = schema::PrimitiveType_Conv2D; | |||
| auto primitive = new schema::Conv2DT; | |||
| primitive->padMode = schema::PadMode_SAME; | |||
| primitive->channelIn = 3; | |||
| primitive->channelOut = 32; | |||
| primitive->format = schema::Format_NHWC; | |||
| primitive->strideH = 1; | |||
| primitive->strideW = 1; | |||
| primitive->kernelH = 3; | |||
| primitive->kernelW = 3; | |||
| primitive->dilateH = 1; | |||
| primitive->dilateW = 1; | |||
| node->primitive->value.value = primitive; | |||
| node->name = "Conv2D"; | |||
| meta_graph->nodes.emplace_back(std::move(node)); | |||
| meta_graph->inputIndex = {0}; | |||
| meta_graph->outputIndex = {2}; | |||
| auto input0 = std::make_unique<schema::TensorT>(); | |||
| input0->nodeType = schema::NodeType::NodeType_Parameter; // todo use ValueNode? | |||
| input0->format = schema::Format_NHWC; | |||
| input0->dataType = TypeId::kNumberTypeFloat32; | |||
| input0->dims = {1, 28, 28, 3}; | |||
| input0->offset = -1; | |||
| meta_graph->allTensors.emplace_back(std::move(input0)); | |||
| auto weight = std::make_unique<schema::TensorT>(); | |||
| weight->nodeType = schema::NodeType::NodeType_ValueNode; | |||
| weight->format = schema::Format_KHWC; | |||
| weight->dataType = TypeId::kNumberTypeFloat32; | |||
| weight->dims = {32, 3, 3, 3}; | |||
| auto buf = new char *[1]; | |||
| //================================================================ | |||
| size_t weight_size; | |||
| std::string weight_path = "./convfp32_weight_32_3_3_3.bin"; | |||
| ReadFile(weight_path.c_str(), &weight_size, buf); | |||
| ASSERT_NE(nullptr, buf[0]); | |||
| auto weight_data_temp = reinterpret_cast<float *>(buf[0]); | |||
| ASSERT_NE(nullptr, weight_data_temp); | |||
| weight->data.resize(sizeof(float) * 32 * 3 * 3 * 3); | |||
| //================================================================ | |||
| memcpy(weight->data.data(), weight_data_temp, weight_size); | |||
| weight->offset = -1; | |||
| meta_graph->allTensors.emplace_back(std::move(weight)); | |||
| auto output = std::make_unique<schema::TensorT>(); | |||
| output->nodeType = schema::NodeType::NodeType_Parameter; | |||
| output->format = schema::Format_NHWC; | |||
| output->dataType = TypeId::kNumberTypeFloat32; | |||
| output->dims = {1, 28, 28, 32}; | |||
| output->offset = -1; | |||
| meta_graph->allTensors.emplace_back(std::move(output)); | |||
| flatbuffers::FlatBufferBuilder builder(1024); | |||
| auto offset = schema::MetaGraph::Pack(builder, meta_graph.get()); | |||
| builder.Finish(offset); | |||
| size_t size = builder.GetSize(); | |||
| const char *content = reinterpret_cast<char *>(builder.GetBufferPointer()); | |||
| auto model = lite::Model::Import(content, size); | |||
| ASSERT_NE(nullptr, model); | |||
| auto session = new session::TrainSession(); // inference::MSSession::CreateSession(kCPUDevice, 0); | |||
| ASSERT_NE(nullptr, session); | |||
| auto graphId = session->CompileGraph(NOT_NULL(model->GetModelImpl())); | |||
| auto inTensor = new tensor::Tensor(TypeId::kNumberTypeFloat32, {1, 28, 28, 3}); | |||
| ASSERT_NE(nullptr, inTensor); | |||
| ASSERT_EQ(sizeof(float) * (28 * 28 * 3), inTensor->Size()); | |||
| auto ret = inTensor->MallocData(); | |||
| ASSERT_EQ(0, ret); | |||
| auto data = inTensor->Data(); | |||
| //=================================================== | |||
| size_t input_size; | |||
| std::string input_path = "./convfp32_input_1_28_28_3.bin"; | |||
| ReadFile(input_path.c_str(), &input_size, buf); | |||
| ASSERT_NE(nullptr, buf[0]); | |||
| auto input_data = reinterpret_cast<float *>(buf[0]); | |||
| ASSERT_NE(nullptr, input_data); | |||
| //=================================================== | |||
| memcpy(data, input_data, input_size); | |||
| std::vector<std::shared_ptr<tensor::Tensor>> inputs; | |||
| inputs.emplace_back(inTensor); | |||
| VectorRef outputsRef; | |||
| session->RunGraph(graphId, inputs, &outputsRef); | |||
| auto outputs = TransformVectorRefToMultiTensor(outputsRef); | |||
| ASSERT_EQ(1, outputs.size()); | |||
| ASSERT_EQ(1, outputs.front().size()); | |||
| auto runOutput = outputs.front().front(); | |||
| ASSERT_NE(nullptr, runOutput); | |||
| ASSERT_EQ(28 * 28 * 32, runOutput->ElementsNum()); | |||
| ASSERT_EQ(TypeId::kNumberTypeFloat32, runOutput->data_type()); | |||
| auto *outData = reinterpret_cast<float *>(runOutput->MutableData()); | |||
| //=================================================== | |||
| size_t output_size; | |||
| std::string output_path = "./convfp32_out_1_28_28_32.bin"; | |||
| ReadFile(output_path.c_str(), &output_size, buf); | |||
| ASSERT_NE(nullptr, buf[0]); | |||
| auto output_data = reinterpret_cast<float *>(buf[0]); | |||
| ASSERT_NE(nullptr, output_data); | |||
| //=================================================== | |||
| ASSERT_EQ(output_size, runOutput->Size()); | |||
| for (size_t i = 0; i < runOutput->ElementsNum(); i++) { | |||
| ASSERT_EQ(output_data[i], outData[i]); | |||
| } | |||
| MS_LOG(INFO) << "Passed"; | |||
| } | |||
| // TEST_F(TrainTest, TestMultiNode) { | |||
| // auto msGraph = std::make_shared<schema::GraphDefT>(); | |||
| // msGraph->name = "graph"; | |||
| // auto msSubgraph = std::make_unique<schema::SubGraphDefT>(); | |||
| // msSubgraph->name = "subGraph"; | |||
| // | |||
| // auto conv = std::make_unique<schema::OpDefT>(); | |||
| // conv->inputIndex = {0, 1}; | |||
| // conv->outputIndex = {2}; | |||
| // conv->attr.type = schema::OpT_Conv2D; | |||
| // auto conv_attr = new schema::Conv2DT; | |||
| // conv_attr->padMode = schema::PadMode_SAME; | |||
| // conv_attr->format = schema::Format_NHWC; | |||
| // conv_attr->strideH = 1; | |||
| // conv_attr->strideW = 1; | |||
| // conv_attr->kernelH = 3; | |||
| // conv_attr->kernelW = 3; | |||
| // conv_attr->dilateH = 1; | |||
| // conv_attr->dilateW = 1; | |||
| // | |||
| // conv->attr.value = conv_attr; | |||
| // conv->name = "Conv2D"; | |||
| // conv->fmkType = schema::FmkType_CAFFE; | |||
| // msSubgraph->nodes.emplace_back(std::move(conv)); | |||
| // | |||
| // auto matMul1 = std::make_unique<schema::OpDefT>(); | |||
| // matMul1->inputIndex = {2, 3}; | |||
| // matMul1->outputIndex = {4}; | |||
| // matMul1->attr.type = schema::OpT_MatMul; | |||
| // auto matMul_attr1 = new schema::MatMulT; | |||
| // matMul_attr1->transposeA = false; | |||
| // matMul_attr1->transposeB = true; | |||
| // matMul1->attr.value = matMul_attr1; | |||
| // matMul1->name = "matmul1"; | |||
| // matMul1->fmkType = schema::FmkType_CAFFE; | |||
| // msSubgraph->nodes.emplace_back(std::move(matMul1)); | |||
| // | |||
| // auto matMul2 = std::make_unique<schema::OpDefT>(); | |||
| // matMul2->inputIndex = {4, 5}; | |||
| // matMul2->outputIndex = {6}; | |||
| // matMul2->attr.type = schema::OpT_MatMul; | |||
| // auto matMul_attr2 = new schema::MatMulT; | |||
| // matMul_attr2->transposeA = false; | |||
| // matMul_attr2->transposeB = true; | |||
| // matMul2->attr.value = matMul_attr2; | |||
| // matMul2->name = "matmul2"; | |||
| // matMul2->fmkType = schema::FmkType_CAFFE; | |||
| // msSubgraph->nodes.emplace_back(std::move(matMul2)); | |||
| // | |||
| // msSubgraph->inputIndex = {0}; | |||
| // msSubgraph->outputIndex = {6}; | |||
| // | |||
| // auto input0 = std::make_unique<schema::TensorDefT>(); | |||
| // input0->refCount = schema::MSCONST_WEIGHT_REFCOUNT; | |||
| // input0->format = schema::Format_NHWC; | |||
| // input0->dataType = TypeId::kNumberTypeFloat32; | |||
| // input0->dims = {1, 5, 5, 3}; | |||
| // input0->offset = -1; | |||
| // msSubgraph->allTensors.emplace_back(std::move(input0)); | |||
| // | |||
| // auto conv_weight = std::make_unique<schema::TensorDefT>(); | |||
| // conv_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT; | |||
| // conv_weight->format = schema::Format_KHWC; | |||
| // conv_weight->dataType = TypeId::kNumberTypeFloat32; | |||
| // conv_weight->dims = {8, 3, 3, 3}; | |||
| // conv_weight->data.resize(8*3*3*3*sizeof(float)); | |||
| // msSubgraph->allTensors.emplace_back(std::move(conv_weight)); | |||
| // | |||
| // auto conv_output = std::make_unique<schema::TensorDefT>(); | |||
| // conv_output->refCount = 0; | |||
| // conv_output->format = schema::Format_NHWC; | |||
| // conv_output->dataType = TypeId::kNumberTypeFloat32; | |||
| // conv_output->dims = {1, 5, 5, 8}; | |||
| // msSubgraph->allTensors.emplace_back(std::move(conv_output)); | |||
| // | |||
| // auto add_weight = std::make_unique<schema::TensorDefT>(); | |||
| // add_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT; | |||
| // add_weight->format = schema::Format_NHWC; | |||
| // add_weight->dataType = TypeId::kNumberTypeFloat32; | |||
| // add_weight->dims = {1, 5, 5, 8}; | |||
| // add_weight->data.resize(5*5*8*sizeof(float)); | |||
| // msSubgraph->allTensors.emplace_back(std::move(add_weight)); | |||
| // | |||
| // auto add_output = std::make_unique<schema::TensorDefT>(); | |||
| // add_output->refCount = 0; | |||
| // add_output->format = schema::Format_NHWC; | |||
| // add_output->dataType = TypeId::kNumberTypeFloat32; | |||
| // add_output->dims = {1, 5, 5, 8}; | |||
| // msSubgraph->allTensors.emplace_back(std::move(add_output)); | |||
| // | |||
| // auto mul_weight = std::make_unique<schema::TensorDefT>(); | |||
| // mul_weight->refCount = schema::MSCONST_WEIGHT_REFCOUNT; | |||
| // mul_weight->format = schema::Format_NHWC; | |||
| // mul_weight->dataType = TypeId::kNumberTypeFloat32; | |||
| // mul_weight->dims = {1, 5, 5, 8}; | |||
| // mul_weight->data.resize(5*5*8*sizeof(float)); | |||
| // msSubgraph->allTensors.emplace_back(std::move(mul_weight)); | |||
| // | |||
| // auto mul_output = std::make_unique<schema::TensorDefT>(); | |||
| // mul_output->refCount = 0; | |||
| // mul_output->format = schema::Format_NHWC; | |||
| // mul_output->dataType = TypeId::kNumberTypeFloat32; | |||
| // mul_output->dims = {1, 5, 5, 8}; | |||
| // msSubgraph->allTensors.emplace_back(std::move(mul_output)); | |||
| // msGraph->subgraphs.emplace_back(std::move(msSubgraph)); | |||
| // | |||
| // flatbuffers::FlatBufferBuilder builder(1024); | |||
| // auto offset = schema::GraphDef::Pack(builder, msGraph.get()); | |||
| // builder.Finish(offset); | |||
| // size_t size = builder.GetSize(); | |||
| // const char *content = (char *)builder.GetBufferPointer(); | |||
| // const std::string strstub = ""; | |||
| // | |||
| // auto func_graph = inference::LoadModel(content, size, strstub); | |||
| // ASSERT_NE(nullptr, func_graph); | |||
| // auto session = inference::MSSession::CreateSession(kCPUDevice, 0); | |||
| // ASSERT_NE(nullptr, session); | |||
| // auto graphId = session->CompileGraph(func_graph); | |||
| // | |||
| // auto inTensor = | |||
| // std::shared_ptr<inference::MSTensor>(inference::MSTensor::CreateTensor(TypeId::kNumberTypeFloat32, {1, 5, 5, 3})); | |||
| // ASSERT_NE(nullptr, inTensor); | |||
| // ASSERT_EQ(sizeof(float) * (5 * 5 * 3), inTensor->Size()); | |||
| // (void)inTensor->MutableData(); | |||
| // | |||
| // std::vector<std::shared_ptr<inference::MSTensor>> inputs; | |||
| // inputs.emplace_back(inTensor); | |||
| // auto outputs = session->RunGraph(graphId, inputs); | |||
| // ASSERT_EQ(1, outputs.size()); | |||
| // ASSERT_EQ(1, outputs.front().size()); | |||
| // auto runOutput = outputs.front().front(); | |||
| // ASSERT_NE(nullptr, runOutput); | |||
| // ASSERT_EQ(5 * 5 * 8, runOutput->ElementsNum()); | |||
| // ASSERT_EQ(TypeId::kNumberTypeFloat32, runOutput->data_type()); | |||
| // MS_LOG(INFO) << "Passed"; | |||
| // } | |||
| } // namespace mindspore | |||