| @@ -32,6 +32,7 @@ | |||
| #endif | |||
| #ifdef ENABLE_ACL | |||
| #include "minddata/dataset/core/ascend_resource.h" | |||
| #include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h" | |||
| #include "minddata/dataset/kernels/ir/vision/ascend_vision_ir.h" | |||
| #endif | |||
| @@ -42,6 +43,15 @@ using json = nlohmann::json; | |||
| struct Execute::ExtraInfo { | |||
| std::multimap<std::string, std::vector<uint32_t>> aipp_cfg_; | |||
| bool init_with_shared_ptr_ = true; // Initial execute object with shared_ptr as default | |||
| #ifdef ENABLE_ACL | |||
| std::multimap<std::string, std::string> op2para_map_ = {{vision::kDvppCropJpegOperation, "size"}, | |||
| {vision::kDvppDecodeResizeOperation, "size"}, | |||
| {vision::kDvppDecodeResizeCropOperation, "crop_size"}, | |||
| {vision::kDvppDecodeResizeCropOperation, "resize_size"}, | |||
| {vision::kDvppNormalizeOperation, "mean"}, | |||
| {vision::kDvppNormalizeOperation, "std"}, | |||
| {vision::kDvppResizeJpegOperation, "size"}}; | |||
| #endif | |||
| }; | |||
| // FIXME - Temporarily overload Execute to support both TensorOperation and TensorTransform | |||
| @@ -221,6 +231,7 @@ Status Execute::operator()(const mindspore::MSTensor &input, mindspore::MSTensor | |||
| // Parse TensorTransform transforms_ into TensorOperation ops_ | |||
| if (info_->init_with_shared_ptr_) { | |||
| RETURN_IF_NOT_OK(ParseTransforms_()); | |||
| info_->init_with_shared_ptr_ = false; | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!ops_.empty(), "Input TensorOperation should be provided"); | |||
| @@ -285,11 +296,13 @@ Status Execute::operator()(const mindspore::MSTensor &input, mindspore::MSTensor | |||
| device_input = std::move(device_output); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(device_input->HasDeviceData(), "Apply transform failed, output tensor has no data"); | |||
| std::shared_ptr<mindspore::dataset::Tensor> host_output; | |||
| // TODO(lizhenglong) waiting for computing department development, hence we pop data onto host temporarily. | |||
| RETURN_IF_NOT_OK(device_resource_->Pop(device_input, &host_output)); | |||
| *output = mindspore::MSTensor(std::make_shared<DETensor>(host_output)); | |||
| // *output = mindspore::MSTensor(std::make_shared<DETensor>(device_input, true)); Use in the future | |||
| // std::shared_ptr<mindspore::dataset::Tensor> host_output; | |||
| // RETURN_IF_NOT_OK(device_resource_->Pop(device_input, &host_output)); | |||
| // *output = mindspore::MSTensor(std::make_shared<DETensor>(host_output)); | |||
| *output = mindspore::MSTensor(std::make_shared<DETensor>(device_input, true)); | |||
| #endif | |||
| } | |||
| return Status::OK(); | |||
| @@ -306,6 +319,7 @@ Status Execute::operator()(const std::vector<MSTensor> &input_tensor_list, std:: | |||
| // Parse TensorTransform transforms_ into TensorOperation ops_ | |||
| if (info_->init_with_shared_ptr_) { | |||
| RETURN_IF_NOT_OK(ParseTransforms_()); | |||
| info_->init_with_shared_ptr_ = false; | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!ops_.empty(), "Input TensorOperation should be provided"); | |||
| @@ -386,6 +400,7 @@ Status Execute::operator()(const std::vector<MSTensor> &input_tensor_list, std:: | |||
| std::vector<uint32_t> AippSizeFilter(const std::vector<uint32_t> &resize_para, const std::vector<uint32_t> &crop_para) { | |||
| std::vector<uint32_t> aipp_size; | |||
| // Special condition where (no Crop and no Resize) or (no Crop and resize with fixed ratio) will lead to dynamic input | |||
| if ((resize_para.size() == 0 || resize_para.size() == 1) && crop_para.size() == 0) { | |||
| aipp_size = {0, 0}; | |||
| @@ -408,6 +423,11 @@ std::vector<uint32_t> AippSizeFilter(const std::vector<uint32_t> &resize_para, c | |||
| : crop_para; | |||
| } | |||
| } | |||
| #ifdef ENABLE_ACL | |||
| aipp_size[0] = DVPP_ALIGN_UP(aipp_size[0], VPC_HEIGHT_ALIGN); // H | |||
| aipp_size[1] = DVPP_ALIGN_UP(aipp_size[1], VPC_WIDTH_ALIGN); // W | |||
| #endif | |||
| return aipp_size; | |||
| } | |||
| @@ -489,6 +509,7 @@ std::string Execute::AippCfgGenerator() { | |||
| #ifdef ENABLE_ACL | |||
| if (info_->init_with_shared_ptr_) { | |||
| ParseTransforms_(); | |||
| info_->init_with_shared_ptr_ = false; | |||
| } | |||
| std::vector<uint32_t> paras; // Record the parameters value of each Ascend operators | |||
| for (int32_t i = 0; i < ops_.size(); i++) { | |||
| @@ -501,15 +522,9 @@ std::string Execute::AippCfgGenerator() { | |||
| // Define map between operator name and parameter name | |||
| ops_[i]->to_json(&ir_info); | |||
| std::multimap<std::string, std::string> op_list = {{vision::kDvppCropJpegOperation, "size"}, | |||
| {vision::kDvppDecodeResizeOperation, "size"}, | |||
| {vision::kDvppDecodeResizeCropOperation, "crop_size"}, | |||
| {vision::kDvppDecodeResizeCropOperation, "resize_size"}, | |||
| {vision::kDvppNormalizeOperation, "mean"}, | |||
| {vision::kDvppNormalizeOperation, "std"}, | |||
| {vision::kDvppResizeJpegOperation, "size"}}; | |||
| // Collect the information of operators | |||
| for (auto pos = op_list.equal_range(ops_[i]->Name()); pos.first != pos.second; ++pos.first) { | |||
| for (auto pos = info_->op2para_map_.equal_range(ops_[i]->Name()); pos.first != pos.second; ++pos.first) { | |||
| auto paras_key_word = pos.first->second; | |||
| paras = ir_info[paras_key_word].get<std::vector<uint32_t>>(); | |||
| info_->aipp_cfg_.insert(std::make_pair(ops_[i]->Name(), paras)); | |||
| @@ -578,6 +593,11 @@ std::string Execute::AippCfgGenerator() { | |||
| } | |||
| outfile << "}"; | |||
| outfile.close(); | |||
| } else { // For case GPU or CPU | |||
| outfile << "aipp_op {" << std::endl << "}"; | |||
| outfile.close(); | |||
| MS_LOG(WARNING) << "Your runtime environment is not Ascend310, this config file will lead to undefined behavior on " | |||
| "computing result. Please check that."; | |||
| } | |||
| #endif | |||
| return config_location; | |||
| @@ -608,8 +628,9 @@ Status Execute::ParseTransforms_() { | |||
| } | |||
| Status Execute::validate_device_() { | |||
| if (device_type_ != MapTargetDevice::kCpu && device_type_ != MapTargetDevice::kAscend310) { | |||
| std::string err_msg = "Your input device is not supported. (Option: CPU or Ascend310)"; | |||
| if (device_type_ != MapTargetDevice::kCpu && device_type_ != MapTargetDevice::kAscend310 && | |||
| device_type_ != MapTargetDevice::kGpu) { | |||
| std::string err_msg = "Your input device is not supported. (Option: CPU or GPU or Ascend310)"; | |||
| MS_LOG(ERROR) << err_msg; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -18,6 +18,7 @@ | |||
| #include "include/api/types.h" | |||
| #include "minddata/dataset/include/type_id.h" | |||
| #include "minddata/dataset/core/ascend_resource.h" | |||
| #include "minddata/dataset/kernels/image/image_utils.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -59,6 +60,10 @@ Status AscendResource::Sink(const mindspore::MSTensor &host_input, std::shared_p | |||
| MSTypeToDEType(static_cast<TypeId>(host_input.DataType())), | |||
| (const uchar *)(host_input.Data().get()), &de_input); | |||
| RETURN_IF_NOT_OK(rc); | |||
| if (!IsNonEmptyJPEG(de_input)) { | |||
| RETURN_STATUS_UNEXPECTED("Dvpp operators can only support processing JPEG image"); | |||
| } | |||
| APP_ERROR ret = processor_->H2D_Sink(de_input, *device_input); | |||
| if (ret != APP_ERR_OK) { | |||
| ascend_resource_->Release(); | |||
| @@ -87,8 +87,8 @@ const std::vector<int64_t> &DETensor::Shape() const { return shape_; } | |||
| std::shared_ptr<const void> DETensor::Data() const { | |||
| #ifndef ENABLE_ANDROID | |||
| if (is_device_) { | |||
| MS_LOG(ERROR) << "Data() always return the data on the host."; | |||
| return nullptr; | |||
| ASSERT_NULL(device_tensor_impl_); | |||
| return std::shared_ptr<const void>(device_tensor_impl_->GetHostBuffer(), [](const void *) {}); | |||
| } | |||
| #endif | |||
| return std::shared_ptr<const void>(tensor_impl_->GetBuffer(), [](const void *) {}); | |||
| @@ -16,6 +16,9 @@ | |||
| #include "minddata/dataset/core/global_context.h" | |||
| #include "minddata/dataset/core/device_tensor.h" | |||
| #ifdef ENABLE_ACL | |||
| #include "minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h" | |||
| #endif | |||
| #include "minddata/dataset/util/status.h" | |||
| namespace mindspore { | |||
| @@ -25,6 +28,7 @@ DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Ten | |||
| std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool(); | |||
| data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool); | |||
| device_data_type_ = type; | |||
| host_data_tensor_ = nullptr; | |||
| } | |||
| Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, std::shared_ptr<DeviceTensor> *out) { | |||
| @@ -80,6 +84,20 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data | |||
| return Status::OK(); | |||
| } | |||
| const unsigned char *DeviceTensor::GetHostBuffer() { | |||
| #ifdef ENABLE_ACL | |||
| Status rc = DataPop_(&host_data_tensor_); | |||
| if (!rc.IsOk()) { | |||
| MS_LOG(ERROR) << "Pop device data onto host fail, a nullptr will be returned"; | |||
| return nullptr; | |||
| } | |||
| #endif | |||
| if (!host_data_tensor_) { | |||
| return nullptr; | |||
| } | |||
| return host_data_tensor_->GetBuffer(); | |||
| } | |||
| uint8_t *DeviceTensor::GetDeviceBuffer() { return device_data_; } | |||
| uint8_t *DeviceTensor::GetDeviceMutableBuffer() { return device_data_; } | |||
| @@ -109,5 +127,42 @@ Status DeviceTensor::SetSize_(const uint32_t &new_size) { | |||
| size_ = new_size; | |||
| return Status::OK(); | |||
| } | |||
| #ifdef ENABLE_ACL | |||
| Status DeviceTensor::DataPop_(std::shared_ptr<Tensor> *host_tensor) { | |||
| void *resHostBuf = nullptr; | |||
| APP_ERROR ret = aclrtMallocHost(&resHostBuf, this->DeviceDataSize()); | |||
| if (ret != APP_ERR_OK) { | |||
| MS_LOG(ERROR) << "Failed to allocate memory from host ret = " << ret; | |||
| return Status(StatusCode::kMDNoSpace); | |||
| } | |||
| std::shared_ptr<void> outBuf(resHostBuf, aclrtFreeHost); | |||
| auto processedInfo_ = outBuf; | |||
| // Memcpy the output data from device to host | |||
| ret = aclrtMemcpy(outBuf.get(), this->DeviceDataSize(), this->GetDeviceBuffer(), this->DeviceDataSize(), | |||
| ACL_MEMCPY_DEVICE_TO_HOST); | |||
| if (ret != APP_ERR_OK) { | |||
| MS_LOG(ERROR) << "Failed to copy memory from device to host, ret = " << ret; | |||
| return Status(StatusCode::kMDOutOfMemory); | |||
| } | |||
| auto data = std::static_pointer_cast<unsigned char>(processedInfo_); | |||
| unsigned char *ret_ptr = data.get(); | |||
| mindspore::dataset::dsize_t dvppDataSize = this->DeviceDataSize(); | |||
| const mindspore::dataset::TensorShape dvpp_shape({dvppDataSize, 1, 1}); | |||
| uint32_t _output_width_ = this->GetYuvStrideShape()[0]; | |||
| uint32_t _output_widthStride_ = this->GetYuvStrideShape()[1]; | |||
| uint32_t _output_height_ = this->GetYuvStrideShape()[2]; | |||
| uint32_t _output_heightStride_ = this->GetYuvStrideShape()[3]; | |||
| const mindspore::dataset::DataType dvpp_data_type(mindspore::dataset::DataType::DE_UINT8); | |||
| mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor); | |||
| (*host_tensor)->SetYuvShape(_output_width_, _output_widthStride_, _output_height_, _output_heightStride_); | |||
| if (!(*host_tensor)->HasData()) { | |||
| return Status(StatusCode::kMCDeviceError); | |||
| } | |||
| MS_LOG(INFO) << "Successfully pop DeviceTensor data onto host"; | |||
| return Status::OK(); | |||
| } | |||
| #endif | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -43,6 +43,8 @@ class DeviceTensor : public Tensor { | |||
| const uint32_t &dataSize, const std::vector<uint32_t> &attributes, | |||
| std::shared_ptr<DeviceTensor> *out); | |||
| const unsigned char *GetHostBuffer(); | |||
| uint8_t *GetDeviceBuffer(); | |||
| uint8_t *GetDeviceMutableBuffer(); | |||
| @@ -61,6 +63,10 @@ class DeviceTensor : public Tensor { | |||
| Status SetYuvStrideShape_(const uint32_t &width, const uint32_t &widthStride, const uint32_t &height, | |||
| const uint32_t &heightStride); | |||
| #ifdef ENABLE_ACL | |||
| Status DataPop_(std::shared_ptr<Tensor> *host_tensor); | |||
| #endif | |||
| std::vector<uint32_t> YUV_shape_; // YUV_shape_ = {width, widthStride, height, heightStride} | |||
| uint8_t *device_data_; | |||
| @@ -68,6 +74,9 @@ class DeviceTensor : public Tensor { | |||
| uint32_t size_; | |||
| DataType device_data_type_; | |||
| // We use this Tensor to store device_data when DeviceTensor pop onto host | |||
| std::shared_ptr<Tensor> host_data_tensor_; | |||
| }; | |||
| } // namespace dataset | |||
| @@ -67,7 +67,8 @@ TEST_F(TestDE, TestDvpp) { | |||
| #ifdef ENABLE_ACL | |||
| // Read images from target directory | |||
| std::shared_ptr<mindspore::dataset::Tensor> de_tensor; | |||
| mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor); | |||
| Status rc = mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| auto image = MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor)); | |||
| // Define dvpp transform | |||
| @@ -77,13 +78,13 @@ TEST_F(TestDE, TestDvpp) { | |||
| mindspore::dataset::Execute Transform(decode_resize_crop, MapTargetDevice::kAscend310); | |||
| // Apply transform on images | |||
| Status rc = Transform(image, &image); | |||
| rc = Transform(image, &image); | |||
| std::string aipp_cfg = Transform.AippCfgGenerator(); | |||
| ASSERT_EQ(aipp_cfg, "./aipp.cfg"); | |||
| // Check image info | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| ASSERT_EQ(image.Shape().size(), 3); | |||
| ASSERT_EQ(image.Shape().size(), 2); | |||
| int32_t real_h = 0; | |||
| int32_t real_w = 0; | |||
| int32_t remainder = crop_paras[crop_paras.size() - 1] % 16; | |||
| @@ -94,15 +95,21 @@ TEST_F(TestDE, TestDvpp) { | |||
| real_h = (crop_paras[0] % 2 == 0) ? crop_paras[0] : crop_paras[0] + 1; | |||
| real_w = (remainder == 0) ? crop_paras[1] : crop_paras[1] + 16 - remainder; | |||
| } | |||
| /* TODO Use in the future after compute college finish their job | |||
| ASSERT_EQ(image.Shape()[0], real_h); // For image in YUV format, each pixel takes 1.5 byte | |||
| ASSERT_EQ(image.Shape()[1], real_w); | |||
| ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5); | |||
| */ | |||
| ASSERT_TRUE(image.Data().get() != nullptr); | |||
| ASSERT_EQ(image.DataType(), mindspore::DataType::kNumberTypeUInt8); | |||
| ASSERT_EQ(image.IsDevice(), true); | |||
| /* This is the criterion for previous method(Without pop) | |||
| ASSERT_EQ(image.Shape()[0], 1.5 * real_h * real_w); // For image in YUV format, each pixel takes 1.5 byte | |||
| ASSERT_EQ(image.Shape()[1], 1); | |||
| ASSERT_EQ(image.Shape()[2], 1); | |||
| ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5); | |||
| */ | |||
| #endif | |||
| } | |||
| @@ -110,7 +117,8 @@ TEST_F(TestDE, TestDvppSinkMode) { | |||
| #ifdef ENABLE_ACL | |||
| // Read images from target directory | |||
| std::shared_ptr<mindspore::dataset::Tensor> de_tensor; | |||
| mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor); | |||
| Status rc = mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| auto image = MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor)); | |||
| // Define dvpp transform | |||
| @@ -123,11 +131,11 @@ TEST_F(TestDE, TestDvppSinkMode) { | |||
| mindspore::dataset::Execute Transform(trans_list, MapTargetDevice::kAscend310); | |||
| // Apply transform on images | |||
| Status rc = Transform(image, &image); | |||
| rc = Transform(image, &image); | |||
| // Check image info | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| ASSERT_EQ(image.Shape().size(), 3); | |||
| ASSERT_EQ(image.Shape().size(), 2); | |||
| int32_t real_h = 0; | |||
| int32_t real_w = 0; | |||
| int32_t remainder = crop_paras[crop_paras.size() - 1] % 16; | |||
| @@ -138,10 +146,13 @@ TEST_F(TestDE, TestDvppSinkMode) { | |||
| real_h = (crop_paras[0] % 2 == 0) ? crop_paras[0] : crop_paras[0] + 1; | |||
| real_w = (remainder == 0) ? crop_paras[1] : crop_paras[1] + 16 - remainder; | |||
| } | |||
| ASSERT_EQ(image.Shape()[0], 1.5 * real_h * real_w); // For image in YUV format, each pixel takes 1.5 byte | |||
| ASSERT_EQ(image.Shape()[1], 1); | |||
| ASSERT_EQ(image.Shape()[2], 1); | |||
| ASSERT_EQ(image.Shape()[0], real_h); // For image in YUV format, each pixel takes 1.5 byte | |||
| ASSERT_EQ(image.Shape()[1], real_w); | |||
| ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5); | |||
| ASSERT_TRUE(image.Data().get() != nullptr); | |||
| ASSERT_EQ(image.DataType(), mindspore::DataType::kNumberTypeUInt8); | |||
| ASSERT_EQ(image.IsDevice(), true); | |||
| Transform.DeviceMemoryRelease(); | |||
| #endif | |||
| } | |||
| @@ -149,7 +160,8 @@ TEST_F(TestDE, TestDvppSinkMode) { | |||
| TEST_F(TestDE, TestDvppDecodeResizeCropNormalize) { | |||
| #ifdef ENABLE_ACL | |||
| std::shared_ptr<mindspore::dataset::Tensor> de_tensor; | |||
| mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor); | |||
| Status rc = mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| auto image = MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor)); | |||
| // Define dvpp transform | |||
| @@ -170,11 +182,11 @@ TEST_F(TestDE, TestDvppDecodeResizeCropNormalize) { | |||
| ASSERT_EQ(aipp_cfg, "./aipp.cfg"); | |||
| // Apply transform on images | |||
| Status rc = Transform(image, &image); | |||
| rc = Transform(image, &image); | |||
| // Check image info | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| ASSERT_EQ(image.Shape().size(), 3); | |||
| ASSERT_EQ(image.Shape().size(), 2); | |||
| int32_t real_h = 0; | |||
| int32_t real_w = 0; | |||
| int32_t remainder = crop_paras[crop_paras.size() - 1] % 16; | |||
| @@ -185,10 +197,14 @@ TEST_F(TestDE, TestDvppDecodeResizeCropNormalize) { | |||
| real_h = (crop_paras[0] % 2 == 0) ? crop_paras[0] : crop_paras[0] + 1; | |||
| real_w = (remainder == 0) ? crop_paras[1] : crop_paras[1] + 16 - remainder; | |||
| } | |||
| ASSERT_EQ(image.Shape()[0], 1.5 * real_h * real_w); // For image in YUV format, each pixel takes 1.5 byte | |||
| ASSERT_EQ(image.Shape()[1], 1); | |||
| ASSERT_EQ(image.Shape()[2], 1); | |||
| ASSERT_EQ(image.Shape()[0], real_h); // For image in YUV format, each pixel takes 1.5 byte | |||
| ASSERT_EQ(image.Shape()[1], real_w); | |||
| ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5); | |||
| ASSERT_TRUE(image.Data().get() != nullptr); | |||
| ASSERT_EQ(image.DataType(), mindspore::DataType::kNumberTypeUInt8); | |||
| ASSERT_EQ(image.IsDevice(), true); | |||
| Transform.DeviceMemoryRelease(); | |||
| #endif | |||
| } | |||