Fix dvpp input validation and Aipp config generation issue

4 years ago · 2474a0237f
--- a/mindspore/ccsrc/minddata/dataset/api/execute.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/execute.cc
@@ -32,6 +32,7 @@
 #endif
 #ifdef ENABLE_ACL
 #include "minddata/dataset/core/ascend_resource.h"
 #include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h"
 #include "minddata/dataset/kernels/ir/vision/ascend_vision_ir.h"
 #endif

@@ -42,6 +43,15 @@ using json = nlohmann::json;
 struct Execute::ExtraInfo {
  std::multimap<std::string, std::vector<uint32_t>> aipp_cfg_;
  bool init_with_shared_ptr_ = true;  // Initial execute object with shared_ptr as default
 #ifdef ENABLE_ACL
  std::multimap<std::string, std::string> op2para_map_ = {{vision::kDvppCropJpegOperation, "size"},
                                                          {vision::kDvppDecodeResizeOperation, "size"},
                                                          {vision::kDvppDecodeResizeCropOperation, "crop_size"},
                                                          {vision::kDvppDecodeResizeCropOperation, "resize_size"},
                                                          {vision::kDvppNormalizeOperation, "mean"},
                                                          {vision::kDvppNormalizeOperation, "std"},
                                                          {vision::kDvppResizeJpegOperation, "size"}};
 #endif
 };

 // FIXME - Temporarily overload Execute to support both TensorOperation and TensorTransform
@@ -221,6 +231,7 @@ Status Execute::operator()(const mindspore::MSTensor &input, mindspore::MSTensor
  // Parse TensorTransform transforms_ into TensorOperation ops_
  if (info_->init_with_shared_ptr_) {
    RETURN_IF_NOT_OK(ParseTransforms_());
    info_->init_with_shared_ptr_ = false;
  }
  CHECK_FAIL_RETURN_UNEXPECTED(!ops_.empty(), "Input TensorOperation should be provided");

@@ -285,11 +296,13 @@ Status Execute::operator()(const mindspore::MSTensor &input, mindspore::MSTensor
      device_input = std::move(device_output);
    }
    CHECK_FAIL_RETURN_UNEXPECTED(device_input->HasDeviceData(), "Apply transform failed, output tensor has no data");
    std::shared_ptr<mindspore::dataset::Tensor> host_output;

    // TODO(lizhenglong) waiting for computing department development, hence we pop data onto host temporarily.
    RETURN_IF_NOT_OK(device_resource_->Pop(device_input, &host_output));
    *output = mindspore::MSTensor(std::make_shared<DETensor>(host_output));
    // *output = mindspore::MSTensor(std::make_shared<DETensor>(device_input, true)); Use in the future
    // std::shared_ptr<mindspore::dataset::Tensor> host_output;
    // RETURN_IF_NOT_OK(device_resource_->Pop(device_input, &host_output));
    // *output = mindspore::MSTensor(std::make_shared<DETensor>(host_output));

    *output = mindspore::MSTensor(std::make_shared<DETensor>(device_input, true));
 #endif
  }
  return Status::OK();
@@ -306,6 +319,7 @@ Status Execute::operator()(const std::vector<MSTensor> &input_tensor_list, std::
  // Parse TensorTransform transforms_ into TensorOperation ops_
  if (info_->init_with_shared_ptr_) {
    RETURN_IF_NOT_OK(ParseTransforms_());
    info_->init_with_shared_ptr_ = false;
  }
  CHECK_FAIL_RETURN_UNEXPECTED(!ops_.empty(), "Input TensorOperation should be provided");

@@ -386,6 +400,7 @@ Status Execute::operator()(const std::vector<MSTensor> &input_tensor_list, std::

 std::vector<uint32_t> AippSizeFilter(const std::vector<uint32_t> &resize_para, const std::vector<uint32_t> &crop_para) {
  std::vector<uint32_t> aipp_size;

  // Special condition where (no Crop and no Resize) or (no Crop and resize with fixed ratio) will lead to dynamic input
  if ((resize_para.size() == 0 || resize_para.size() == 1) && crop_para.size() == 0) {
    aipp_size = {0, 0};
@@ -408,6 +423,11 @@ std::vector<uint32_t> AippSizeFilter(const std::vector<uint32_t> &resize_para, c
          : crop_para;
    }
  }

 #ifdef ENABLE_ACL
  aipp_size[0] = DVPP_ALIGN_UP(aipp_size[0], VPC_HEIGHT_ALIGN);  // H
  aipp_size[1] = DVPP_ALIGN_UP(aipp_size[1], VPC_WIDTH_ALIGN);   // W
 #endif
  return aipp_size;
 }

@@ -489,6 +509,7 @@ std::string Execute::AippCfgGenerator() {
 #ifdef ENABLE_ACL
  if (info_->init_with_shared_ptr_) {
    ParseTransforms_();
    info_->init_with_shared_ptr_ = false;
  }
  std::vector<uint32_t> paras;  // Record the parameters value of each Ascend operators
  for (int32_t i = 0; i < ops_.size(); i++) {
@@ -501,15 +522,9 @@ std::string Execute::AippCfgGenerator() {

    // Define map between operator name and parameter name
    ops_[i]->to_json(&ir_info);
    std::multimap<std::string, std::string> op_list = {{vision::kDvppCropJpegOperation, "size"},
                                                       {vision::kDvppDecodeResizeOperation, "size"},
                                                       {vision::kDvppDecodeResizeCropOperation, "crop_size"},
                                                       {vision::kDvppDecodeResizeCropOperation, "resize_size"},
                                                       {vision::kDvppNormalizeOperation, "mean"},
                                                       {vision::kDvppNormalizeOperation, "std"},
                                                       {vision::kDvppResizeJpegOperation, "size"}};

    // Collect the information of operators
    for (auto pos = op_list.equal_range(ops_[i]->Name()); pos.first != pos.second; ++pos.first) {
    for (auto pos = info_->op2para_map_.equal_range(ops_[i]->Name()); pos.first != pos.second; ++pos.first) {
      auto paras_key_word = pos.first->second;
      paras = ir_info[paras_key_word].get<std::vector<uint32_t>>();
      info_->aipp_cfg_.insert(std::make_pair(ops_[i]->Name(), paras));
@@ -578,6 +593,11 @@ std::string Execute::AippCfgGenerator() {
    }
    outfile << "}";
    outfile.close();
  } else {  // For case GPU or CPU
    outfile << "aipp_op {" << std::endl << "}";
    outfile.close();
    MS_LOG(WARNING) << "Your runtime environment is not Ascend310, this config file will lead to undefined behavior on "
                       "computing result. Please check that.";
  }
 #endif
  return config_location;
@@ -608,8 +628,9 @@ Status Execute::ParseTransforms_() {
 }

 Status Execute::validate_device_() {
  if (device_type_ != MapTargetDevice::kCpu && device_type_ != MapTargetDevice::kAscend310) {
    std::string err_msg = "Your input device is not supported. (Option: CPU or Ascend310)";
  if (device_type_ != MapTargetDevice::kCpu && device_type_ != MapTargetDevice::kAscend310 &&
      device_type_ != MapTargetDevice::kGpu) {
    std::string err_msg = "Your input device is not supported. (Option: CPU or GPU or Ascend310)";
    MS_LOG(ERROR) << err_msg;
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
--- a/mindspore/ccsrc/minddata/dataset/core/ascend_resource.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/ascend_resource.cc
@@ -18,6 +18,7 @@
 #include "include/api/types.h"
 #include "minddata/dataset/include/type_id.h"
 #include "minddata/dataset/core/ascend_resource.h"
 #include "minddata/dataset/kernels/image/image_utils.h"

 namespace mindspore {
 namespace dataset {
@@ -59,6 +60,10 @@ Status AscendResource::Sink(const mindspore::MSTensor &host_input, std::shared_p
                                                MSTypeToDEType(static_cast<TypeId>(host_input.DataType())),
                                                (const uchar *)(host_input.Data().get()), &de_input);
  RETURN_IF_NOT_OK(rc);
  if (!IsNonEmptyJPEG(de_input)) {
    RETURN_STATUS_UNEXPECTED("Dvpp operators can only support processing JPEG image");
  }

  APP_ERROR ret = processor_->H2D_Sink(de_input, *device_input);
  if (ret != APP_ERR_OK) {
    ascend_resource_->Release();
--- a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc
@@ -87,8 +87,8 @@ const std::vector<int64_t> &DETensor::Shape() const { return shape_; }
 std::shared_ptr<const void> DETensor::Data() const {
 #ifndef ENABLE_ANDROID
  if (is_device_) {
    MS_LOG(ERROR) << "Data() always return the data on the host.";
    return nullptr;
    ASSERT_NULL(device_tensor_impl_);
    return std::shared_ptr<const void>(device_tensor_impl_->GetHostBuffer(), [](const void *) {});
  }
 #endif
  return std::shared_ptr<const void>(tensor_impl_->GetBuffer(), [](const void *) {});
--- a/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc
@@ -16,6 +16,9 @@

 #include "minddata/dataset/core/global_context.h"
 #include "minddata/dataset/core/device_tensor.h"
 #ifdef ENABLE_ACL
 #include "minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h"
 #endif
 #include "minddata/dataset/util/status.h"

 namespace mindspore {
@@ -25,6 +28,7 @@ DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Ten
  std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
  data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
  device_data_type_ = type;
  host_data_tensor_ = nullptr;
 }

 Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, std::shared_ptr<DeviceTensor> *out) {
@@ -80,6 +84,20 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data
  return Status::OK();
 }

 const unsigned char *DeviceTensor::GetHostBuffer() {
 #ifdef ENABLE_ACL
  Status rc = DataPop_(&host_data_tensor_);
  if (!rc.IsOk()) {
    MS_LOG(ERROR) << "Pop device data onto host fail, a nullptr will be returned";
    return nullptr;
  }
 #endif
  if (!host_data_tensor_) {
    return nullptr;
  }
  return host_data_tensor_->GetBuffer();
 }

 uint8_t *DeviceTensor::GetDeviceBuffer() { return device_data_; }

 uint8_t *DeviceTensor::GetDeviceMutableBuffer() { return device_data_; }
@@ -109,5 +127,42 @@ Status DeviceTensor::SetSize_(const uint32_t &new_size) {
  size_ = new_size;
  return Status::OK();
 }

 #ifdef ENABLE_ACL
 Status DeviceTensor::DataPop_(std::shared_ptr<Tensor> *host_tensor) {
  void *resHostBuf = nullptr;
  APP_ERROR ret = aclrtMallocHost(&resHostBuf, this->DeviceDataSize());
  if (ret != APP_ERR_OK) {
    MS_LOG(ERROR) << "Failed to allocate memory from host ret = " << ret;
    return Status(StatusCode::kMDNoSpace);
  }
  std::shared_ptr<void> outBuf(resHostBuf, aclrtFreeHost);
  auto processedInfo_ = outBuf;
  // Memcpy the output data from device to host
  ret = aclrtMemcpy(outBuf.get(), this->DeviceDataSize(), this->GetDeviceBuffer(), this->DeviceDataSize(),
                    ACL_MEMCPY_DEVICE_TO_HOST);
  if (ret != APP_ERR_OK) {
    MS_LOG(ERROR) << "Failed to copy memory from device to host, ret = " << ret;
    return Status(StatusCode::kMDOutOfMemory);
  }
  auto data = std::static_pointer_cast<unsigned char>(processedInfo_);
  unsigned char *ret_ptr = data.get();

  mindspore::dataset::dsize_t dvppDataSize = this->DeviceDataSize();
  const mindspore::dataset::TensorShape dvpp_shape({dvppDataSize, 1, 1});
  uint32_t _output_width_ = this->GetYuvStrideShape()[0];
  uint32_t _output_widthStride_ = this->GetYuvStrideShape()[1];
  uint32_t _output_height_ = this->GetYuvStrideShape()[2];
  uint32_t _output_heightStride_ = this->GetYuvStrideShape()[3];
  const mindspore::dataset::DataType dvpp_data_type(mindspore::dataset::DataType::DE_UINT8);
  mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor);
  (*host_tensor)->SetYuvShape(_output_width_, _output_widthStride_, _output_height_, _output_heightStride_);
  if (!(*host_tensor)->HasData()) {
    return Status(StatusCode::kMCDeviceError);
  }
  MS_LOG(INFO) << "Successfully pop DeviceTensor data onto host";
  return Status::OK();
 }
 #endif
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/core/device_tensor.h
+++ b/mindspore/ccsrc/minddata/dataset/core/device_tensor.h
@@ -43,6 +43,8 @@ class DeviceTensor : public Tensor {
                                       const uint32_t &dataSize, const std::vector<uint32_t> &attributes,
                                       std::shared_ptr<DeviceTensor> *out);

  const unsigned char *GetHostBuffer();

  uint8_t *GetDeviceBuffer();

  uint8_t *GetDeviceMutableBuffer();
@@ -61,6 +63,10 @@ class DeviceTensor : public Tensor {
  Status SetYuvStrideShape_(const uint32_t &width, const uint32_t &widthStride, const uint32_t &height,
                            const uint32_t &heightStride);

 #ifdef ENABLE_ACL
  Status DataPop_(std::shared_ptr<Tensor> *host_tensor);
 #endif

  std::vector<uint32_t> YUV_shape_;  // YUV_shape_ = {width, widthStride, height, heightStride}

  uint8_t *device_data_;
@@ -68,6 +74,9 @@ class DeviceTensor : public Tensor {
  uint32_t size_;

  DataType device_data_type_;

  // We use this Tensor to store device_data when DeviceTensor pop onto host
  std::shared_ptr<Tensor> host_data_tensor_;
 };

 }  // namespace dataset
--- a/tests/st/cpp/dataset/test_de.cc
+++ b/tests/st/cpp/dataset/test_de.cc
@@ -67,7 +67,8 @@ TEST_F(TestDE, TestDvpp) {
 #ifdef ENABLE_ACL
  // Read images from target directory
  std::shared_ptr<mindspore::dataset::Tensor> de_tensor;
  mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
  Status rc = mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
  ASSERT_TRUE(rc.IsOk());
  auto image = MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));

  // Define dvpp transform
@@ -77,13 +78,13 @@ TEST_F(TestDE, TestDvpp) {
  mindspore::dataset::Execute Transform(decode_resize_crop, MapTargetDevice::kAscend310);

  // Apply transform on images
  Status rc = Transform(image, &image);
  rc = Transform(image, &image);
  std::string aipp_cfg = Transform.AippCfgGenerator();
  ASSERT_EQ(aipp_cfg, "./aipp.cfg");

  // Check image info
  ASSERT_TRUE(rc.IsOk());
  ASSERT_EQ(image.Shape().size(), 3);
  ASSERT_EQ(image.Shape().size(), 2);
  int32_t real_h = 0;
  int32_t real_w = 0;
  int32_t remainder = crop_paras[crop_paras.size() - 1] % 16;
@@ -94,15 +95,21 @@ TEST_F(TestDE, TestDvpp) {
    real_h = (crop_paras[0] % 2 == 0) ? crop_paras[0] : crop_paras[0] + 1;
    real_w = (remainder == 0) ? crop_paras[1] : crop_paras[1] + 16 - remainder;
  }
  /* TODO Use in the future after compute college finish their job

  ASSERT_EQ(image.Shape()[0], real_h);  // For image in YUV format, each pixel takes 1.5 byte
  ASSERT_EQ(image.Shape()[1], real_w);
  ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5);
   */

  ASSERT_TRUE(image.Data().get() != nullptr);
  ASSERT_EQ(image.DataType(), mindspore::DataType::kNumberTypeUInt8);
  ASSERT_EQ(image.IsDevice(), true);

  /* This is the criterion for previous method(Without pop)
  ASSERT_EQ(image.Shape()[0], 1.5 * real_h * real_w);  // For image in YUV format, each pixel takes 1.5 byte
  ASSERT_EQ(image.Shape()[1], 1);
  ASSERT_EQ(image.Shape()[2], 1);
  ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5);
  */
 #endif
 }

@@ -110,7 +117,8 @@ TEST_F(TestDE, TestDvppSinkMode) {
 #ifdef ENABLE_ACL
  // Read images from target directory
  std::shared_ptr<mindspore::dataset::Tensor> de_tensor;
  mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
  Status rc = mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
  ASSERT_TRUE(rc.IsOk());
  auto image = MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));

  // Define dvpp transform
@@ -123,11 +131,11 @@ TEST_F(TestDE, TestDvppSinkMode) {
  mindspore::dataset::Execute Transform(trans_list, MapTargetDevice::kAscend310);

  // Apply transform on images
  Status rc = Transform(image, &image);
  rc = Transform(image, &image);

  // Check image info
  ASSERT_TRUE(rc.IsOk());
  ASSERT_EQ(image.Shape().size(), 3);
  ASSERT_EQ(image.Shape().size(), 2);
  int32_t real_h = 0;
  int32_t real_w = 0;
  int32_t remainder = crop_paras[crop_paras.size() - 1] % 16;
@@ -138,10 +146,13 @@ TEST_F(TestDE, TestDvppSinkMode) {
    real_h = (crop_paras[0] % 2 == 0) ? crop_paras[0] : crop_paras[0] + 1;
    real_w = (remainder == 0) ? crop_paras[1] : crop_paras[1] + 16 - remainder;
  }
  ASSERT_EQ(image.Shape()[0], 1.5 * real_h * real_w);  // For image in YUV format, each pixel takes 1.5 byte
  ASSERT_EQ(image.Shape()[1], 1);
  ASSERT_EQ(image.Shape()[2], 1);
  ASSERT_EQ(image.Shape()[0], real_h);  // For image in YUV format, each pixel takes 1.5 byte
  ASSERT_EQ(image.Shape()[1], real_w);
  ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5);

  ASSERT_TRUE(image.Data().get() != nullptr);
  ASSERT_EQ(image.DataType(), mindspore::DataType::kNumberTypeUInt8);
  ASSERT_EQ(image.IsDevice(), true);
  Transform.DeviceMemoryRelease();
 #endif
 }
@@ -149,7 +160,8 @@ TEST_F(TestDE, TestDvppSinkMode) {
 TEST_F(TestDE, TestDvppDecodeResizeCropNormalize) {
 #ifdef ENABLE_ACL
  std::shared_ptr<mindspore::dataset::Tensor> de_tensor;
  mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
  Status rc = mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
  ASSERT_TRUE(rc.IsOk());
  auto image = MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));

  // Define dvpp transform
@@ -170,11 +182,11 @@ TEST_F(TestDE, TestDvppDecodeResizeCropNormalize) {
  ASSERT_EQ(aipp_cfg, "./aipp.cfg");

  // Apply transform on images
  Status rc = Transform(image, &image);
  rc = Transform(image, &image);

  // Check image info
  ASSERT_TRUE(rc.IsOk());
  ASSERT_EQ(image.Shape().size(), 3);
  ASSERT_EQ(image.Shape().size(), 2);
  int32_t real_h = 0;
  int32_t real_w = 0;
  int32_t remainder = crop_paras[crop_paras.size() - 1] % 16;
@@ -185,10 +197,14 @@ TEST_F(TestDE, TestDvppDecodeResizeCropNormalize) {
    real_h = (crop_paras[0] % 2 == 0) ? crop_paras[0] : crop_paras[0] + 1;
    real_w = (remainder == 0) ? crop_paras[1] : crop_paras[1] + 16 - remainder;
  }
  ASSERT_EQ(image.Shape()[0], 1.5 * real_h * real_w);  // For image in YUV format, each pixel takes 1.5 byte
  ASSERT_EQ(image.Shape()[1], 1);
  ASSERT_EQ(image.Shape()[2], 1);

  ASSERT_EQ(image.Shape()[0], real_h);  // For image in YUV format, each pixel takes 1.5 byte
  ASSERT_EQ(image.Shape()[1], real_w);
  ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5);

  ASSERT_TRUE(image.Data().get() != nullptr);
  ASSERT_EQ(image.DataType(), mindspore::DataType::kNumberTypeUInt8);
  ASSERT_EQ(image.IsDevice(), true);
  Transform.DeviceMemoryRelease();
 #endif
 }