Browse Source

Added GetDatasetSize support for AlbumNode and fixed ENABLE_ANDROID macro placement

pull/14023/head
Eric 5 years ago
parent
commit
38944eafc4
8 changed files with 65 additions and 7 deletions
  1. +4
    -2
      mindspore/ccsrc/minddata/dataset/api/vision.cc
  2. +1
    -1
      mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.cc
  3. +1
    -1
      mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc
  4. +37
    -1
      mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc
  5. +10
    -1
      mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h
  6. +1
    -1
      mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc
  7. +2
    -0
      mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.cc
  8. +9
    -0
      tests/ut/cpp/dataset/c_api_dataset_album_test.cc

+ 4
- 2
mindspore/ccsrc/minddata/dataset/api/vision.cc View File

@@ -38,7 +38,6 @@ namespace dataset {

// Transform operations for computer vision.
namespace vision {
#ifndef ENABLE_ANDROID
// CONSTRUCTORS FOR API CLASSES TO CREATE VISION TENSOR TRANSFORM OPERATIONS
// (In alphabetical order)

@@ -69,6 +68,7 @@ std::shared_ptr<TensorOperation> Affine::Parse() {
data_->interpolation_, data_->fill_value_);
}

#ifndef ENABLE_ANDROID
// AutoContrast Transform Operation.
struct AutoContrast::Data {
Data(float cutoff, const std::vector<uint32_t> &ignore) : cutoff_(cutoff), ignore_(ignore) {}
@@ -290,7 +290,7 @@ std::shared_ptr<TensorOperation> Normalize::Parse(const MapTargetDevice &env) {
if (env == MapTargetDevice::kAscend310) {
#ifdef ENABLE_ACL
return std::make_shared<DvppNormalizeOperation>(data_->mean_, data_->std_);
#endif
#endif // ENABLE_ACL
}
return std::make_shared<NormalizeOperation>(data_->mean_, data_->std_);
}
@@ -328,6 +328,7 @@ Pad::Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value, BorderTy
std::shared_ptr<TensorOperation> Pad::Parse() {
return std::make_shared<PadOperation>(data_->padding_, data_->fill_value_, data_->padding_mode_);
}
#endif // not ENABLE_ANDROID

// RandomAffine Transform Operation.
struct RandomAffine::Data {
@@ -358,6 +359,7 @@ std::shared_ptr<TensorOperation> RandomAffine::Parse() {
data_->shear_ranges_, data_->interpolation_, data_->fill_value_);
}

#ifndef ENABLE_ANDROID
// RandomColor Transform Operation.
struct RandomColor::Data {
Data(float t_lb, float t_ub) : t_lb_(t_lb), t_ub_(t_ub) {}


+ 1
- 1
mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.cc View File

@@ -115,7 +115,7 @@ Status MapOp::FetchNextWork(uint32_t worker_id, std::unique_ptr<DataBuffer> *db,

Status MapOp::GenerateWorkerJob(const std::unique_ptr<MapWorkerJob> *worker_job) {
std::shared_ptr<MapJob> map_job = nullptr;
MapTargetDevice prev_target;
MapTargetDevice prev_target = MapTargetDevice::kCpu;
for (size_t i = 0; i < tfuncs_.size(); i++) {
// Currently we only have CPU as the device target
// In the future, we will have heuristic or control from user to select target device


+ 1
- 1
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc View File

@@ -107,7 +107,7 @@ Status AlbumOp::PrescanEntry() {
Path folder(folder_path_);
dirname_offset_ = folder_path_.length();
std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder);
if (folder.Exists() == false || dirItr == nullptr) {
if (!folder.Exists() || dirItr == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_);
}
MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << ".";


+ 37
- 1
mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc View File

@@ -70,7 +70,7 @@ Status AlbumNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
RETURN_IF_NOT_OK(schema->LoadSchemaFile(schema_path_, column_names_));

// Argument that is not exposed to user in the API.
std::set<std::string> extensions = {};
std::set<std::string> extensions = {".json", ".JSON"};
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));

@@ -89,5 +89,41 @@ Status AlbumNode::GetShardId(int32_t *shard_id) {
return Status::OK();
}

// Get the dataset size (number of rows the pipeline will produce for this node).
// \param[in] size_getter Helper used to dry-run the pipeline when the sampler
//            cannot determine the sample count on its own.
// \param[in] estimate Unused here: the Album count is exact, so no estimation
//            shortcut is needed.
// \param[out] dataset_size The computed size.
// \return Status error code; OK on success.
Status AlbumNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
                                 int64_t *dataset_size) {
  // Return the cached value if the size was already computed once.
  if (dataset_size_ > 0) {
    *dataset_size = dataset_size_;
    return Status::OK();
  }
  int64_t num_rows = 0;
  // Count album entry files in the dataset directory; each matching file is one row.
  Path folder(dataset_dir_);
  std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder);
  if (!folder.Exists() || dirItr == nullptr) {
    RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + dataset_dir_);
  }
  // Must stay in sync with the extension filter used in AlbumNode::Build.
  const std::set<std::string> extensions = {".json", ".JSON"};
  while (dirItr->hasNext()) {
    Path file = dirItr->next();
    if (extensions.find(file.Extension()) != extensions.end()) {
      num_rows += 1;
    }
  }
  // Let the sampler clamp the row count against its num_samples setting;
  // CalculateNumSamples returns -1 when it cannot decide statically.
  std::shared_ptr<SamplerRT> sampler_rt = nullptr;
  RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
  int64_t sample_size = sampler_rt->CalculateNumSamples(num_rows);
  if (sample_size == -1) {
    // Fall back to a dry run of the pipeline to count rows exactly.
    RETURN_IF_NOT_OK(size_getter->DryRun(shared_from_this(), &sample_size));
  }
  *dataset_size = sample_size;
  // Cache the result so later calls do not rescan the directory.
  dataset_size_ = *dataset_size;
  return Status::OK();
}

} // namespace dataset
} // namespace mindspore

+ 10
- 1
mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -61,6 +61,15 @@ class AlbumNode : public MappableSourceNode {
/// \return Status Status::OK() if get shard id successfully
Status GetShardId(int32_t *shard_id) override;

/// \brief Base-class override for GetDatasetSize
/// \param[in] size_getter Shared pointer to DatasetSizeGetter
/// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
/// dataset size at the expense of accuracy.
/// \param[out] dataset_size the size of the dataset
/// \return Status of the function
Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
int64_t *dataset_size) override;

/// \brief Getter functions
const std::string &DatasetDir() const { return dataset_dir_; }
const std::string &SchemaPath() const { return schema_path_; }


+ 1
- 1
mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc View File

@@ -207,7 +207,7 @@ uint64_t ProfilingTime::GetCurMilliSecond() {
using std::chrono::duration_cast;
using std::chrono::milliseconds;
using std::chrono::steady_clock;
return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
return static_cast<uint64_t>(duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count());
}
} // namespace dataset
} // namespace mindspore

+ 2
- 0
mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.cc View File

@@ -474,6 +474,7 @@ Status PadOperation::to_json(nlohmann::json *out_json) {
*out_json = args;
return Status::OK();
}
#endif

// RandomAffineOperation
RandomAffineOperation::RandomAffineOperation(const std::vector<float_t> &degrees,
@@ -586,6 +587,7 @@ Status RandomAffineOperation::to_json(nlohmann::json *out_json) {
return Status::OK();
}

#ifndef ENABLE_ANDROID
// RandomColorOperation.
RandomColorOperation::RandomColorOperation(float t_lb, float t_ub) : t_lb_(t_lb), t_ub_(t_ub) { random_op_ = true; }



+ 9
- 0
tests/ut/cpp/dataset/c_api_dataset_album_test.cc View File

@@ -123,11 +123,20 @@ TEST_F(MindDataTestPipeline, TestAlbumGetters) {

int64_t num_classes = ds->GetNumClasses();
EXPECT_EQ(num_classes, -1);
int64_t num_samples = ds->GetDatasetSize();
EXPECT_EQ(num_samples, 7);

int64_t batch_size = ds->GetBatchSize();
EXPECT_EQ(batch_size, 1);
int64_t repeat_count = ds->GetRepeatCount();
EXPECT_EQ(repeat_count, 1);
EXPECT_EQ(ds->GetColumnNames(), column_names);

// Test get dataset size with num_samples > files in dataset
auto sampler = std::make_shared<SequentialSampler>(0, 12);
std::shared_ptr<Dataset> ds2 = Album(folder_path, schema_file, column_names, false, sampler);
num_samples = ds->GetDatasetSize();
EXPECT_EQ(num_samples, 7);
}

TEST_F(MindDataTestPipeline, TestAlbumDecode) {


Loading…
Cancel
Save