Browse Source

!1993 [Dataset] Fix codedex.

tags/v0.5.0-beta
Yang 6 years ago
parent
commit
dee8471d23
20 changed files with 52 additions and 166 deletions
  1. +7
    -7
      mindspore/ccsrc/dataset/core/tensor_row.h
  2. +6
    -1
      mindspore/ccsrc/dataset/engine/dataset_iterator.cc
  3. +0
    -1
      mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc
  4. +0
    -1
      mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h
  5. +2
    -0
      mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h
  6. +0
    -2
      mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc
  7. +0
    -1
      mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h
  8. +2
    -2
      mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc
  9. +3
    -1
      mindspore/ccsrc/dataset/engine/perf/connector_size.h
  10. +2
    -2
      mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h
  11. +2
    -2
      mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h
  12. +1
    -0
      mindspore/ccsrc/dataset/engine/perf/monitor.cc
  13. +2
    -0
      mindspore/ccsrc/dataset/engine/perf/monitor.h
  14. +1
    -1
      mindspore/ccsrc/dataset/engine/perf/profiling.h
  15. +2
    -1
      mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc
  16. +0
    -1
      mindspore/ccsrc/dataset/kernels/data/fill_op.cc
  17. +0
    -3
      mindspore/ccsrc/dataset/kernels/data/fill_op.h
  18. +7
    -3
      mindspore/ccsrc/mindrecord/meta/shard_column.cc
  19. +15
    -13
      mindspore/dataset/transforms/vision/py_transforms_util.py
  20. +0
    -124
      tests/ut/python/dataset/prep_data.py

+ 7
- 7
mindspore/ccsrc/dataset/core/tensor_row.h View File

@@ -35,13 +35,13 @@ class TensorRow {
static constexpr row_id_type kDefaultRowId = -1; // Default row id static constexpr row_id_type kDefaultRowId = -1; // Default row id


// Type definitions // Type definitions
typedef dsize_t size_type;
typedef std::shared_ptr<Tensor> value_type;
typedef std::shared_ptr<Tensor> &reference;
typedef const std::shared_ptr<Tensor> &const_reference;
typedef std::vector<std::shared_ptr<Tensor>> vector_type;
typedef std::vector<std::shared_ptr<Tensor>>::iterator iterator;
typedef std::vector<std::shared_ptr<Tensor>>::const_iterator const_iterator;
using size_type = dsize_t;
using value_type = std::shared_ptr<Tensor>;
using reference = std::shared_ptr<Tensor> &;
using const_reference = const std::shared_ptr<Tensor> &;
using vector_type = std::vector<std::shared_ptr<Tensor>>;
using iterator = std::vector<std::shared_ptr<Tensor>>::iterator;
using const_iterator = std::vector<std::shared_ptr<Tensor>>::const_iterator;


TensorRow() noexcept; TensorRow() noexcept;




+ 6
- 1
mindspore/ccsrc/dataset/engine/dataset_iterator.cc View File

@@ -84,7 +84,12 @@ Status IteratorBase::FetchNextTensorRow(TensorRow *out_row) {


// Constructor of the DatasetIterator // Constructor of the DatasetIterator
DatasetIterator::DatasetIterator(std::shared_ptr<ExecutionTree> exe_tree) DatasetIterator::DatasetIterator(std::shared_ptr<ExecutionTree> exe_tree)
: IteratorBase(), root_(exe_tree->root()), tracing_(nullptr), cur_batch_num_(0), cur_connector_size_(0) {
: IteratorBase(),
root_(exe_tree->root()),
tracing_(nullptr),
cur_batch_num_(0),
cur_connector_size_(0),
cur_connector_capacity_(0) {
std::shared_ptr<Tracing> node; std::shared_ptr<Tracing> node;
Status s = exe_tree->GetProfilingManager()->GetTracingNode(kDatasetIteratorTracingName, &node); Status s = exe_tree->GetProfilingManager()->GetTracingNode(kDatasetIteratorTracingName, &node);
if (s.IsOk()) { if (s.IsOk()) {


+ 0
- 1
mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.cc View File

@@ -237,6 +237,5 @@ Status BucketBatchByLengthOp::Reset() {


return Status::OK(); return Status::OK();
} }

} // namespace dataset } // namespace dataset
} // namespace mindspore } // namespace mindspore

+ 0
- 1
mindspore/ccsrc/dataset/engine/datasetops/bucket_batch_by_length_op.h View File

@@ -146,7 +146,6 @@ class BucketBatchByLengthOp : public PipelineOp {
std::unique_ptr<ChildIterator> child_iterator_; std::unique_ptr<ChildIterator> child_iterator_;
std::vector<std::unique_ptr<TensorQTable>> buckets_; std::vector<std::unique_ptr<TensorQTable>> buckets_;
}; };

} // namespace dataset } // namespace dataset
} // namespace mindspore } // namespace mindspore




+ 2
- 0
mindspore/ccsrc/dataset/engine/datasetops/build_vocab_op.h View File

@@ -112,6 +112,8 @@ class BuildVocabOp : public ParallelOp {
BuildVocabOp(std::shared_ptr<Vocab> vocab, std::vector<std::string> col_names, std::pair<int64_t, int64_t> freq_range, BuildVocabOp(std::shared_ptr<Vocab> vocab, std::vector<std::string> col_names, std::pair<int64_t, int64_t> freq_range,
int64_t top_k, int32_t num_workers, int32_t op_connector_size); int64_t top_k, int32_t num_workers, int32_t op_connector_size);


~BuildVocabOp() = default;

Status WorkerEntry(int32_t worker_id) override; Status WorkerEntry(int32_t worker_id) override;


// collect the work product from each worker // collect the work product from each worker


+ 0
- 2
mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.cc View File

@@ -30,7 +30,6 @@


namespace mindspore { namespace mindspore {
namespace dataset { namespace dataset {

ClueOp::Builder::Builder() ClueOp::Builder::Builder()
: builder_device_id_(0), builder_num_devices_(1), builder_num_samples_(0), builder_shuffle_files_(false) { : builder_device_id_(0), builder_num_devices_(1), builder_num_samples_(0), builder_shuffle_files_(false) {
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
@@ -545,6 +544,5 @@ Status ClueOp::CountAllFileRows(const std::vector<std::string> &files, int64_t *
} }
return Status::OK(); return Status::OK();
} }

} // namespace dataset } // namespace dataset
} // namespace mindspore } // namespace mindspore

+ 0
- 1
mindspore/ccsrc/dataset/engine/datasetops/source/clue_op.h View File

@@ -264,7 +264,6 @@ class ClueOp : public ParallelOp {
bool load_jagged_connector_; bool load_jagged_connector_;
ColKeyMap cols_to_keyword_; ColKeyMap cols_to_keyword_;
}; };

} // namespace dataset } // namespace dataset
} // namespace mindspore } // namespace mindspore
#endif // DATASET_ENGINE_DATASETOPS_SOURCE_CLUE_OP_H_ #endif // DATASET_ENGINE_DATASETOPS_SOURCE_CLUE_OP_H_

+ 2
- 2
mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc View File

@@ -59,8 +59,8 @@ CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) { Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) {
RETURN_IF_NOT_OK(SanityCheck()); RETURN_IF_NOT_OK(SanityCheck());
if (builder_sampler_ == nullptr) { if (builder_sampler_ == nullptr) {
int64_t num_samples = 0;
int64_t start_index = 0;
const int64_t num_samples = 0;
const int64_t start_index = 0;
builder_sampler_ = std::make_shared<SequentialSampler>(start_index, num_samples); builder_sampler_ = std::make_shared<SequentialSampler>(start_index, num_samples);
} }
builder_schema_ = std::make_unique<DataSchema>(); builder_schema_ = std::make_unique<DataSchema>();


+ 3
- 1
mindspore/ccsrc/dataset/engine/perf/connector_size.h View File

@@ -44,6 +44,8 @@ class ConnectorSize : public Sampling {
public: public:
explicit ConnectorSize(ExecutionTree *tree) : tree_(tree) {} explicit ConnectorSize(ExecutionTree *tree) : tree_(tree) {}


~ConnectorSize() = default;

// Driver function for connector size sampling. // Driver function for connector size sampling.
// This function samples the connector size of every nodes within the ExecutionTree // This function samples the connector size of every nodes within the ExecutionTree
Status Sample() override; Status Sample() override;
@@ -54,7 +56,7 @@ class ConnectorSize : public Sampling {
// @return Status - The error code return // @return Status - The error code return
Status SaveToFile() override; Status SaveToFile() override;


Status Init(const std::string &dir_path, const std::string &device_id);
Status Init(const std::string &dir_path, const std::string &device_id) override;


// Parse op infomation and transform to json format // Parse op infomation and transform to json format
json ParseOpInfo(const DatasetOp &node, const std::vector<int32_t> &size); json ParseOpInfo(const DatasetOp &node, const std::vector<int32_t> &size);


+ 2
- 2
mindspore/ccsrc/dataset/engine/perf/dataset_iterator_tracing.h View File

@@ -28,7 +28,7 @@ class DatasetIteratorTracing : public Tracing {
DatasetIteratorTracing() = default; DatasetIteratorTracing() = default;


// Destructor // Destructor
~DatasetIteratorTracing() = default;
~DatasetIteratorTracing() override = default;


// Record tracing data // Record tracing data
// @return Status - The error code return // @return Status - The error code return
@@ -40,7 +40,7 @@ class DatasetIteratorTracing : public Tracing {
// @return Status - The error code return // @return Status - The error code return
Status SaveToFile() override; Status SaveToFile() override;


Status Init(const std::string &dir_path, const std::string &device_id);
Status Init(const std::string &dir_path, const std::string &device_id) override;


private: private:
std::vector<std::string> value_; std::vector<std::string> value_;


+ 2
- 2
mindspore/ccsrc/dataset/engine/perf/device_queue_tracing.h View File

@@ -29,7 +29,7 @@ class DeviceQueueTracing : public Tracing {
DeviceQueueTracing() = default; DeviceQueueTracing() = default;


// Destructor // Destructor
~DeviceQueueTracing() = default;
~DeviceQueueTracing() override = default;


// Record tracing data // Record tracing data
// @return Status - The error code return // @return Status - The error code return
@@ -41,7 +41,7 @@ class DeviceQueueTracing : public Tracing {
// @return Status - The error code return // @return Status - The error code return
Status SaveToFile() override; Status SaveToFile() override;


Status Init(const std::string &dir_path, const std::string &device_id);
Status Init(const std::string &dir_path, const std::string &device_id) override;


private: private:
std::vector<std::string> value_; std::vector<std::string> value_;


+ 1
- 0
mindspore/ccsrc/dataset/engine/perf/monitor.cc View File

@@ -25,6 +25,7 @@ namespace dataset {
Monitor::Monitor(ExecutionTree *tree) : tree_(tree) { Monitor::Monitor(ExecutionTree *tree) : tree_(tree) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
sampling_interval_ = cfg->monitor_sampling_interval(); sampling_interval_ = cfg->monitor_sampling_interval();
max_samples_ = 0;
} }


Status Monitor::operator()() { Status Monitor::operator()() {


+ 2
- 0
mindspore/ccsrc/dataset/engine/perf/monitor.h View File

@@ -33,6 +33,8 @@ class Monitor {


Monitor() = default; Monitor() = default;


~Monitor() = default;

// Functor for Perf Monitor main loop. // Functor for Perf Monitor main loop.
// This function will be the entry point of Mindspore::Dataset::Task // This function will be the entry point of Mindspore::Dataset::Task
Status operator()(); Status operator()();


+ 1
- 1
mindspore/ccsrc/dataset/engine/perf/profiling.h View File

@@ -99,7 +99,7 @@ class ProfilingManager {
// If profiling is enabled. // If profiling is enabled.
bool IsProfilingEnable() const; bool IsProfilingEnable() const;


std::unordered_map<std::string, std::shared_ptr<Sampling>> &GetSamplingNodes() { return sampling_nodes_; }
const std::unordered_map<std::string, std::shared_ptr<Sampling>> &GetSamplingNodes() { return sampling_nodes_; }


private: private:
std::unordered_map<std::string, std::shared_ptr<Tracing>> tracing_nodes_; std::unordered_map<std::string, std::shared_ptr<Tracing>> tracing_nodes_;


+ 2
- 1
mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc View File

@@ -119,7 +119,8 @@ TdtStatus TdtPlugin::translate(const TensorRow &ts_row, std::vector<DataItem> &i
data_item.tensorShape_ = dataShapes; data_item.tensorShape_ = dataShapes;
data_item.tensorType_ = datatype; data_item.tensorType_ = datatype;
data_item.dataLen_ = ts->SizeInBytes(); data_item.dataLen_ = ts->SizeInBytes();
data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<uchar *>(&(*ts->begin<uint8_t>())), [](void *elem) {});
data_item.dataPtr_ =
std::shared_ptr<void>(reinterpret_cast<uchar *>(&(*ts->begin<uint8_t>())), [](const void *elem) {});
items.emplace_back(data_item); items.emplace_back(data_item);
MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is " MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is "
<< ts->Size() << "."; << ts->Size() << ".";


+ 0
- 1
mindspore/ccsrc/dataset/kernels/data/fill_op.cc View File

@@ -21,7 +21,6 @@


namespace mindspore { namespace mindspore {
namespace dataset { namespace dataset {

Status FillOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { Status FillOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output); IO_CHECK(input, output);
Status s = Fill(input, output, fill_value_); Status s = Fill(input, output, fill_value_);


+ 0
- 3
mindspore/ccsrc/dataset/kernels/data/fill_op.h View File

@@ -26,7 +26,6 @@


namespace mindspore { namespace mindspore {
namespace dataset { namespace dataset {

class FillOp : public TensorOp { class FillOp : public TensorOp {
public: public:
explicit FillOp(std::shared_ptr<Tensor> value) : fill_value_(value) {} explicit FillOp(std::shared_ptr<Tensor> value) : fill_value_(value) {}
@@ -39,9 +38,7 @@ class FillOp : public TensorOp {
private: private:
std::shared_ptr<Tensor> fill_value_; std::shared_ptr<Tensor> fill_value_;
}; };

} // namespace dataset } // namespace dataset

} // namespace mindspore } // namespace mindspore


#endif // MINDSPORE_FILL_OP_H #endif // MINDSPORE_FILL_OP_H

+ 7
- 3
mindspore/ccsrc/mindrecord/meta/shard_column.cc View File

@@ -351,7 +351,7 @@ vector<uint8_t> ShardColumn::CompressInt(const vector<uint8_t> &src_bytes, const
// Write this int to destination blob // Write this int to destination blob
uint64_t u_n = *reinterpret_cast<uint64_t *>(&i_n); uint64_t u_n = *reinterpret_cast<uint64_t *>(&i_n);
auto temp_bytes = UIntToBytesLittle(u_n, dst_int_type); auto temp_bytes = UIntToBytesLittle(u_n, dst_int_type);
for (uint64_t j = 0; j < (kUnsignedOne << dst_int_type); j++) {
for (uint64_t j = 0; j < (kUnsignedOne << static_cast<uint8_t>(dst_int_type)); j++) {
dst_bytes[i_dst++] = temp_bytes[j]; dst_bytes[i_dst++] = temp_bytes[j];
} }


@@ -406,7 +406,10 @@ MSRStatus ShardColumn::UncompressInt(const uint64_t &column_id, std::unique_ptr<


auto data = reinterpret_cast<const unsigned char *>(array_data.get()); auto data = reinterpret_cast<const unsigned char *>(array_data.get());
*data_ptr = std::make_unique<unsigned char[]>(*num_bytes); *data_ptr = std::make_unique<unsigned char[]>(*num_bytes);
memcpy_s(data_ptr->get(), *num_bytes, data, *num_bytes);
int ret_code = memcpy_s(data_ptr->get(), *num_bytes, data, *num_bytes);
if (ret_code != 0) {
MS_LOG(ERROR) << "Failed to copy data!";
}


return SUCCESS; return SUCCESS;
} }
@@ -444,7 +447,8 @@ int64_t ShardColumn::BytesLittleToMinIntType(const std::vector<uint8_t> &bytes_a
const IntegerType &src_i_type, IntegerType *dst_i_type) { const IntegerType &src_i_type, IntegerType *dst_i_type) {
uint64_t u_temp = 0; uint64_t u_temp = 0;
for (uint64_t i = 0; i < (kUnsignedOne << static_cast<uint8_t>(src_i_type)); i++) { for (uint64_t i = 0; i < (kUnsignedOne << static_cast<uint8_t>(src_i_type)); i++) {
u_temp = (u_temp << kBitsOfByte) + bytes_array[pos + (kUnsignedOne << src_i_type) - kUnsignedOne - i];
u_temp = (u_temp << kBitsOfByte) +
bytes_array[pos + (kUnsignedOne << static_cast<uint8_t>(src_i_type)) - kUnsignedOne - i];
} }


int64_t i_out; int64_t i_out;


+ 15
- 13
mindspore/dataset/transforms/vision/py_transforms_util.py View File

@@ -554,26 +554,28 @@ def adjust_hue(img, hue_factor):
Returns: Returns:
img (PIL Image), Hue adjusted image. img (PIL Image), Hue adjusted image.
""" """
if not -0.5 <= hue_factor <= 0.5:
raise ValueError('hue_factor {} is not in [-0.5, 0.5].'.format(hue_factor))
image = img
image_hue_factor = hue_factor
if not -0.5 <= image_hue_factor <= 0.5:
raise ValueError('image_hue_factor {} is not in [-0.5, 0.5].'.format(image_hue_factor))


if not is_pil(img):
raise TypeError(augment_error_message.format(type(img)))
if not is_pil(image):
raise TypeError(augment_error_message.format(type(image)))


input_mode = img.mode
if input_mode in {'L', '1', 'I', 'F'}:
return img
mode = image.mode
if mode in {'L', '1', 'I', 'F'}:
return image


h, s, v = img.convert('HSV').split()
hue, saturation, value = img.convert('HSV').split()


np_h = np.array(h, dtype=np.uint8)
np_hue = np.array(hue, dtype=np.uint8)


with np.errstate(over='ignore'): with np.errstate(over='ignore'):
np_h += np.uint8(hue_factor * 255)
h = Image.fromarray(np_h, 'L')
np_hue += np.uint8(image_hue_factor * 255)
hue = Image.fromarray(np_hue, 'L')


img = Image.merge('HSV', (h, s, v)).convert(input_mode)
return img
image = Image.merge('HSV', (hue, saturation, value)).convert(mode)
return image




def to_type(img, output_type): def to_type(img, output_type):


+ 0
- 124
tests/ut/python/dataset/prep_data.py View File

@@ -1,124 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# import jsbeautifier

import os
import urllib
import urllib.request


def create_data_cache_dir():
cwd = os.getcwd()
target_directory = os.path.join(cwd, "data_cache")
try:
if not os.path.exists(target_directory):
os.mkdir(target_directory)
except OSError:
print("Creation of the directory %s failed" % target_directory)
return target_directory


def download_and_uncompress(files, source_url, target_directory, is_tar=False):
for f in files:
url = source_url + f
target_file = os.path.join(target_directory, f)

##check if file already downloaded
if not (os.path.exists(target_file) or os.path.exists(target_file[:-3])):
urllib.request.urlretrieve(url, target_file)
if is_tar:
print("extracting from local tar file " + target_file)
rc = os.system("tar -C " + target_directory + " -xvf " + target_file)
else:
print("unzipping " + target_file)
rc = os.system("gunzip -f " + target_file)
if rc != 0:
print("Failed to uncompress ", target_file, " removing")
os.system("rm " + target_file)
##exit with error so that build script will fail
raise SystemError
else:
print("Using cached dataset at ", target_file)


def download_mnist(target_directory=None):
if target_directory is None:
target_directory = create_data_cache_dir()

##create mnst directory
target_directory = os.path.join(target_directory, "mnist")
try:
if not os.path.exists(target_directory):
os.mkdir(target_directory)
except OSError:
print("Creation of the directory %s failed" % target_directory)

MNIST_URL = "http://yann.lecun.com/exdb/mnist/"
files = ['train-images-idx3-ubyte.gz',
'train-labels-idx1-ubyte.gz',
't10k-images-idx3-ubyte.gz',
't10k-labels-idx1-ubyte.gz']
download_and_uncompress(files, MNIST_URL, target_directory, is_tar=False)

return target_directory, os.path.join(target_directory, "datasetSchema.json")


CIFAR_URL = "https://www.cs.toronto.edu/~kriz/"


def download_cifar(target_directory, files, directory_from_tar):
if target_directory is None:
target_directory = create_data_cache_dir()

download_and_uncompress([files], CIFAR_URL, target_directory, is_tar=True)

##if target dir was specify move data from directory created by tar
##and put data into target dir
if target_directory is not None:
tar_dir_full_path = os.path.join(target_directory, directory_from_tar)
all_files = os.path.join(tar_dir_full_path, "*")
cmd = "mv " + all_files + " " + target_directory
if os.path.exists(tar_dir_full_path):
print("copy files back to target_directory")
print("Executing: ", cmd)
rc1 = os.system(cmd)
rc2 = os.system("rm -r " + tar_dir_full_path)
if rc1 != 0 or rc2 != 0:
print("error when running command: ", cmd)
download_file = os.path.join(target_directory, files)
print("removing " + download_file)
os.system("rm " + download_file)

##exit with error so that build script will fail
raise SystemError

##change target directory to directory after tar
return os.path.join(target_directory, directory_from_tar)


def download_cifar10(target_directory=None):
return download_cifar(target_directory, "cifar-10-binary.tar.gz", "cifar-10-batches-bin")


def download_cifar100(target_directory=None):
return download_cifar(target_directory, "cifar-100-binary.tar.gz", "cifar-100-binary")


def download_all_for_test(cwd):
download_mnist(os.path.join(cwd, "testMnistData"))


##Download all datasets to existing test directories
if __name__ == "__main__":
download_all_for_test(os.getcwd())

Loading…
Cancel
Save