| @@ -35,13 +35,13 @@ class TensorRow { | |||||
| static constexpr row_id_type kDefaultRowId = -1; // Default row id | static constexpr row_id_type kDefaultRowId = -1; // Default row id | ||||
| // Type definitions | // Type definitions | ||||
| typedef dsize_t size_type; | |||||
| typedef std::shared_ptr<Tensor> value_type; | |||||
| typedef std::shared_ptr<Tensor> &reference; | |||||
| typedef const std::shared_ptr<Tensor> &const_reference; | |||||
| typedef std::vector<std::shared_ptr<Tensor>> vector_type; | |||||
| typedef std::vector<std::shared_ptr<Tensor>>::iterator iterator; | |||||
| typedef std::vector<std::shared_ptr<Tensor>>::const_iterator const_iterator; | |||||
| using size_type = dsize_t; | |||||
| using value_type = std::shared_ptr<Tensor>; | |||||
| using reference = std::shared_ptr<Tensor> &; | |||||
| using const_reference = const std::shared_ptr<Tensor> &; | |||||
| using vector_type = std::vector<std::shared_ptr<Tensor>>; | |||||
| using iterator = std::vector<std::shared_ptr<Tensor>>::iterator; | |||||
| using const_iterator = std::vector<std::shared_ptr<Tensor>>::const_iterator; | |||||
| TensorRow() noexcept; | TensorRow() noexcept; | ||||
| @@ -84,7 +84,12 @@ Status IteratorBase::FetchNextTensorRow(TensorRow *out_row) { | |||||
| // Constructor of the DatasetIterator | // Constructor of the DatasetIterator | ||||
| DatasetIterator::DatasetIterator(std::shared_ptr<ExecutionTree> exe_tree) | DatasetIterator::DatasetIterator(std::shared_ptr<ExecutionTree> exe_tree) | ||||
| : IteratorBase(), root_(exe_tree->root()), tracing_(nullptr), cur_batch_num_(0), cur_connector_size_(0) { | |||||
| : IteratorBase(), | |||||
| root_(exe_tree->root()), | |||||
| tracing_(nullptr), | |||||
| cur_batch_num_(0), | |||||
| cur_connector_size_(0), | |||||
| cur_connector_capacity_(0) { | |||||
| std::shared_ptr<Tracing> node; | std::shared_ptr<Tracing> node; | ||||
| Status s = exe_tree->GetProfilingManager()->GetTracingNode(kDatasetIteratorTracingName, &node); | Status s = exe_tree->GetProfilingManager()->GetTracingNode(kDatasetIteratorTracingName, &node); | ||||
| if (s.IsOk()) { | if (s.IsOk()) { | ||||
| @@ -237,6 +237,5 @@ Status BucketBatchByLengthOp::Reset() { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -146,7 +146,6 @@ class BucketBatchByLengthOp : public PipelineOp { | |||||
| std::unique_ptr<ChildIterator> child_iterator_; | std::unique_ptr<ChildIterator> child_iterator_; | ||||
| std::vector<std::unique_ptr<TensorQTable>> buckets_; | std::vector<std::unique_ptr<TensorQTable>> buckets_; | ||||
| }; | }; | ||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -112,6 +112,8 @@ class BuildVocabOp : public ParallelOp { | |||||
| BuildVocabOp(std::shared_ptr<Vocab> vocab, std::vector<std::string> col_names, std::pair<int64_t, int64_t> freq_range, | BuildVocabOp(std::shared_ptr<Vocab> vocab, std::vector<std::string> col_names, std::pair<int64_t, int64_t> freq_range, | ||||
| int64_t top_k, int32_t num_workers, int32_t op_connector_size); | int64_t top_k, int32_t num_workers, int32_t op_connector_size); | ||||
| ~BuildVocabOp() = default; | |||||
| Status WorkerEntry(int32_t worker_id) override; | Status WorkerEntry(int32_t worker_id) override; | ||||
| // collect the work product from each worker | // collect the work product from each worker | ||||
| @@ -30,7 +30,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| ClueOp::Builder::Builder() | ClueOp::Builder::Builder() | ||||
| : builder_device_id_(0), builder_num_devices_(1), builder_num_samples_(0), builder_shuffle_files_(false) { | : builder_device_id_(0), builder_num_devices_(1), builder_num_samples_(0), builder_shuffle_files_(false) { | ||||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | ||||
| @@ -545,6 +544,5 @@ Status ClueOp::CountAllFileRows(const std::vector<std::string> &files, int64_t * | |||||
| } | } | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -264,7 +264,6 @@ class ClueOp : public ParallelOp { | |||||
| bool load_jagged_connector_; | bool load_jagged_connector_; | ||||
| ColKeyMap cols_to_keyword_; | ColKeyMap cols_to_keyword_; | ||||
| }; | }; | ||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // DATASET_ENGINE_DATASETOPS_SOURCE_CLUE_OP_H_ | #endif // DATASET_ENGINE_DATASETOPS_SOURCE_CLUE_OP_H_ | ||||
| @@ -59,8 +59,8 @@ CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { | |||||
| Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) { | Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) { | ||||
| RETURN_IF_NOT_OK(SanityCheck()); | RETURN_IF_NOT_OK(SanityCheck()); | ||||
| if (builder_sampler_ == nullptr) { | if (builder_sampler_ == nullptr) { | ||||
| int64_t num_samples = 0; | |||||
| int64_t start_index = 0; | |||||
| const int64_t num_samples = 0; | |||||
| const int64_t start_index = 0; | |||||
| builder_sampler_ = std::make_shared<SequentialSampler>(start_index, num_samples); | builder_sampler_ = std::make_shared<SequentialSampler>(start_index, num_samples); | ||||
| } | } | ||||
| builder_schema_ = std::make_unique<DataSchema>(); | builder_schema_ = std::make_unique<DataSchema>(); | ||||
| @@ -44,6 +44,8 @@ class ConnectorSize : public Sampling { | |||||
| public: | public: | ||||
| explicit ConnectorSize(ExecutionTree *tree) : tree_(tree) {} | explicit ConnectorSize(ExecutionTree *tree) : tree_(tree) {} | ||||
| ~ConnectorSize() = default; | |||||
| // Driver function for connector size sampling. | // Driver function for connector size sampling. | ||||
| // This function samples the connector size of every nodes within the ExecutionTree | // This function samples the connector size of every nodes within the ExecutionTree | ||||
| Status Sample() override; | Status Sample() override; | ||||
| @@ -54,7 +56,7 @@ class ConnectorSize : public Sampling { | |||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| Status SaveToFile() override; | Status SaveToFile() override; | ||||
| Status Init(const std::string &dir_path, const std::string &device_id); | |||||
| Status Init(const std::string &dir_path, const std::string &device_id) override; | |||||
| // Parse op infomation and transform to json format | // Parse op infomation and transform to json format | ||||
| json ParseOpInfo(const DatasetOp &node, const std::vector<int32_t> &size); | json ParseOpInfo(const DatasetOp &node, const std::vector<int32_t> &size); | ||||
| @@ -28,7 +28,7 @@ class DatasetIteratorTracing : public Tracing { | |||||
| DatasetIteratorTracing() = default; | DatasetIteratorTracing() = default; | ||||
| // Destructor | // Destructor | ||||
| ~DatasetIteratorTracing() = default; | |||||
| ~DatasetIteratorTracing() override = default; | |||||
| // Record tracing data | // Record tracing data | ||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| @@ -40,7 +40,7 @@ class DatasetIteratorTracing : public Tracing { | |||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| Status SaveToFile() override; | Status SaveToFile() override; | ||||
| Status Init(const std::string &dir_path, const std::string &device_id); | |||||
| Status Init(const std::string &dir_path, const std::string &device_id) override; | |||||
| private: | private: | ||||
| std::vector<std::string> value_; | std::vector<std::string> value_; | ||||
| @@ -29,7 +29,7 @@ class DeviceQueueTracing : public Tracing { | |||||
| DeviceQueueTracing() = default; | DeviceQueueTracing() = default; | ||||
| // Destructor | // Destructor | ||||
| ~DeviceQueueTracing() = default; | |||||
| ~DeviceQueueTracing() override = default; | |||||
| // Record tracing data | // Record tracing data | ||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| @@ -41,7 +41,7 @@ class DeviceQueueTracing : public Tracing { | |||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| Status SaveToFile() override; | Status SaveToFile() override; | ||||
| Status Init(const std::string &dir_path, const std::string &device_id); | |||||
| Status Init(const std::string &dir_path, const std::string &device_id) override; | |||||
| private: | private: | ||||
| std::vector<std::string> value_; | std::vector<std::string> value_; | ||||
| @@ -25,6 +25,7 @@ namespace dataset { | |||||
| Monitor::Monitor(ExecutionTree *tree) : tree_(tree) { | Monitor::Monitor(ExecutionTree *tree) : tree_(tree) { | ||||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | ||||
| sampling_interval_ = cfg->monitor_sampling_interval(); | sampling_interval_ = cfg->monitor_sampling_interval(); | ||||
| max_samples_ = 0; | |||||
| } | } | ||||
| Status Monitor::operator()() { | Status Monitor::operator()() { | ||||
| @@ -33,6 +33,8 @@ class Monitor { | |||||
| Monitor() = default; | Monitor() = default; | ||||
| ~Monitor() = default; | |||||
| // Functor for Perf Monitor main loop. | // Functor for Perf Monitor main loop. | ||||
| // This function will be the entry point of Mindspore::Dataset::Task | // This function will be the entry point of Mindspore::Dataset::Task | ||||
| Status operator()(); | Status operator()(); | ||||
| @@ -99,7 +99,7 @@ class ProfilingManager { | |||||
| // If profiling is enabled. | // If profiling is enabled. | ||||
| bool IsProfilingEnable() const; | bool IsProfilingEnable() const; | ||||
| std::unordered_map<std::string, std::shared_ptr<Sampling>> &GetSamplingNodes() { return sampling_nodes_; } | |||||
| const std::unordered_map<std::string, std::shared_ptr<Sampling>> &GetSamplingNodes() { return sampling_nodes_; } | |||||
| private: | private: | ||||
| std::unordered_map<std::string, std::shared_ptr<Tracing>> tracing_nodes_; | std::unordered_map<std::string, std::shared_ptr<Tracing>> tracing_nodes_; | ||||
| @@ -119,7 +119,8 @@ TdtStatus TdtPlugin::translate(const TensorRow &ts_row, std::vector<DataItem> &i | |||||
| data_item.tensorShape_ = dataShapes; | data_item.tensorShape_ = dataShapes; | ||||
| data_item.tensorType_ = datatype; | data_item.tensorType_ = datatype; | ||||
| data_item.dataLen_ = ts->SizeInBytes(); | data_item.dataLen_ = ts->SizeInBytes(); | ||||
| data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<uchar *>(&(*ts->begin<uint8_t>())), [](void *elem) {}); | |||||
| data_item.dataPtr_ = | |||||
| std::shared_ptr<void>(reinterpret_cast<uchar *>(&(*ts->begin<uint8_t>())), [](const void *elem) {}); | |||||
| items.emplace_back(data_item); | items.emplace_back(data_item); | ||||
| MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is " | MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is " | ||||
| << ts->Size() << "."; | << ts->Size() << "."; | ||||
| @@ -21,7 +21,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| Status FillOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | Status FillOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | ||||
| IO_CHECK(input, output); | IO_CHECK(input, output); | ||||
| Status s = Fill(input, output, fill_value_); | Status s = Fill(input, output, fill_value_); | ||||
| @@ -26,7 +26,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| class FillOp : public TensorOp { | class FillOp : public TensorOp { | ||||
| public: | public: | ||||
| explicit FillOp(std::shared_ptr<Tensor> value) : fill_value_(value) {} | explicit FillOp(std::shared_ptr<Tensor> value) : fill_value_(value) {} | ||||
| @@ -39,9 +38,7 @@ class FillOp : public TensorOp { | |||||
| private: | private: | ||||
| std::shared_ptr<Tensor> fill_value_; | std::shared_ptr<Tensor> fill_value_; | ||||
| }; | }; | ||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_FILL_OP_H | #endif // MINDSPORE_FILL_OP_H | ||||
| @@ -351,7 +351,7 @@ vector<uint8_t> ShardColumn::CompressInt(const vector<uint8_t> &src_bytes, const | |||||
| // Write this int to destination blob | // Write this int to destination blob | ||||
| uint64_t u_n = *reinterpret_cast<uint64_t *>(&i_n); | uint64_t u_n = *reinterpret_cast<uint64_t *>(&i_n); | ||||
| auto temp_bytes = UIntToBytesLittle(u_n, dst_int_type); | auto temp_bytes = UIntToBytesLittle(u_n, dst_int_type); | ||||
| for (uint64_t j = 0; j < (kUnsignedOne << dst_int_type); j++) { | |||||
| for (uint64_t j = 0; j < (kUnsignedOne << static_cast<uint8_t>(dst_int_type)); j++) { | |||||
| dst_bytes[i_dst++] = temp_bytes[j]; | dst_bytes[i_dst++] = temp_bytes[j]; | ||||
| } | } | ||||
| @@ -406,7 +406,10 @@ MSRStatus ShardColumn::UncompressInt(const uint64_t &column_id, std::unique_ptr< | |||||
| auto data = reinterpret_cast<const unsigned char *>(array_data.get()); | auto data = reinterpret_cast<const unsigned char *>(array_data.get()); | ||||
| *data_ptr = std::make_unique<unsigned char[]>(*num_bytes); | *data_ptr = std::make_unique<unsigned char[]>(*num_bytes); | ||||
| memcpy_s(data_ptr->get(), *num_bytes, data, *num_bytes); | |||||
| int ret_code = memcpy_s(data_ptr->get(), *num_bytes, data, *num_bytes); | |||||
| if (ret_code != 0) { | |||||
| MS_LOG(ERROR) << "Failed to copy data!"; | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -444,7 +447,8 @@ int64_t ShardColumn::BytesLittleToMinIntType(const std::vector<uint8_t> &bytes_a | |||||
| const IntegerType &src_i_type, IntegerType *dst_i_type) { | const IntegerType &src_i_type, IntegerType *dst_i_type) { | ||||
| uint64_t u_temp = 0; | uint64_t u_temp = 0; | ||||
| for (uint64_t i = 0; i < (kUnsignedOne << static_cast<uint8_t>(src_i_type)); i++) { | for (uint64_t i = 0; i < (kUnsignedOne << static_cast<uint8_t>(src_i_type)); i++) { | ||||
| u_temp = (u_temp << kBitsOfByte) + bytes_array[pos + (kUnsignedOne << src_i_type) - kUnsignedOne - i]; | |||||
| u_temp = (u_temp << kBitsOfByte) + | |||||
| bytes_array[pos + (kUnsignedOne << static_cast<uint8_t>(src_i_type)) - kUnsignedOne - i]; | |||||
| } | } | ||||
| int64_t i_out; | int64_t i_out; | ||||
| @@ -554,26 +554,28 @@ def adjust_hue(img, hue_factor): | |||||
| Returns: | Returns: | ||||
| img (PIL Image), Hue adjusted image. | img (PIL Image), Hue adjusted image. | ||||
| """ | """ | ||||
| if not -0.5 <= hue_factor <= 0.5: | |||||
| raise ValueError('hue_factor {} is not in [-0.5, 0.5].'.format(hue_factor)) | |||||
| image = img | |||||
| image_hue_factor = hue_factor | |||||
| if not -0.5 <= image_hue_factor <= 0.5: | |||||
| raise ValueError('image_hue_factor {} is not in [-0.5, 0.5].'.format(image_hue_factor)) | |||||
| if not is_pil(img): | |||||
| raise TypeError(augment_error_message.format(type(img))) | |||||
| if not is_pil(image): | |||||
| raise TypeError(augment_error_message.format(type(image))) | |||||
| input_mode = img.mode | |||||
| if input_mode in {'L', '1', 'I', 'F'}: | |||||
| return img | |||||
| mode = image.mode | |||||
| if mode in {'L', '1', 'I', 'F'}: | |||||
| return image | |||||
| h, s, v = img.convert('HSV').split() | |||||
| hue, saturation, value = img.convert('HSV').split() | |||||
| np_h = np.array(h, dtype=np.uint8) | |||||
| np_hue = np.array(hue, dtype=np.uint8) | |||||
| with np.errstate(over='ignore'): | with np.errstate(over='ignore'): | ||||
| np_h += np.uint8(hue_factor * 255) | |||||
| h = Image.fromarray(np_h, 'L') | |||||
| np_hue += np.uint8(image_hue_factor * 255) | |||||
| hue = Image.fromarray(np_hue, 'L') | |||||
| img = Image.merge('HSV', (h, s, v)).convert(input_mode) | |||||
| return img | |||||
| image = Image.merge('HSV', (hue, saturation, value)).convert(mode) | |||||
| return image | |||||
| def to_type(img, output_type): | def to_type(img, output_type): | ||||
| @@ -1,124 +0,0 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd. | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # import jsbeautifier | |||||
| import os | |||||
| import urllib | |||||
| import urllib.request | |||||
| def create_data_cache_dir(): | |||||
| cwd = os.getcwd() | |||||
| target_directory = os.path.join(cwd, "data_cache") | |||||
| try: | |||||
| if not os.path.exists(target_directory): | |||||
| os.mkdir(target_directory) | |||||
| except OSError: | |||||
| print("Creation of the directory %s failed" % target_directory) | |||||
| return target_directory | |||||
| def download_and_uncompress(files, source_url, target_directory, is_tar=False): | |||||
| for f in files: | |||||
| url = source_url + f | |||||
| target_file = os.path.join(target_directory, f) | |||||
| ##check if file already downloaded | |||||
| if not (os.path.exists(target_file) or os.path.exists(target_file[:-3])): | |||||
| urllib.request.urlretrieve(url, target_file) | |||||
| if is_tar: | |||||
| print("extracting from local tar file " + target_file) | |||||
| rc = os.system("tar -C " + target_directory + " -xvf " + target_file) | |||||
| else: | |||||
| print("unzipping " + target_file) | |||||
| rc = os.system("gunzip -f " + target_file) | |||||
| if rc != 0: | |||||
| print("Failed to uncompress ", target_file, " removing") | |||||
| os.system("rm " + target_file) | |||||
| ##exit with error so that build script will fail | |||||
| raise SystemError | |||||
| else: | |||||
| print("Using cached dataset at ", target_file) | |||||
| def download_mnist(target_directory=None): | |||||
| if target_directory is None: | |||||
| target_directory = create_data_cache_dir() | |||||
| ##create mnst directory | |||||
| target_directory = os.path.join(target_directory, "mnist") | |||||
| try: | |||||
| if not os.path.exists(target_directory): | |||||
| os.mkdir(target_directory) | |||||
| except OSError: | |||||
| print("Creation of the directory %s failed" % target_directory) | |||||
| MNIST_URL = "http://yann.lecun.com/exdb/mnist/" | |||||
| files = ['train-images-idx3-ubyte.gz', | |||||
| 'train-labels-idx1-ubyte.gz', | |||||
| 't10k-images-idx3-ubyte.gz', | |||||
| 't10k-labels-idx1-ubyte.gz'] | |||||
| download_and_uncompress(files, MNIST_URL, target_directory, is_tar=False) | |||||
| return target_directory, os.path.join(target_directory, "datasetSchema.json") | |||||
| CIFAR_URL = "https://www.cs.toronto.edu/~kriz/" | |||||
| def download_cifar(target_directory, files, directory_from_tar): | |||||
| if target_directory is None: | |||||
| target_directory = create_data_cache_dir() | |||||
| download_and_uncompress([files], CIFAR_URL, target_directory, is_tar=True) | |||||
| ##if target dir was specify move data from directory created by tar | |||||
| ##and put data into target dir | |||||
| if target_directory is not None: | |||||
| tar_dir_full_path = os.path.join(target_directory, directory_from_tar) | |||||
| all_files = os.path.join(tar_dir_full_path, "*") | |||||
| cmd = "mv " + all_files + " " + target_directory | |||||
| if os.path.exists(tar_dir_full_path): | |||||
| print("copy files back to target_directory") | |||||
| print("Executing: ", cmd) | |||||
| rc1 = os.system(cmd) | |||||
| rc2 = os.system("rm -r " + tar_dir_full_path) | |||||
| if rc1 != 0 or rc2 != 0: | |||||
| print("error when running command: ", cmd) | |||||
| download_file = os.path.join(target_directory, files) | |||||
| print("removing " + download_file) | |||||
| os.system("rm " + download_file) | |||||
| ##exit with error so that build script will fail | |||||
| raise SystemError | |||||
| ##change target directory to directory after tar | |||||
| return os.path.join(target_directory, directory_from_tar) | |||||
| def download_cifar10(target_directory=None): | |||||
| return download_cifar(target_directory, "cifar-10-binary.tar.gz", "cifar-10-batches-bin") | |||||
| def download_cifar100(target_directory=None): | |||||
| return download_cifar(target_directory, "cifar-100-binary.tar.gz", "cifar-100-binary") | |||||
| def download_all_for_test(cwd): | |||||
| download_mnist(os.path.join(cwd, "testMnistData")) | |||||
| ##Download all datasets to existing test directories | |||||
| if __name__ == "__main__": | |||||
| download_all_for_test(os.getcwd()) | |||||