You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

c_api_dataset_save.cc 6.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <stdio.h>
  17. #include "common/common.h"
  18. #include "minddata/dataset/include/datasets.h"
  19. #include "minddata/dataset/include/transforms.h"
  20. // IR non-leaf nodes
  21. #include "minddata/dataset/engine/ir/datasetops/batch_node.h"
  22. #include "minddata/dataset/engine/ir/datasetops/bucket_batch_by_length_node.h"
  23. #include "minddata/dataset/engine/ir/datasetops/concat_node.h"
  24. #include "minddata/dataset/engine/ir/datasetops/map_node.h"
  25. #include "minddata/dataset/engine/ir/datasetops/project_node.h"
  26. #include "minddata/dataset/engine/ir/datasetops/rename_node.h"
  27. #include "minddata/dataset/engine/ir/datasetops/shuffle_node.h"
  28. #include "minddata/dataset/engine/ir/datasetops/skip_node.h"
  29. #include "minddata/dataset/engine/ir/datasetops/zip_node.h"
  30. // IR leaf nodes
  31. #include "minddata/dataset/engine/ir/datasetops/source/cifar10_node.h"
  32. #include "minddata/dataset/engine/ir/datasetops/source/minddata_node.h"
  33. using namespace mindspore::dataset::api;
  34. using mindspore::dataset::Tensor;
  35. class MindDataTestPipeline : public UT::DatasetOpTesting {
  36. protected:
  37. };
  38. TEST_F(MindDataTestPipeline, TestSaveCifar10AndLoad) {
  39. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSaveCifar10AndLoad(single mindrecord file).";
  40. // Stage 1: load original dataset
  41. // Create a Cifar10 Dataset
  42. std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  43. std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", SequentialSampler(0, 10));
  44. EXPECT_NE(ds, nullptr);
  45. // Create an iterator over the result of the above dataset
  46. // This will trigger the creation of the Execution Tree and launch it.
  47. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  48. EXPECT_NE(iter, nullptr);
  49. // Iterate the dataset and get each row
  50. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  51. std::vector<std::shared_ptr<Tensor>> original_data;
  52. iter->GetNextRow(&row);
  53. // Save original data for comparison
  54. uint64_t i = 0;
  55. while (row.size() != 0) {
  56. auto label = row["label"];
  57. original_data.push_back(label);
  58. MS_LOG(INFO) << "Tensor label: " << *label;
  59. iter->GetNextRow(&row);
  60. i++;
  61. }
  62. // Expect 10 samples
  63. EXPECT_EQ(i, 10);
  64. // Manually terminate the pipeline
  65. iter->Stop();
  66. // Stage 2: Save data processed by the dataset pipeline
  67. // Create an iterator over the result of the above dataset
  68. // This will trigger the creation of the Execution Tree and launch it.
  69. std::string temp_file = datasets_root_path_ + "/testCifar10Data/mind.mind";
  70. std::string temp_file_db = datasets_root_path_ + "/testCifar10Data/mind.mind.db";
  71. bool rc = ds->Save(temp_file);
  72. EXPECT_EQ(rc, true);
  73. // Stage 3: Load dataset from file output by stage 2
  74. // Create a MindData Dataset
  75. std::shared_ptr<Dataset> ds_minddata = MindData(temp_file, {}, SequentialSampler(0, 10));
  76. // Create objects for the tensor ops
  77. // uint32 will be casted to int64 implicitly in mindrecord file, so we have to cast it back to uint32
  78. std::shared_ptr<TensorOperation> type_cast = transforms::TypeCast("uint32");
  79. EXPECT_NE(type_cast, nullptr);
  80. // Create a Map operation on ds
  81. ds_minddata = ds_minddata->Map({type_cast}, {"label"});
  82. EXPECT_NE(ds_minddata, nullptr);
  83. // Create an iterator over the result of the above dataset
  84. // This will trigger the creation of the Execution Tree and launch it.
  85. std::shared_ptr<Iterator> iter_minddata = ds_minddata->CreateIterator();
  86. EXPECT_NE(iter_minddata, nullptr);
  87. // Iterate the dataset and get each row
  88. std::unordered_map<std::string, std::shared_ptr<Tensor>> row_minddata;
  89. iter_minddata->GetNextRow(&row_minddata);
  90. // Check column name for each row
  91. EXPECT_NE(row_minddata.find("image"), row_minddata.end());
  92. EXPECT_NE(row_minddata.find("label"), row_minddata.end());
  93. // Expect the output data is same with original_data
  94. uint64_t j = 0;
  95. while (row_minddata.size() != 0) {
  96. auto label = row_minddata["label"];
  97. EXPECT_EQ(*original_data[j], *label);
  98. MS_LOG(INFO) << "Tensor label: " << *label;
  99. iter_minddata->GetNextRow(&row_minddata);
  100. j++;
  101. }
  102. // Expect 10 samples
  103. EXPECT_EQ(j, 10);
  104. // Manually terminate the pipeline
  105. iter_minddata->Stop();
  106. // Delete temp file
  107. EXPECT_EQ(remove(temp_file.c_str()), 0);
  108. EXPECT_EQ(remove(temp_file_db.c_str()), 0);
  109. }
  110. TEST_F(MindDataTestPipeline, TestSaveFail) {
  111. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSaveFail with incorrect param.";
  112. // Create a Cifar10 Dataset
  113. std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  114. std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", SequentialSampler(0, 10));
  115. EXPECT_NE(ds, nullptr);
  116. // fail with invalid dataset_path
  117. std::string temp_file1 = "";
  118. bool rc1 = ds->Save(temp_file1);
  119. EXPECT_EQ(rc1, false);
  120. // fail with invalid dataset_path
  121. std::string temp_file2 = datasets_root_path_ + "/testCifar10Data/";
  122. bool rc2 = ds->Save(temp_file2);
  123. EXPECT_EQ(rc2, false);
  124. // fail with invalid num_files
  125. std::string temp_file3 = datasets_root_path_ + "/testCifar10Data/mind.mind";
  126. bool rc3 = ds->Save(temp_file3, 0);
  127. EXPECT_EQ(rc3, false);
  128. // fail with invalid num_files
  129. std::string temp_file4 = datasets_root_path_ + "/testCifar10Data/mind.mind";
  130. bool rc4 = ds->Save(temp_file4, 1001);
  131. EXPECT_EQ(rc4, false);
  132. // fail with invalid dataset_type
  133. std::string temp_file5 = datasets_root_path_ + "/testCifar10Data/mind.mind";
  134. bool rc5 = ds->Save(temp_file5, 5, "tfrecord");
  135. EXPECT_EQ(rc5, false);
  136. }