| @@ -410,6 +410,7 @@ Status DEPipeline::SaveDataset(const std::vector<std::string> &file_names, const | |||
| std::vector<std::string> index_fields; | |||
| s = FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields); | |||
| RETURN_IF_NOT_OK(s); | |||
| MS_LOG(DEBUG) << "Schema of saved mindrecord: " << mr_json.dump(); | |||
| if (mindrecord::SUCCESS != | |||
| mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) { | |||
| RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader."); | |||
| @@ -569,6 +570,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string, | |||
| if (column_name_id_map.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("Error: column not found."); | |||
| } | |||
| json dataset_schema; | |||
| for (auto &col : column_name_id_map) { | |||
| auto idx = col.second; | |||
| auto column_name = col.first; | |||
| @@ -580,6 +582,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string, | |||
| auto shapes = column_shape.AsVector(); | |||
| std::vector<int> mr_shape(shapes.begin(), shapes.end()); | |||
| std::string el = column_type.ToString(); | |||
| dataset_schema[column_name] = el; | |||
| if (mindrecord::kTypesMap.find(el) == mindrecord::kTypesMap.end()) { | |||
| std::string err_msg("Error: can not support data type: " + el); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| @@ -605,6 +608,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string, | |||
| if (mr_type == "bytes" || !mr_shape.empty()) continue; | |||
| index_fields->emplace_back(column_name); // candidate of index fields | |||
| } | |||
| MS_LOG(DEBUG) << "Schema of dataset: " << dataset_schema.dump(); | |||
| return Status::OK(); | |||
| } | |||
| Status DEPipeline::BuildMindrecordSamplerChain(const py::handle &handle, | |||
| @@ -83,7 +83,7 @@ MSRStatus ShardWriter::OpenDataFiles(bool append) { | |||
| // if not append and mindrecord file exist, return FAILED | |||
| fs->open(common::SafeCStr(file), std::ios::in | std::ios::binary); | |||
| if (fs->good()) { | |||
| MS_LOG(ERROR) << "MindRecord file already existed."; | |||
| MS_LOG(ERROR) << "MindRecord file already existed, please delete file: " << common::SafeCStr(file); | |||
| fs->close(); | |||
| return FAILED; | |||
| } | |||
| @@ -1041,12 +1041,61 @@ class Dataset: | |||
| """ | |||
| Save the dynamic data processed by the dataset pipeline in a common dataset format (supported: mindrecord). | |||
| Implicit type casting is applied when saving data as mindrecord. The table below shows how each type is cast. | |||
| .. list-table:: Implicit Type Casting of Saving as mindrecord | |||
| :widths: 25 25 50 | |||
| :header-rows: 1 | |||
| * - type in 'dataset' | |||
| - type in 'mindrecord' | |||
| - detail | |||
| * - DE_BOOL | |||
| - None | |||
| - Not supported | |||
| * - DE_INT8 | |||
| - int32 | |||
| - | |||
| * - DE_UINT8 | |||
| - bytes(1D uint8) | |||
| - Drop dimension | |||
| * - DE_INT16 | |||
| - int32 | |||
| - | |||
| * - DE_UINT16 | |||
| - int32 | |||
| - | |||
| * - DE_INT32 | |||
| - int32 | |||
| - | |||
| * - DE_UINT32 | |||
| - int64 | |||
| - | |||
| * - DE_INT64 | |||
| - int64 | |||
| - | |||
| * - DE_UINT64 | |||
| - None | |||
| - Not supported | |||
| * - DE_FLOAT16 | |||
| - None | |||
| - Not supported | |||
| * - DE_FLOAT32 | |||
| - float32 | |||
| - | |||
| * - DE_FLOAT64 | |||
| - float64 | |||
| - | |||
| * - DE_STRING | |||
| - string | |||
| - Multi-dimensional DE_STRING is not supported | |||
| Note: | |||
| 1. To save the samples in order, set the dataset's shuffle to False and num_files to 1. | |||
| 2. Before calling this function, do not use batch or repeat operators, or data augmentation | |||
| operators with random attributes in the map operator. | |||
| 3. Mindreocrd do not support np.uint64, multi-dimensional np.uint8(drop dimension) and | |||
| multi-dimensional string. | |||
| 3. Mindrecord does not support DE_UINT64, multi-dimensional DE_UINT8(drop dimension) and | |||
| multi-dimensional DE_STRING. | |||
| Args: | |||
| file_name (str): Path to dataset file. | |||