| @@ -13,12 +13,12 @@ | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Introduction to mindrecord: | |||
| Introduction of mindrecord: | |||
| Mindrecord is a module to implement reading, writing, searching and | |||
| converting for MindSpore format dataset. Users could load(modify) | |||
| mindrecord data through FileReader(FileWriter). Users could also | |||
| convert other format dataset to mindrecord data through | |||
| convert other format datasets to mindrecord data through | |||
| corresponding sub-module. | |||
| """ | |||
| @@ -29,10 +29,10 @@ class FileReader: | |||
| Class to read MindRecord File series. | |||
| Args: | |||
| file_name (str, list[str]): One of MindRecord File or file list. | |||
| file_name (str, list[str]): One of MindRecord File or a file list. | |||
| num_consumer(int, optional): Number of consumer threads which load data to memory (default=4). | |||
| It should not be smaller than 1 or larger than the number of CPU. | |||
| columns (list[str], optional): List of fields which corresponding data would be read (default=None). | |||
| It should not be smaller than 1 or larger than the number of CPUs. | |||
| columns (list[str], optional): A list of fields where corresponding data would be read (default=None). | |||
| operator(int, optional): Reserved parameter for operators (default=None). | |||
| Raises: | |||
| @@ -72,7 +72,7 @@ class FileReader: | |||
| Yield a batch of data according to columns at a time. | |||
| Yields: | |||
| dict: keys is the same as columns. | |||
| dictionary: keys are the same as columns. | |||
| Raises: | |||
| MRMUnsupportedSchemaError: If schema is invalid. | |||
| @@ -39,11 +39,11 @@ class FileWriter: | |||
| Args: | |||
| file_name (str): File name of MindRecord File. | |||
| shard_num (int, optional): Number of MindRecord File (default=1). | |||
| shard_num (int, optional): The number of MindRecord files (default=1). | |||
| It should be between [1, 1000]. | |||
| Raises: | |||
| ParamValueError: If file_name or shard_num is invalid. | |||
| ParamValueError: If `file_name` or `shard_num` is invalid. | |||
| """ | |||
| def __init__(self, file_name, shard_num=1): | |||
| check_filename(file_name) | |||
| @@ -88,7 +88,7 @@ class FileWriter: | |||
| Raises: | |||
| ParamValueError: If file_name is invalid. | |||
| FileNameError: If path contains invalid character. | |||
| FileNameError: If path contains invalid characters. | |||
| MRMOpenError: If failed to open MindRecord File. | |||
| MRMOpenForAppendError: If failed to open file for appending data. | |||
| """ | |||
| @@ -111,14 +111,14 @@ class FileWriter: | |||
| def add_schema(self, content, desc=None): | |||
| """ | |||
| Returns a schema id if added schema successfully, or raise exception. | |||
| Return a schema id if schema is added successfully, or raise an exception. | |||
| Args: | |||
| content (dict): Dict of user defined schema. | |||
| content (dict): Dictionary of user defined schema. | |||
| desc (str, optional): String of schema description (default=None). | |||
| Returns: | |||
| int, schema id. | |||
| An integer, schema id. | |||
| Raises: | |||
| MRMInvalidSchemaError: If schema is invalid. | |||
| @@ -145,7 +145,7 @@ class FileWriter: | |||
| ParamTypeError: If index field is invalid. | |||
| MRMDefineIndexError: If index field is not primitive type. | |||
| MRMAddIndexError: If failed to add index field. | |||
| MRMGetMetaError: If the schema is not set or get meta failed. | |||
| MRMGetMetaError: If the schema is not set or failed to get meta. | |||
| """ | |||
| if not index_fields or not isinstance(index_fields, list): | |||
| raise ParamTypeError('index_fields', 'list') | |||
| @@ -205,7 +205,7 @@ class FileWriter: | |||
| def open_and_set_header(self): | |||
| """ | |||
| Open writer and set header | |||
| Open writer and set header. | |||
| """ | |||
| if not self._writer.is_open: | |||
| @@ -245,7 +245,7 @@ class FileWriter: | |||
| """ | |||
| Set the size of header which contains shard information, schema information, \ | |||
| page meta information, etc. The larger the header, the more training data \ | |||
| a single mindrecord file can store. | |||
| a single MindRecord file can store. | |||
| Args: | |||
| header_size (int): Size of header, between 16KB and 128MB. | |||
| @@ -278,7 +278,7 @@ class FileWriter: | |||
| def commit(self): | |||
| """ | |||
| Flush data to disk and generate the corresponding db files. | |||
| Flush data to disk and generate the corresponding database files. | |||
| Returns: | |||
| MSRStatus, SUCCESS or FAILED. | |||
| @@ -28,12 +28,12 @@ class MindPage: | |||
| Class to read MindRecord File series in pagination. | |||
| Args: | |||
| file_name (str): One of MindRecord File or file list. | |||
| num_consumer(int, optional): Number of consumer threads which load data to memory (default=4). | |||
| It should not be smaller than 1 or larger than the number of CPU. | |||
| file_name (str): One of MindRecord File or a file list. | |||
| num_consumer (int, optional): The number of consumer threads which load data to memory (default=4). | |||
| It should not be smaller than 1 or larger than the number of CPUs. | |||
| Raises: | |||
| ParamValueError: If file_name, num_consumer or columns is invalid. | |||
| ParamValueError: If `file_name`, `num_consumer` or columns is invalid. | |||
| MRMInitSegmentError: If failed to initialize ShardSegment. | |||
| """ | |||
| def __init__(self, file_name, num_consumer=4): | |||
| @@ -97,7 +97,7 @@ class MindPage: | |||
| @property | |||
| def category_field(self): | |||
| """Getter function for category field""" | |||
| """Getter function for category fields.""" | |||
| return self._category_field | |||
| @category_field.setter | |||
| @@ -127,7 +127,7 @@ class MindPage: | |||
| Query by category id in pagination. | |||
| Args: | |||
| category_id (int): Category id, referred to the return of read_category_info. | |||
| category_id (int): Category id, referred to the return of `read_category_info`. | |||
| page (int): Index of page. | |||
| num_row (int): Number of rows in a page. | |||
| @@ -153,7 +153,7 @@ class MindPage: | |||
| Args: | |||
| category_name (str): String of category field's value, | |||
| referred to the return of read_category_info. | |||
| referred to the return of `read_category_info`. | |||
| page (int): Index of page. | |||
| num_row (int): Number of rows in a page. | |||
| @@ -35,7 +35,7 @@ __all__ = ['Cifar100ToMR'] | |||
| class Cifar100ToMR: | |||
| """ | |||
| Class is for transformation from cifar100 to MindRecord. | |||
| A class to transform from cifar100 to MindRecord. | |||
| Args: | |||
| source (str): the cifar100 directory to be transformed. | |||
| @@ -71,10 +71,10 @@ class Cifar100ToMR: | |||
| Executes transformation from cifar100 to MindRecord. | |||
| Args: | |||
| fields (list[str]): list of index field, ie. ["fine_label", "coarse_label"]. | |||
| fields (list[str]): A list of index fields, e.g. ["fine_label", "coarse_label"]. | |||
| Returns: | |||
| SUCCESS/FAILED, whether successfully written into MindRecord. | |||
| SUCCESS or FAILED, whether cifar100 is successfully transformed to MindRecord. | |||
| """ | |||
| if fields and not isinstance(fields, list): | |||
| raise ValueError("The parameter fields should be None or list") | |||
| @@ -34,7 +34,7 @@ __all__ = ['Cifar10ToMR'] | |||
| class Cifar10ToMR: | |||
| """ | |||
| Class is for transformation from cifar10 to MindRecord. | |||
| A class to transform from cifar10 to MindRecord. | |||
| Args: | |||
| source (str): the cifar10 directory to be transformed. | |||
| @@ -70,10 +70,10 @@ class Cifar10ToMR: | |||
| Executes transformation from cifar10 to MindRecord. | |||
| Args: | |||
| fields (list[str], optional): list of index fields, ie. ["label"] (default=None). | |||
| fields (list[str], optional): A list of index fields, e.g. ["label"] (default=None). | |||
| Returns: | |||
| SUCCESS/FAILED, whether successfully written into MindRecord. | |||
| SUCCESS or FAILED, whether cifar10 is successfully transformed to MindRecord. | |||
| """ | |||
| if fields and not isinstance(fields, list): | |||
| raise ValueError("The parameter fields should be None or list") | |||
| @@ -31,17 +31,17 @@ __all__ = ['CsvToMR'] | |||
| class CsvToMR: | |||
| """ | |||
| Class is for transformation from csv to MindRecord. | |||
| A class to transform from csv to MindRecord. | |||
| Args: | |||
| source (str): the file path of csv. | |||
| destination (str): the MindRecord file path to transform into. | |||
| columns_list(list[str], optional): List of columns to be read(default=None). | |||
| columns_list (list[str], optional): A list of columns to be read (default=None). | |||
| partition_number (int, optional): partition size (default=1). | |||
| Raises: | |||
| ValueError: If source, destination, partition_number is invalid. | |||
| RuntimeError: If columns_list is invalid. | |||
| ValueError: If `source`, `destination` or `partition_number` is invalid. | |||
| RuntimeError: If `columns_list` is invalid. | |||
| """ | |||
| def __init__(self, source, destination, columns_list=None, partition_number=1): | |||
| @@ -121,7 +121,7 @@ class CsvToMR: | |||
| Executes transformation from csv to MindRecord. | |||
| Returns: | |||
| SUCCESS/FAILED, whether successfully written into MindRecord. | |||
| SUCCESS or FAILED, whether csv is successfully transformed to MindRecord. | |||
| """ | |||
| if not os.path.exists(self.source): | |||
| raise IOError("Csv file {} do not exist.".format(self.source)) | |||
| @@ -28,11 +28,10 @@ __all__ = ['ImageNetToMR'] | |||
| class ImageNetToMR: | |||
| """ | |||
| Class is for transformation from imagenet to MindRecord. | |||
| A class to transform from imagenet to MindRecord. | |||
| Args: | |||
| map_file (str): the map file which indicates label. | |||
| the map file content should like this: | |||
| map_file (str): the map file that indicates label. The map file content should be like this: | |||
| .. code-block:: | |||
| @@ -41,12 +40,12 @@ class ImageNetToMR: | |||
| n02110185 2 | |||
| n02096294 3 | |||
| image_dir (str): image directory contains n02119789, n02100735, n02110185, n02096294 dir. | |||
| image_dir (str): image directory that contains the n02119789, n02100735, n02110185 and n02096294 directories. | |||
| destination (str): the MindRecord file path to transform into. | |||
| partition_number (int, optional): partition size (default=1). | |||
| Raises: | |||
| ValueError: If map_file, image_dir or destination is invalid. | |||
| ValueError: If `map_file`, `image_dir` or `destination` is invalid. | |||
| """ | |||
| def __init__(self, map_file, image_dir, destination, partition_number=1): | |||
| check_filename(map_file) | |||
| @@ -123,7 +122,7 @@ class ImageNetToMR: | |||
| Executes transformation from imagenet to MindRecord. | |||
| Returns: | |||
| SUCCESS/FAILED, whether successfully written into MindRecord. | |||
| SUCCESS or FAILED, whether imagenet is successfully transformed to MindRecord. | |||
| """ | |||
| t0_total = time.time() | |||
| @@ -34,17 +34,17 @@ __all__ = ['MnistToMR'] | |||
| class MnistToMR: | |||
| """ | |||
| Class is for transformation from Mnist to MindRecord. | |||
| A class to transform from Mnist to MindRecord. | |||
| Args: | |||
| source (str): directory which contains t10k-images-idx3-ubyte.gz, | |||
| train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz, | |||
| train-labels-idx1-ubyte.gz. | |||
| source (str): directory that contains t10k-images-idx3-ubyte.gz, | |||
| train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz | |||
| and train-labels-idx1-ubyte.gz. | |||
| destination (str): the MindRecord file directory to transform into. | |||
| partition_number (int, optional): partition size (default=1). | |||
| Raises: | |||
| ValueError: If source/destination/partition_number is invalid. | |||
| ValueError: If `source`, `destination` or `partition_number` is invalid. | |||
| """ | |||
| def __init__(self, source, destination, partition_number=1): | |||
| @@ -173,7 +173,7 @@ class MnistToMR: | |||
| Executes transformation from Mnist test part to MindRecord. | |||
| Returns: | |||
| SUCCESS/FAILED, whether successfully written into MindRecord. | |||
| SUCCESS or FAILED, whether Mnist is successfully transformed to MindRecord. | |||
| """ | |||
| t0_total = time.time() | |||
| @@ -99,25 +99,25 @@ def _cast_name(key): | |||
| class TFRecordToMR: | |||
| """ | |||
| Class is for tranformation from TFRecord to MindRecord. | |||
| A class to transform from TFRecord to MindRecord. | |||
| Args: | |||
| source (str): the TFRecord file to be transformed. | |||
| destination (str): the MindRecord file path to transform into. | |||
| feature_dict (dict): a dictionary that states the feature type, i.e. | |||
| feature_dict (dict): a dictionary that states the feature type, e.g. | |||
| feature_dict = {"xxxx": tf.io.FixedLenFeature([], tf.string), \ | |||
| "yyyy": tf.io.FixedLenFeature([], tf.int64)} | |||
| **Follow case which uses VarLenFeature not support** | |||
| **The following case, which uses VarLenFeature, is not supported.** | |||
| feature_dict = {"context": {"xxxx": tf.io.FixedLenFeature([], tf.string), \ | |||
| "yyyy": tf.io.VarLenFeature(tf.int64)}, \ | |||
| "sequence": {"zzzz": tf.io.FixedLenSequenceFeature([], tf.float32)}} | |||
| bytes_fields (list, optional): the bytes fields which are in feature_dict and can be images bytes. | |||
| bytes_fields (list, optional): the bytes fields which are in `feature_dict` and can be image bytes. | |||
| Raises: | |||
| ValueError: If parameter is invalid. | |||
| Exception: when tensorflow module not found or version is not correct. | |||
| Exception: when tensorflow module is not found or version is not correct. | |||
| """ | |||
| def __init__(self, source, destination, feature_dict, bytes_fields=None): | |||
| if not tf: | |||
| @@ -211,7 +211,7 @@ class TFRecordToMR: | |||
| ms_dict[cast_key] = float(val.numpy()) | |||
| def tfrecord_iterator(self): | |||
| """Yield a dict with key to be fields in schema, and value to be data.""" | |||
| """Yield a dictionary whose keys are fields in schema.""" | |||
| dataset = tf.data.TFRecordDataset(self.source) | |||
| dataset = dataset.map(self._parse_record) | |||
| iterator = dataset.__iter__() | |||
| @@ -237,10 +237,10 @@ class TFRecordToMR: | |||
| def run(self): | |||
| """ | |||
| Executes transform from TFRecord to MindRecord. | |||
| Execute transformation from TFRecord to MindRecord. | |||
| Returns: | |||
| SUCCESS/FAILED, whether successfuly written into MindRecord. | |||
| SUCCESS or FAILED, whether TFRecord is successfully transformed to MindRecord. | |||
| """ | |||
| writer = FileWriter(self.destination) | |||
| logger.info("Transformed MindRecord schema is: {}, TFRecord feature dict is: {}" | |||