You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

_dataset_graph.py 5.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Define dataset graph related operations."""
  16. from importlib import import_module
  17. from mindspore import log as logger
  18. from mindspore.train import lineage_pb2
  19. class DatasetGraph:
  20. """Handle the data graph and packages it into binary data."""
  21. def package_dataset_graph(self, dataset):
  22. """
  23. packages dataset graph into binary data
  24. Args:
  25. dataset (MindData): refer to MindDataset
  26. Returns:
  27. DatasetGraph, a object of lineage_pb2.DatasetGraph.
  28. """
  29. dataset_package = import_module('mindspore.dataset')
  30. dataset_dict = dataset_package.serialize(dataset)
  31. dataset_graph_proto = lineage_pb2.DatasetGraph()
  32. if not isinstance(dataset_dict, dict):
  33. logger.warning("The dataset graph serialized from dataset object is not a dict. "
  34. "Its type is %r.", type(dataset_dict).__name__)
  35. return dataset_graph_proto
  36. if "children" in dataset_dict:
  37. children = dataset_dict.pop("children")
  38. if children:
  39. self._package_children(children=children, message=dataset_graph_proto)
  40. self._package_current_dataset(operation=dataset_dict, message=dataset_graph_proto)
  41. return dataset_graph_proto
  42. def _package_children(self, children, message):
  43. """
  44. Package children in dataset operation.
  45. Args:
  46. children (list[dict]): Child operations.
  47. message (DatasetGraph): Children proto message.
  48. """
  49. for child in children:
  50. if child:
  51. child_graph_message = getattr(message, "children").add()
  52. grandson = child.pop("children")
  53. if grandson:
  54. self._package_children(children=grandson, message=child_graph_message)
  55. # package other parameters
  56. self._package_current_dataset(operation=child, message=child_graph_message)
  57. def _package_current_dataset(self, operation, message):
  58. """
  59. Package operation parameters in event message.
  60. Args:
  61. operation (dict): Operation dict.
  62. message (Operation): Operation proto message.
  63. """
  64. for key, value in operation.items():
  65. if value and key == "operations":
  66. for operator in value:
  67. self._package_enhancement_operation(
  68. operator,
  69. message.operations.add()
  70. )
  71. elif value and key == "sampler":
  72. self._package_enhancement_operation(
  73. value,
  74. message.sampler
  75. )
  76. else:
  77. self._package_parameter(key, value, message.parameter)
  78. def _package_enhancement_operation(self, operation, message):
  79. """
  80. Package enhancement operation in MapDataset.
  81. Args:
  82. operation (dict): Enhancement operation.
  83. message (Operation): Enhancement operation proto message.
  84. """
  85. for key, value in operation.items():
  86. if isinstance(value, (list, tuple)):
  87. if all(isinstance(ele, int) for ele in value):
  88. message.size.extend(value)
  89. else:
  90. message.weights.extend(value)
  91. else:
  92. self._package_parameter(key, value, message.operationParam)
  93. @staticmethod
  94. def _package_parameter(key, value, message):
  95. """
  96. Package parameters in operation.
  97. Args:
  98. key (str): Operation name.
  99. value (Union[str, bool, int, float, list, None]): Operation args.
  100. message (OperationParameter): Operation proto message.
  101. """
  102. if isinstance(value, str):
  103. message.mapStr[key] = value
  104. elif isinstance(value, bool):
  105. message.mapBool[key] = value
  106. elif isinstance(value, int):
  107. message.mapInt[key] = value
  108. elif isinstance(value, float):
  109. message.mapDouble[key] = value
  110. elif isinstance(value, (list, tuple)) and key != "operations":
  111. if value:
  112. replace_value_list = list(map(lambda x: "" if x is None else x, value))
  113. message.mapStrList[key].strValue.extend(replace_value_list)
  114. elif value is None:
  115. message.mapStr[key] = "None"
  116. else:
  117. logger.warning("The parameter %r is not recorded, because its type is not supported in event package. "
  118. "Its type is %r.", key, type(value).__name__)