You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

_dataset_graph.py 5.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Define dataset graph related operations."""
  16. import json
  17. from importlib import import_module
  18. from mindspore.train import lineage_pb2
  class DatasetGraph:
      """Handle the dataset graph and package it into a lineage protobuf message."""

      def package_dataset_graph(self, dataset):
          """
          Package the dataset graph into a ``lineage_pb2.DatasetGraph`` message.

          Args:
              dataset (MindData): Dataset to serialize; refer to MindDataset.

          Returns:
              DatasetGraph, an object of lineage_pb2.DatasetGraph.

          Raises:
              ValueError: If a serialized parameter has a type that
                  `_package_parameter` does not support.
          """
          # Resolved at call time rather than at module import time.
          dataset_package = import_module('mindspore.dataset')
          serialized = dataset_package.serialize(dataset)
          # Round-trip through JSON so that only JSON-native types remain
          # (e.g. tuples become lists) before the type dispatch below.
          dataset_dict = json.loads(json.dumps(serialized))

          dataset_graph_proto = lineage_pb2.DatasetGraph()
          children = dataset_dict.pop("children", None)
          if children:
              self._package_children(children=children, message=dataset_graph_proto)
          self._package_current_dataset(operation=dataset_dict, message=dataset_graph_proto)
          return dataset_graph_proto

      def _package_children(self, children, message):
          """
          Package the child operations of a dataset operation, recursively.

          Each non-empty child gets a new entry in ``message.children``. The
          ``"children"`` key is removed from every child dict that is processed,
          so the input dicts are modified in place.

          Args:
              children (list[dict]): Child operations.
              message (DatasetGraph): Proto message that receives the children.
          """
          for child in children:
              if not child:
                  continue
              child_graph_message = message.children.add()
              # A child without a "children" key is treated as a leaf instead of
              # raising KeyError, matching the guard used for the root dataset.
              grandchildren = child.pop("children", None)
              if grandchildren:
                  self._package_children(children=grandchildren, message=child_graph_message)
              # Package the remaining parameters of the child itself.
              self._package_current_dataset(operation=child, message=child_graph_message)

      def _package_current_dataset(self, operation, message):
          """
          Package the parameters of a single dataset operation.

          A non-empty ``"operations"`` list and a non-empty ``"sampler"`` dict are
          packaged as enhancement operations; every other entry is packaged as a
          plain parameter.

          Args:
              operation (dict): Operation dict.
              message (Operation): Operation proto message.

          Raises:
              ValueError: If a value has a type that `_package_parameter` does not
                  support. This includes an empty ``"operations"`` list and an
                  empty ``"sampler"`` dict, which fall through to the plain
                  parameter branch.
                  NOTE(review): confirm whether those two cases should be
                  skipped instead of rejected.
          """
          for key, value in operation.items():
              if value and key == "operations":
                  for operator in value:
                      self._package_enhancement_operation(
                          operator,
                          message.operations.add()
                      )
              elif value and key == "sampler":
                  self._package_enhancement_operation(
                      value,
                      message.sampler
                  )
              else:
                  self._package_parameter(key, value, message.parameter)

      def _package_enhancement_operation(self, operation, message):
          """
          Package an enhancement operation (a map operator or a sampler).

          List values are appended to ``message.size`` when every element is an
          int (an empty list also takes this branch and appends nothing) and to
          ``message.weights`` otherwise; the key of a list value is not recorded.
          Every other value is stored in ``message.operationParam``.

          Args:
              operation (dict): Enhancement operation.
              message (Operation): Enhancement operation proto message.
          """
          for key, value in operation.items():
              if not isinstance(value, list):
                  self._package_parameter(key, value, message.operationParam)
              elif all(isinstance(ele, int) for ele in value):
                  message.size.extend(value)
              else:
                  message.weights.extend(value)

      @staticmethod
      def _package_parameter(key, value, message):
          """
          Store one parameter in the map field that matches its Python type.

          Args:
              key (str): Parameter name.
              value (Union[str, bool, int, float, list, None]): Parameter value.
              message (OperationParameter): Operation parameter proto message.

          Raises:
              ValueError: If the value type is not supported, or if a list is
                  passed under the key ``"operations"``.
          """
          if isinstance(value, str):
              message.mapStr[key] = value
          elif isinstance(value, bool):
              # bool must be tested before int because bool is a subclass of int.
              message.mapBool[key] = value
          elif isinstance(value, int):
              message.mapInt[key] = value
          elif isinstance(value, float):
              message.mapDouble[key] = value
          elif isinstance(value, list) and key != "operations":
              # An empty list is accepted but nothing is recorded for it.
              if value:
                  # None entries are stored as empty strings.
                  replace_value_list = ["" if item is None else item for item in value]
                  message.mapStrList[key].strValue.extend(replace_value_list)
          elif value is None:
              message.mapStr[key] = "None"
          else:
              raise ValueError(f"Parameter {key} is not supported in event package.")