|
- #!/usr/bin/env python3
- # coding: utf-8
- # Copyright 2019 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- """Automatic differentiation of tensor expressions."""
- import akg
- from akg.tvm._ffi.function import _init_api
- from akg.tvm._ffi.node import NodeBase, register_node
- from akg.utils.format_transform import get_shape
-
- _init_api("akg.autodiff")
-
def collect_subtensors_by_name(tensor, name, result):
    """
    Find all the subtensors whose op name contains the pattern `name`.

    The computation tree rooted at `tensor` is walked depth-first (inputs are
    visited before the tensor itself) and every match is accumulated into
    `result` in place.

    Args:
        tensor: An input tensor.
        name: the `name` pattern to be matched (plain substring test on `tensor.op.name`).
        result: list which receives the matches; it is mutated and returned.

    Returns:
        The same `result` list. Each match is stored as a one-element list `[tensor]`.
    """
    for child in tensor.op.input_tensors:
        # The recursive call appends straight into `result`; its return value is
        # that very same list, so it must not be extended back into `result`.
        collect_subtensors_by_name(child, name, result)
    if name in tensor.op.name:
        result.append([tensor])
    return result
-
-
@akg.tvm.register_func("akg.autodiff.export_to_DOT")
def export_to_dot(tensors, filename="test.dot"):
    """
    Export computation tree of tensors to a DOT file.

    Every tensor reachable through `op.input_tensors` becomes one DOT node
    (ellipse for ComputeOp, box otherwise) and every producer/consumer pair
    becomes one edge labelled with the producer's shape. For a binary
    arithmetic ComputeOp with fewer than two input tensors, FloatImm operands
    are drawn as extra "Const_a_*" / "Const_b_*" boxes.

    Args:
        tensors: A single tensor, a Python list/tuple of tensors or a tvm Array of tensors.
            Any other value results in an empty graph.
        filename: the name of the DOT file to be generated.
    """
    binary_ops = (akg.tvm.expr.Mul, akg.tvm.expr.Add, akg.tvm.expr.Sub, akg.tvm.expr.Div)
    # Labels for body expressions whose text does not depend on the tensor itself.
    static_opcode_labels = (
        (akg.tvm.expr.Reduce, 'Reduce'),
        (akg.tvm.expr.Mul, '*'),
        (akg.tvm.expr.Add, '+'),
        (akg.tvm.expr.Sub, '-'),
        (akg.tvm.expr.Div, '/'),
    )

    dot_names = {}           # tensor -> unique node name used in the DOT file
    used_names = set()       # every DOT node name handed out so far
    written_nodes = set()    # tensors whose node declaration is already in the file
    edge_visited = set()     # tensors whose incoming edges are already in the file
    exported_edges = set()   # (from_name, to_name) pairs already written
    repeat_name = 0          # running number behind the '_r' suffix of duplicated names

    def export_tensor_shape(a_shape):
        # Extents are printed through `.value` when they have one; anything else
        # is printed via str() instead of failing on the missing attribute.
        return "(" + "".join(str(getattr(dim, "value", dim)) + ", " for dim in a_shape) + ")"

    def collect_nodes(tensor):
        """Assign a unique DOT name to `tensor` and to everything it depends on."""
        nonlocal repeat_name
        if tensor in dot_names:
            return
        base_name = tensor.op.name
        dot_name = base_name
        # If the name is duplicated, a postfix '_r' + number is added to the end;
        # keep counting until the generated name is unused as well.
        while dot_name in used_names:
            dot_name = base_name + '_r' + str(repeat_name)
            repeat_name += 1
        dot_names[tensor] = dot_name
        used_names.add(dot_name)
        for child in tensor.op.input_tensors:
            collect_nodes(child)

    def const_operands(tensor):
        """Return [(side, imm)] for the FloatImm operands that get their own const node."""
        if not isinstance(tensor.op, akg.tvm.tensor.ComputeOp):
            return []
        body = tensor.op.body[0]
        if not isinstance(body, binary_ops) or len(tensor.op.input_tensors) >= 2:
            return []
        return [(side, imm) for side, imm in (("a", body.a), ("b", body.b))
                if isinstance(imm, akg.tvm.expr.FloatImm)]

    def export_node_name(tensor):
        """Build the DOT declaration line of `tensor`."""
        dot_name = dot_names[tensor]
        shape_text = export_tensor_shape(tensor.shape)
        if not isinstance(tensor.op, akg.tvm.tensor.ComputeOp):
            # Any non-compute op (e.g. a placeholder) is drawn as a box.
            return ' "' + dot_name + '" [label = "' + dot_name + '\\n' + shape_text + \
                '"; shape = box; style = filled; color = lightseagreen];'

        body = tensor.op.body[0]
        tensor_opcode_name = 'Unsupported yet OP'
        for expr_type, label in static_opcode_labels:
            if isinstance(body, expr_type):
                tensor_opcode_name = label
                break
        else:
            if isinstance(body, akg.tvm.expr.Call):
                tensor_opcode_name = 'Call ' + body.name
            elif isinstance(body, akg.tvm.expr.Cast):
                tensor_opcode_name = 'Cast:' + tensor.op.input_tensors[0].dtype + '=>' + tensor.dtype
        return ' "' + dot_name + '" [label = "' + dot_name + '\\n' + shape_text + '; ' + \
            tensor.dtype + '\\n' + tensor_opcode_name + \
            '"; shape = ellipse; style = filled; color = lightgrey];'

    def export_nodes(tensor, f):
        """Write node declarations, inputs first; each tensor is declared only once."""
        if tensor in written_nodes:
            return
        written_nodes.add(tensor)
        for child in tensor.op.input_tensors:
            export_nodes(child, f)
        for side, imm in const_operands(tensor):
            f.write(' "Const_' + side + '_' + dot_names[tensor] + '" [label = "' + str(imm.value) +
                    '\\n' + imm.dtype + '"; shape = box; style = filled; color = lightseagreen];' + "\n")
        f.write(export_node_name(tensor) + "\n")

    def write_edge(f, from_name, to_name, label):
        if (from_name, to_name) in exported_edges:
            return
        exported_edges.add((from_name, to_name))
        f.write(' ' + from_name + " -> " + to_name + ' [label = "' + label + '"];\n')

    def export_edges(tensor, f):
        """Write the edges of the subtree below `tensor`; each tensor is expanded only once."""
        if tensor in edge_visited:
            return
        edge_visited.add(tensor)
        to_name = '"' + dot_names[tensor] + '"'
        for child in tensor.op.input_tensors:
            export_edges(child, f)
            write_edge(f, '"' + dot_names[child] + '"', to_name, export_tensor_shape(child.shape))
        for side, _ in const_operands(tensor):
            write_edge(f, '"Const_' + side + '_' + dot_names[tensor] + '"', to_name, "(const)")

    if isinstance(tensors, akg.tvm.tensor.Tensor):
        list_tensors = [tensors]
    elif isinstance(tensors, (akg.tvm.container.Array, list, tuple)):
        list_tensors = list(tensors)
    else:
        list_tensors = []

    with open(filename, "w") as f_out:
        # "ratio" is the Graphviz attribute name; the former "ration" is not a
        # Graphviz attribute, so the setting never took effect.
        f_out.write('digraph G {\n ratio = compress;\n nodesep = 0.1; rankdir = BT\n')

        for a_tensor in list_tensors:
            collect_nodes(a_tensor)
            export_nodes(a_tensor, f_out)
            export_edges(a_tensor, f_out)

        f_out.write("\n}\n")
-
-
# Module-level registry: written by register_variables(), read by get_variables().
variable_map = dict()


def register_variables(name, input, output):
    """
    Store an (output, input) pair in the module registry under `name`.

    Args:
        name (str): key of the registry entry.
        input: value stored as the second element of the entry.
        output: value stored as the first element of the entry.

    Raises:
        ValueError: If `name` is not a str.
    """
    if isinstance(name, str):
        variable_map[name] = [output, input]
        return
    raise ValueError("key {} is not str.".format(name))


def get_variables(name):
    """
    Look up the registry entry stored under `name`.

    Args:
        name (str): key of the registry entry.

    Returns:
        The stored entry, i.e. the list `[output, input]` written by `register_variables`.

    Raises:
        ValueError: If `name` is not a str, or if the stored entry is empty.
        KeyError: If nothing is stored under `name`.
    """
    if not isinstance(name, str):
        raise ValueError("key {} is not str.".format(name))
    entry = variable_map[name]
    if entry:
        return entry
    raise ValueError("value to key {} is empty.".format(name))
-
-
@register_node
class DifferentiationResult(NodeBase):
    """
    Container node holding the outcome of a differentiation.

    Args:
        result (list[tvm.tensor.Tensor]):
            The requested adjoints, i.e. the Jacobians or gradients of the given output
            with respect to the given inputs.
        adjoints (dict[tvm.tensor.Tensor, tvm.tensor.Tensor]):
            A map from tensors to the corresponding adjoints (including internal nodes).
        adjoint_summands (dict[tvm.tensor.Tensor, dict[tvm.tensor.Tensor, tvm.tensor.Tensor]]):
            Single summands of the adjoints.

    Indexing and `len()` are forwarded to `result` and raise ValueError when `result` is empty.
    """

    # The tvm Maps are converted to plain dicts below because Map compares keys by reference,
    # which is wrong for tvm.tensor.Tensors. Once Map is fixed these properties may be removed.

    @property
    def adjoints(self):
        """Adjoint map as a plain dict."""
        raw_map = NodeBase.__getattr__(self, 'adjoints')
        return {tensor: adjoint for tensor, adjoint in raw_map.items()}

    @property
    def adjoint_summands(self):
        """Adjoint summands as a dict of plain dicts."""
        raw_map = NodeBase.__getattr__(self, 'adjoint_summands')
        converted = {}
        for tensor, summands in raw_map.items():
            converted[tensor] = dict(summands.items())
        return converted

    def _check_not_empty(self):
        """Raise ValueError when no result was explicitly requested."""
        if self.result:
            return
        raise ValueError("The result of differentiation does not contain any explicitly "
                         "requested results, so using it as an iterable is probably a mistake. "
                         "Please explicitly use res.adjoints to get adjoints or res.result to "
                         "get the empty list.")

    def __getitem__(self, i):
        self._check_not_empty()
        requested = self.result
        return requested[i]

    def __len__(self):
        self._check_not_empty()
        requested = self.result
        return len(requested)
-
-
def differentiate(output, inputs=None, head=None, ad_attrs=None, new_pld_array=None, override=None, fdiff=None):
    """
    Perform operator-level automatic differentiation.

    Args:
        output (tvm.tensor.Tensor): The tensor to differentiate.
        inputs (list[tvm.tensor.Tensor]): The list of input tensors.
            When the list is empty or None, will perform differentiation with respect to all tensors the output
            depends on (i.e. will compute all adjoints and populate the corresponding dict, but the list of
            results will be empty). Default: None.
        head (tvm.tensor.Tensor): The adjoint of the output.
            In other words, some tensors, by which the Jacobians will be multiplied. Its shape must be of the form
            `prefix + output.shape`. For example, if the shape of `output` is (2, 3), the shape of `head` could
            be (2, 3), (?, 2, 3) and etc.
            If `None` is passed, the identity tensor of shape `output.shape + output.shape` will be used.
            Default: None.
        ad_attrs (dict): The additional attributes for the auto-differentiate computation. Default: None.
        new_pld_array (list): List of additional variables which could be used in differentiation. Default: None.
        override (dict): A dictionary to override differentiation for certain tensors.
            Override is a dictionary with types: {tvm.tensor.Tensor: (list[tvm.tensor.Tensor],
            callable[tvm.tensor.Tensor, list[tvm.tensor.Tensor], tvm.tensor.Tensor, list[tvm.tensor.Tensor]])}.
            This dict maps tensors `t` to pairs `(dependencies, custom_diff)` where `dependencies` is a list of
            tensors which are considered to be inputs of `t` (which may differ from the immediate inputs),
            and `custom_diff` is a custom differentiation function which will be called as
            `custom_diff(t, dependencies, adjoint, ad_attrs, new_pld_array)` and should return a list of adjoints
            corresponding to dependencies. It is called once per `(t, adjoint)` pair and its result is reused
            for every dependency.
            Note that this function differs from the one required for `fdiff`
            in that it takes a list of inputs instead of a single input
            and returns a list of adjoints instead of a single adjoint. Default: None.
        fdiff (callable[tvm.tensor.Tensor, tvm.tensor.Tensor, tvm.tensor.Tensor, tvm.tensor.Tensor]): The default
            function performing differentiation and multiplication, by default `akg.autodiff.DiffBuildingBlock`
            is used. The function must accept parameters:

            - `output` - an output tensor

            - `input` - an input tensor

            - `head` - the adjoint of the output tensor

            - `ad_attrs` - the additional attributes for the auto-differentiate computation

            - `new_pld_array` - the additional tensors with information for the auto-differentiate computation

            The result should be `head` multiplied by the Jacobians of `output` wrt `input`. Default: None.

    Returns:
        DifferentiationResult.
        class DifferentiationResult is used to represent a differentiation result, including:
            - result (list[tvm.tensor.Tensor]):
              The requested adjoints, i.e. the Jacobians or gradients of the given output
              with respect to the given inputs.

            - adjoints (dict{tvm.tensor.Tensor: tvm.tensor.Tensor}):
              A dict from tensors to the corresponding adjoints (including internal nodes).

            - adjoint_summands (dict{tvm.tensor.Tensor: dict{tvm.tensor.Tensor: tvm.tensor.Tensor}}):
              Single summands of the adjoints.

    Raises:
        ValueError: If the shape of `head` is invalid.

    Examples:
        >>> x = akg.tvm.placeholder((32, 3, 28, 28), name='x')
        >>> w1 = akg.tvm.placeholder((10, 3, 3, 3), name='w1')
        >>> z1 = akg.topi.nn.conv2d(x, w1, 1, 0, 1)
        >>> z2 = akg.topi.nn.flatten(z1)
        >>> y = akg.topi.sum(z2)
        >>>
        >>> # produce gradients
        >>> [dw1, dw2] = akg.differentiate(y, [x, w1])
        >>>
        >>> # produce Jacobians
        >>> [jw1, jw2] = akg.differentiate(z2, [x, w1])
        >>>
        >>> # produce Jacobians, the head adjoint for z2 is provided manually
        >>> [dw1, dw2] = akg.differentiate(z2, [x, w1], akg.topi.full_like(z2, 1.0))
        >>>
        >>> # produce gradients wrt all inputs
        >>> res = akg.differentiate(y)
        >>> dw1 = res.adjoints[x]
        >>> dw2 = res.adjoints[w1]
        >>>
        >>> # a custom differentiation function
        >>> head = akg.tvm.placeholder((1,), name = 'head')
        >>> def my_fdiff(out, inp, head, ad_attrs, new_pld_array):
        ...     return [akg.tvm.compute(inp[0].shape, lambda ax0, ax1, ax2, ax3: head[ax0, ax3 + ax2*26 + ax1*676])]
        >>>
        >>> # using a custom differentiation function only for z2
        >>> res = akg.differentiate(y, [x, w1], head, None, None, override={z2: ([z1], my_fdiff)})
    """

    # check whether head shape is compatible with output shape.
    if head is not None:
        output_shape = get_shape(output)
        head_shape = get_shape(head)
        # Take the suffix by a start index counted from the front: with a 0-dimensional
        # output, `head_shape[-0:]` is the whole head shape and never compares equal
        # to the empty output shape.
        suffix_start = len(head_shape) - len(output_shape)
        if suffix_start < 0 or head_shape[suffix_start:] != output_shape:
            raise ValueError("operands could not be broadcast together with head shape %s and output shape %s" %
                             (str(head_shape), str(output_shape)))

    if inputs is None:
        inputs = []

    if fdiff is None:
        fdiff = DiffBuildingBlock

    if override is not None:
        default_fdiff = fdiff
        # (tensor, head) -> list of adjoints returned by the custom function. The dict
        # lives in this closure so it survives between calls of modified_fdiff and the
        # custom function runs only once per (tensor, head) pair.
        custom_adjoints = {}

        def modified_fdiff(out, inp, head, ad_attrs, new_pld_array):
            if out not in override:
                return default_fdiff(out, inp, head, ad_attrs, new_pld_array)
            deps, custom_diff = override[out]
            key = (out, head)
            if key not in custom_adjoints:
                custom_adjoints[key] = custom_diff(out, deps, head, ad_attrs, new_pld_array)
            return custom_adjoints[key][deps.index(inp)]

        override_deps = {t: deps for t, (deps, _) in override.items()}
        # NOTE(review): `new_pld_array` is not forwarded on this path (None is passed, as
        # before) - looks unintended, confirm against the native Differentiate before changing.
        return akg.autodiff.Differentiate(output, inputs, head, ad_attrs, None, modified_fdiff, override_deps)

    if new_pld_array is None:
        new_pld_array = []
    return akg.autodiff.Differentiate(output, inputs, head, ad_attrs, new_pld_array, fdiff)
|