from __future__ import absolute_import
import numpy as np
from .Node import Op
from .._base import DNNL_LIB
from ..cpu_links import softmax as cpu_softmax
from ..gpu_links import CuDNN_softmax
from ..gpu_links import CuDNN_softmax_gradient


def softmax_func(y):
    """Numerically stable softmax along the last axis.

    Subtracting the row-wise maximum before exponentiating avoids
    overflow without changing the result, since softmax is invariant
    to adding a constant to every logit.
    """
    b = y - np.max(y, axis=-1, keepdims=True)
    expb = np.exp(b)
    softmax = expb / np.sum(expb, axis=-1, keepdims=True)
    return softmax
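
# Illustrative example (not part of the original module):
#   >>> softmax_func(np.array([1.0, 2.0, 3.0]))
#   array([0.09003057, 0.24472847, 0.66524096])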


def softmax_gradient_func(y, dy):
    """Backward pass of softmax.

    Given the softmax output ``y`` and the upstream gradient ``dy``,
    computes the vector-Jacobian product
    ``dx = y * (dy - sum(dy * y, axis=-1, keepdims=True))``.
    """
    dx = y * (dy - (dy * y).sum(axis=-1, keepdims=True))
    return dx
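
# Derivation sketch (a note added for clarity, not in the original file):
# with s = softmax(x), ds_i/dx_j = s_i * (delta_ij - s_j), so
#   dx_j = sum_i dy_i * s_i * (delta_ij - s_j)
#        = s_j * (dy_j - sum_i dy_i * s_i),
# which is exactly the expression computed above. One useful consequence:
# rows of dx always sum to zero, since sum_j s_j = 1.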


class SoftmaxOp(Op):
    def __init__(self, node_A, ctx=None):
        super().__init__(SoftmaxOp, [node_A], ctx)

    def compute(self, input_vals, output_val, stream_handle=None):
        if self.on_cpu:
            if DNNL_LIB['DnnlSoftmax']:
                cpu_softmax(input_vals[0], output_val)
            else:
                output_val[:] = softmax_func(input_vals[0].asnumpy())
        else:
            CuDNN_softmax(input_vals[0], output_val, stream_handle)

    def gradient(self, output_grad):
        # Prefer SoftmaxCrossEntropyOp over using SoftmaxOp directly;
        # second derivatives of SoftmaxCrossEntropyOp are not supported.
        return [softmax_gradient_op(self, output_grad, ctx=self.raw_ctx)]

    def infer_shape(self, input_shapes):
        # Softmax preserves the shape of its single input.
        assert len(input_shapes) == 1
        return input_shapes[0]
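
# Illustrative graph construction (a sketch; `Variable` and `gradients`
# are assumed framework entry points and are not defined in this file):
#
#   x = Variable(name="x")
#   y = softmax_op(x)
#   # During autodiff, SoftmaxOp.gradient() wires in a SoftmaxGradientOp:
#   grads = gradients(y, [x])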


class SoftmaxGradientOp(Op):
    def __init__(self, node_y, grad, ctx=None):
        super().__init__(SoftmaxGradientOp, [node_y, grad], ctx)

    def compute(self, input_vals, output_val, stream_handle=None):
        if self.on_cpu:
            # No DNNL kernel is used here; fall back to the numpy reference.
            output_val[:] = softmax_gradient_func(
                input_vals[0].asnumpy(), input_vals[1].asnumpy())
        else:
            CuDNN_softmax_gradient(
                input_vals[0], input_vals[1], output_val, stream_handle)

    def gradient(self, output_grad):
        raise NotImplementedError

    def infer_shape(self, input_shapes):
        # Inputs are (softmax output, upstream gradient); the result
        # has the same shape as the softmax output.
        assert len(input_shapes) == 2
        return input_shapes[0]


def softmax_op(node, ctx=None):
    """Computes softmax of the input along the last axis.

    Parameters:
    ----
    node : Node
        Input variable.

    Returns:
    ----
    A new Node instance created by Op.

    """
    return SoftmaxOp(node, ctx=ctx)


def softmax_gradient_op(node_y, grad, ctx=None):
    """Computes the gradient of softmax.

    Parameters:
    ----
    node_y : Node
        Output variable of the forward softmax.
    grad : Node
        Upstream gradient variable, dy.

    Returns:
    ----
    A new Node instance created by Op.

    """
    return SoftmaxGradientOp(node_y, grad, ctx=ctx)
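

# A minimal numpy-level sanity check (added for illustration; it exercises
# only the pure-numpy helpers defined above):
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    y = softmax_func(rng.standard_normal((4, 5)))
    dy = rng.standard_normal((4, 5))
    dx = softmax_gradient_func(y, dy)
    # Rows of softmax sum to one; rows of its VJP sum to zero.
    assert np.allclose(y.sum(axis=-1), 1.0)
    assert np.allclose(dx.sum(axis=-1), 0.0)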