|
- from hetu.gpu_ops import Variable
- from hetu import cpu_links as cpu_op
- from hetu import gpu_links as gpu_op
- from hetu import ndarray
- import numpy as np
- import ctypes
-
-
class BaseInit(object):
    """Base class for tensor initializers.

    Stores the target shape and dispatches initialization to a
    device-specific hook (``init_on_gpu`` / ``init_on_cpu``) that concrete
    subclasses must implement. Also provides the shared helper that forwards
    initialization requests to a parameter-server communicator.
    """

    def __init__(self, shape):
        # Normalize to a tuple so lists and other sequences are accepted.
        self.shape = tuple(shape)

    def __call__(self, node, seed, np_rand=None, stream=None):
        """Allocate ``node.tensor_value`` and fill it on the node's device.

        Args:
            node: Object exposing ``id`` and ``ctx``; its ``tensor_value``
                attribute is (re)assigned to a fresh ``ndarray.empty``.
            seed: Base seed; ``node.id`` is added and the sum is stored on
                ``self.seed`` for the device hooks to use.
            np_rand: Forwarded unchanged to ``init_on_cpu``.
            stream: Forwarded unchanged to ``init_on_gpu``.
        """
        self.node = node
        self.seed = seed + node.id
        node.tensor_value = ndarray.empty(self.shape, ctx=node.ctx)
        if ndarray.is_gpu_ctx(node.ctx):
            self.init_on_gpu(stream)
        else:
            self.init_on_cpu(np_rand)

    def init_on_gpu(self, stream):
        """Fill ``self.node.tensor_value`` on a GPU context (abstract)."""
        raise NotImplementedError

    def init_on_cpu(self, np_rand):
        """Fill ``self.node.tensor_value`` on a CPU context (abstract)."""
        raise NotImplementedError

    def init_on_ps(self, comm, nid, param_type, init_type, arg1, arg2, seed, opt):
        """Request initialization of this tensor through ``comm.InitTensor``.

        Args:
            comm: Object exposing ``InitTensor``.
            nid: Identifier passed through as the first argument.
            param_type: 0 for Dense, 1 for Sparse, 2 for CacheSparse.
            init_type: Integer code of the initialization scheme.
            arg1, arg2: Scheme parameters, sent as C doubles.
            seed: Seed, sent as an unsigned 64-bit integer.
            opt: Sequence whose first three items are passed through as-is.

        Dense tensors are described as ``length = prod(shape)`` and
        ``width = 1``; any other type must be 2-D and is described as
        ``(rows, columns)``.
        """
        # param types: Dense 0, Sparse 1, CacheSparse 2
        if param_type == 0:
            # np.prod yields a NumPy scalar, and the float 1.0 for an empty
            # shape, which ctypes.c_int rejects; coerce to a Python int.
            length = int(np.prod(self.shape))
            width = 1
        else:
            assert len(self.shape) == 2
            length = self.shape[0]
            width = self.shape[1]
        comm.InitTensor(nid, ctypes.c_int(param_type), ctypes.c_int(length), ctypes.c_int(width),
                        ctypes.c_int(init_type), ctypes.c_double(arg1), ctypes.c_double(arg2), ctypes.c_ulonglong(seed), opt[0], opt[1], opt[2])
-
-
class ConstantInit(BaseInit):
    """Initializer that fills the whole tensor with a single value."""

    def __init__(self, constant, shape):
        super().__init__(shape)
        self.constant = constant

    def init_on_gpu(self, stream):
        """Set every element to ``self.constant`` via ``gpu_op.array_set``."""
        target = self.node.tensor_value
        gpu_op.array_set(target, self.constant, stream)

    def init_on_cpu(self, np_rand):
        """Set every element to ``self.constant`` on CPU.

        Uses ``cpu_op.array_set`` when the ``cpu_ArraySet`` entry of
        ``DNNL_LIB`` is truthy; otherwise writes a float32 NumPy array.
        ``np_rand`` is not used.
        """
        from ._base import DNNL_LIB
        if not DNNL_LIB['cpu_ArraySet']:
            filled = np.full(self.shape, self.constant)
            self.node.tensor_value[:] = filled.astype(np.float32)
            return
        cpu_op.array_set(self.node.tensor_value, self.constant)

    def init_on_ps(self, comm, nid, param_type, seed, opt):
        """Forward to ``BaseInit.init_on_ps`` with init type 0.

        The constant is sent as the first scheme argument and 1.0 as the
        second.
        """
        super().init_on_ps(
            comm, nid, param_type,
            init_type=0, arg1=self.constant, arg2=1.0,
            seed=seed, opt=opt,
        )
-
-
class ZerosInit(ConstantInit):
    """Constant initializer whose fill value is fixed to 0.0."""

    def __init__(self, shape):
        super().__init__(constant=0.0, shape=shape)
-
-
class OnesInit(ConstantInit):
    """Constant initializer whose fill value is fixed to 1.0."""

    def __init__(self, shape):
        super().__init__(constant=1.0, shape=shape)
-
-
class UniformInit(BaseInit):
    """Initializer drawing values uniformly between ``low`` and ``high``."""

    def __init__(self, low, high, shape):
        super().__init__(shape)
        self.low, self.high = low, high

    def init_on_gpu(self, stream):
        """Fill the tensor via ``gpu_op.uniform_init`` using ``self.seed``."""
        target = self.node.tensor_value
        gpu_op.uniform_init(target, self.low, self.high, self.seed, stream)

    def init_on_cpu(self, np_rand):
        """Fill the tensor on CPU.

        Uses ``cpu_op.uniform_init`` with ``self.seed`` when the
        ``cpu_UniformInit`` entry of ``DNNL_LIB`` is truthy. Otherwise
        samples from ``np_rand.uniform`` and stores the result as float32;
        ``np_rand`` is only used on that fallback path.
        """
        from ._base import DNNL_LIB
        if not DNNL_LIB['cpu_UniformInit']:
            samples = np_rand.uniform(
                low=self.low, high=self.high, size=self.shape)
            self.node.tensor_value[:] = samples.astype(np.float32)
            return
        cpu_op.uniform_init(
            self.node.tensor_value, self.low, self.high, self.seed)

    def init_on_ps(self, comm, nid, param_type, seed, opt):
        """Forward to ``BaseInit.init_on_ps`` with init type 1.

        ``low`` and ``high`` are sent as the two scheme arguments.
        """
        super().init_on_ps(
            comm, nid, param_type,
            init_type=1, arg1=self.low, arg2=self.high,
            seed=seed, opt=opt,
        )
-
-
class GeneralizedXavierUniformInit(UniformInit):
    """Uniform initializer whose bound is derived from the tensor's fan.

    The range is ``[-limit, limit]`` with ``limit = sqrt(gain / factor)``,
    where ``factor`` is the fan-in, the fan-out, or their mean depending on
    ``mode``. Fan-in is ``shape[1]`` and fan-out is ``shape[0]``, each
    multiplied by the product of any dimensions after the second.
    """

    def __init__(self, gain, mode, shape):
        valid_modes = ('fan_in', 'fan_out', 'avg')
        assert mode in valid_modes, 'Mode %s not valid.' % mode
        assert gain > 0, 'Gain value %s not valid.' % str(gain)
        rank = len(shape)
        assert rank >= 2, 'Generalized xavier requires shape to be at least 2D.'

        # Trailing dimensions scale both fans; a plain 2-D shape has none.
        trailing = np.prod(shape[2:]) if len(shape) != 2 else 1
        fan_in, fan_out = trailing * shape[1], trailing * shape[0]

        if mode == 'fan_in':
            denominator = fan_in
        elif mode == 'fan_out':
            denominator = fan_out
        else:
            denominator = (fan_in + fan_out) / 2.0

        bound = np.sqrt(gain / denominator)
        super().__init__(-bound, bound, shape)
-
-
class XavierUniformInit(GeneralizedXavierUniformInit):
    """Generalized Xavier uniform initializer with gain 3.0 and mode 'avg'."""

    def __init__(self, shape):
        super().__init__(gain=3.0, mode='avg', shape=shape)
-
-
class HeUniformInit(GeneralizedXavierUniformInit):
    """Generalized Xavier uniform initializer with gain 6.0 and mode 'fan_in'."""

    def __init__(self, shape):
        super().__init__(gain=6.0, mode='fan_in', shape=shape)
-
-
class LecunUniformInit(GeneralizedXavierUniformInit):
    """Generalized Xavier uniform initializer with gain 3.0 and mode 'fan_in'."""

    def __init__(self, shape):
        super().__init__(gain=3.0, mode='fan_in', shape=shape)
-
-
class NormalInit(BaseInit):
    """Initializer drawing values from a normal distribution."""

    def __init__(self, mean, stddev, shape):
        super().__init__(shape)
        self.mean, self.stddev = mean, stddev

    def init_on_gpu(self, stream):
        """Fill the tensor via ``gpu_op.normal_init`` using ``self.seed``."""
        target = self.node.tensor_value
        gpu_op.normal_init(target, self.mean, self.stddev, self.seed, stream)

    def init_on_cpu(self, np_rand):
        """Fill the tensor on CPU.

        Uses ``cpu_op.normal_init`` with ``self.seed`` when the
        ``cpu_NormalInit`` entry of ``DNNL_LIB`` is truthy. Otherwise
        samples from ``np_rand.normal`` and stores the result as float32;
        ``np_rand`` is only used on that fallback path.
        """
        from ._base import DNNL_LIB
        if not DNNL_LIB['cpu_NormalInit']:
            samples = np_rand.normal(
                loc=self.mean, scale=self.stddev, size=self.shape)
            self.node.tensor_value[:] = samples.astype(np.float32)
            return
        cpu_op.normal_init(
            self.node.tensor_value, self.mean, self.stddev, self.seed)

    def init_on_ps(self, comm, nid, param_type, seed, opt):
        """Forward to ``BaseInit.init_on_ps`` with init type 2.

        ``mean`` and ``stddev`` are sent as the two scheme arguments.
        """
        super().init_on_ps(
            comm, nid, param_type,
            init_type=2, arg1=self.mean, arg2=self.stddev,
            seed=seed, opt=opt,
        )
-
-
class GeneralizedXavierNormalInit(NormalInit):
    """Zero-mean normal initializer whose stddev is derived from the fan.

    The standard deviation is ``sqrt(gain / factor)``, where ``factor`` is
    the fan-in, the fan-out, or their mean depending on ``mode``. Fan-in is
    ``shape[1]`` and fan-out is ``shape[0]``, each multiplied by the product
    of any dimensions after the second.

    Args:
        gain: Positive scaling numerator.
        mode: One of ``'fan_in'``, ``'fan_out'`` or ``'avg'``.
        shape: Tensor shape with at least two dimensions.

    Raises:
        AssertionError: If ``mode``, ``gain`` or ``shape`` is invalid.
    """

    def __init__(self, gain, mode, shape):
        # Messages report the offending value, matching the uniform variant;
        # the 1-tuple keeps %-formatting safe if a tuple is passed by mistake.
        assert mode in ('fan_in', 'fan_out',
                        'avg'), 'Mode %s not valid.' % (mode,)
        assert gain > 0, 'Gain value %s not valid.' % str(gain)
        assert len(
            shape) >= 2, 'Generalized xavier requires shape to be at least 2D.'
        # Trailing dimensions scale both fans; a plain 2-D shape has none.
        hw_scale = 1 if len(shape) == 2 else np.prod(shape[2:])
        fan_in = hw_scale * shape[1]
        fan_out = hw_scale * shape[0]
        if mode == 'fan_in':
            factor = fan_in
        elif mode == 'fan_out':
            factor = fan_out
        else:
            factor = (fan_in + fan_out) / 2.0
        scale = np.sqrt(gain / factor)
        super().__init__(0, scale, shape)
-
-
class XavierNormalInit(GeneralizedXavierNormalInit):
    """Generalized Xavier normal initializer with gain 1.0 and mode 'avg'."""

    def __init__(self, shape):
        super().__init__(gain=1.0, mode='avg', shape=shape)
-
-
class HeNormalInit(GeneralizedXavierNormalInit):
    """Generalized Xavier normal initializer with gain 2.0 and mode 'fan_in'."""

    def __init__(self, shape):
        super().__init__(gain=2.0, mode='fan_in', shape=shape)
-
-
class LecunNormalInit(GeneralizedXavierNormalInit):
    """Generalized Xavier normal initializer with gain 1.0 and mode 'fan_in'."""

    def __init__(self, shape):
        super().__init__(gain=1.0, mode='fan_in', shape=shape)
-
-
class TruncatedNormalInit(BaseInit):
    """Initializer drawing values from a truncated normal distribution.

    Args:
        mean: Location of the underlying normal distribution.
        stddev: Scale of the underlying normal distribution.
        shape: Shape of the tensor to initialize.
    """

    def __init__(self, mean, stddev, shape):
        super().__init__(shape)
        self.mean = mean
        self.stddev = stddev

    def init_on_gpu(self, stream):
        """Fill the tensor via ``gpu_op.truncated_normal_init`` using ``self.seed``."""
        gpu_op.truncated_normal_init(
            self.node.tensor_value, self.mean, self.stddev, self.seed, stream)

    def init_on_cpu(self, np_rand):
        """Fill the tensor on CPU.

        Uses ``cpu_op.truncated_normal_init`` with ``self.seed`` when the
        ``cpu_TruncatedNormalInit`` entry of ``DNNL_LIB`` is truthy.
        Otherwise samples from SciPy's ``truncnorm`` clipped at two standard
        deviations either side of the mean, and stores float32 values.

        Args:
            np_rand: Random state for the SciPy fallback. ``None`` makes
                SciPy use NumPy's global random state.
        """
        from ._base import DNNL_LIB
        if DNNL_LIB['cpu_TruncatedNormalInit']:
            cpu_op.truncated_normal_init(
                self.node.tensor_value, self.mean, self.stddev, self.seed)
        else:
            # np_rand has no truncated-normal sampler of its own, but SciPy
            # accepts it as random_state, which keeps seeded runs
            # reproducible.
            from scipy.stats import truncnorm
            distribution = truncnorm(
                -2.0, 2.0, loc=self.mean, scale=self.stddev)
            self.node.tensor_value[:] = distribution.rvs(
                size=self.shape, random_state=np_rand).astype(np.float32)

    def init_on_ps(self, comm, nid, param_type, seed, opt):
        """Forward to ``BaseInit.init_on_ps`` with init type 3.

        ``mean`` and ``stddev`` are sent as the two scheme arguments.
        """
        super().init_on_ps(comm, nid, param_type, 3, self.mean, self.stddev, seed, opt)
-
-
- # here we provide easy APIs
-
-
def zeros(shape, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``ZerosInit(shape)``.

    ``name`` falls back to ``'zeros_initializer'`` when it is ``None``.
    """
    var_name = 'zeros_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=ZerosInit(shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def ones(shape, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``OnesInit(shape)``.

    ``name`` falls back to ``'ones_initializer'`` when it is ``None``.
    """
    var_name = 'ones_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=OnesInit(shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def constant(shape, fill_value=0.0, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``ConstantInit(fill_value, shape)``.

    ``name`` falls back to ``'constant_initializer'`` when it is ``None``.
    """
    var_name = 'constant_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=ConstantInit(fill_value, shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def truncated_normal(shape, mean=0.0, stddev=1.0, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``TruncatedNormalInit``.

    ``name`` falls back to ``'truncated_normal_initializer'`` when it is
    ``None``.
    """
    var_name = 'truncated_normal_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=TruncatedNormalInit(mean, stddev, shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def random_normal(shape, mean=0.0, stddev=1.0, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``NormalInit(mean, stddev, shape)``.

    ``name`` falls back to ``'random_normal_initializer'`` when it is
    ``None``.
    """
    var_name = 'random_normal_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=NormalInit(mean, stddev, shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def random_uniform(shape, minval=-1.0, maxval=1.0, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``UniformInit(minval, maxval, shape)``.

    ``name`` falls back to ``'random_uniform_initializer'`` when it is
    ``None``.
    """
    var_name = 'random_uniform_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=UniformInit(minval, maxval, shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def xavier_normal(shape, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``XavierNormalInit(shape)``.

    ``name`` falls back to ``'xavier_normal_initializer'`` when it is
    ``None``.
    """
    var_name = 'xavier_normal_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=XavierNormalInit(shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def xavier_uniform(shape, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``XavierUniformInit(shape)``.

    ``name`` falls back to ``'xavier_uniform_initializer'`` when it is
    ``None``.
    """
    var_name = 'xavier_uniform_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=XavierUniformInit(shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def he_normal(shape, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``HeNormalInit(shape)``.

    ``name`` falls back to ``'he_normal_initializer'`` when it is ``None``.
    """
    var_name = 'he_normal_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=HeNormalInit(shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def he_uniform(shape, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``HeUniformInit(shape)``.

    ``name`` falls back to ``'he_uniform_initializer'`` when it is ``None``.
    """
    var_name = 'he_uniform_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=HeUniformInit(shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def lecun_normal(shape, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``LecunNormalInit(shape)``.

    ``name`` falls back to ``'lecun_normal_initializer'`` when it is
    ``None``.
    """
    var_name = 'lecun_normal_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=LecunNormalInit(shape),
        trainable=trainable,
        ctx=ctx,
    )
-
-
def lecun_uniform(shape, name=None, trainable=True, ctx=None):
    """Create a ``Variable`` initialized by ``LecunUniformInit(shape)``.

    ``name`` falls back to ``'lecun_uniform_initializer'`` when it is
    ``None``.
    """
    var_name = 'lecun_uniform_initializer' if name is None else name
    return Variable(
        name=var_name,
        initializer=LecunUniformInit(shape),
        trainable=trainable,
        ctx=ctx,
    )
|