# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Neural Collaborative Filtering model."""
from mindspore import nn
from mindspore import Tensor, Parameter, ParameterTuple
from mindspore._checkparam import Validator as validator
from mindspore.nn.layer.activation import get_activation
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore.common.initializer import initializer
from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean
from mindspore.context import ParallelMode
from mindspore.nn.wrap.grad_reducer import DistributedGradReducer

from src.lr_schedule import dynamic_lr

class DenseLayer(nn.Cell):
    """
    Dense (fully connected) layer that computes in float16 and casts the
    result back to float32.
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 has_bias=True,
                 activation=None):
        super(DenseLayer, self).__init__()
        self.in_channels = validator.check_positive_int(in_channels)
        self.out_channels = validator.check_positive_int(out_channels)
        self.has_bias = validator.check_bool(has_bias)

        if isinstance(weight_init, Tensor):
            if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \
                    weight_init.shape()[1] != in_channels:
                raise ValueError("weight_init shape must be (out_channels, in_channels)")

        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))

        self.bias = None
        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
                    raise ValueError("bias_init shape must be (out_channels,)")

            self.bias = Parameter(initializer(bias_init, [out_channels]))

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()
        self.cast = P.Cast()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None

    def construct(self, x):
        """
        Dense layer forward: matmul, optional bias and activation, computed in float16.
        """
        # Run the computation in float16 and cast back to float32 at the end.
        x = self.cast(x, mstype.float16)
        weight = self.cast(self.weight, mstype.float16)

        output = self.matmul(x, weight)
        if self.has_bias:
            # Cast the bias only when the layer has one; otherwise self.bias is None.
            bias = self.cast(self.bias, mstype.float16)
            output = self.bias_add(output, bias)
        if self.activation_flag:
            output = self.activation(output)
        output = self.cast(output, mstype.float32)
        return output

    def extend_repr(self):
        """A pretty print for the Dense layer."""
        str_info = 'in_channels={}, out_channels={}, weight={}, has_bias={}' \
            .format(self.in_channels, self.out_channels, self.weight, self.has_bias)
        if self.has_bias:
            str_info = str_info + ', bias={}'.format(self.bias)

        if self.activation_flag:
            str_info = str_info + ', activation={}'.format(self.activation)

        return str_info

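# A minimal usage sketch for DenseLayer. Everything below is illustrative:
# the shapes, values, and the numpy import are assumptions, not part of this
# model's code path.
#
#   import numpy as np
#   dense = DenseLayer(in_channels=64, out_channels=32, activation="relu")
#   out = dense(Tensor(np.ones((256, 64)).astype(np.float32)))
#   # out: shape (256, 32), dtype float32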

class NCFModel(nn.Cell):
    """
    Neural Collaborative Filtering model, from the paper "Neural Collaborative
    Filtering" (He et al., WWW 2017).
    """

    def __init__(self,
                 num_users,
                 num_items,
                 num_factors,
                 model_layers,
                 mf_regularization,
                 mlp_reg_layers,
                 mf_dim):
        super(NCFModel, self).__init__()

        self.data_path = ""
        self.model_path = ""

        self.num_users = num_users
        self.num_items = num_items
        self.num_factors = num_factors
        self.model_layers = model_layers

        self.mf_regularization = mf_regularization
        self.mlp_reg_layers = mlp_reg_layers

        self.mf_dim = mf_dim

        self.num_layers = len(self.model_layers)  # Number of layers in the MLP

        if self.model_layers[0] % 2 != 0:
            raise ValueError("The first layer size must be a multiple of 2!")

        # Initializer for embedding layers
        self.embedding_initializer = "normal"

        # Each embedding row packs the GMF latent vector (first num_factors
        # columns) next to the MLP latent vector (model_layers[0] // 2
        # columns); construct() slices the two parts apart.
        self.embedding_user = nn.Embedding(
            self.num_users,
            self.num_factors + self.model_layers[0] // 2,
            embedding_table=self.embedding_initializer
        )
        self.embedding_item = nn.Embedding(
            self.num_items,
            self.num_factors + self.model_layers[0] // 2,
            embedding_table=self.embedding_initializer
        )

        self.mlp_dense1 = DenseLayer(in_channels=self.model_layers[0],
                                     out_channels=self.model_layers[1],
                                     activation="relu")
        self.mlp_dense2 = DenseLayer(in_channels=self.model_layers[1],
                                     out_channels=self.model_layers[2],
                                     activation="relu")

        # Logit dense layer: its input is the concatenation of the GMF vector
        # (num_factors wide) and the last MLP vector (model_layers[2] wide).
        self.logits_dense = DenseLayer(in_channels=self.num_factors + self.model_layers[2],
                                       out_channels=1,
                                       weight_init="normal",
                                       activation=None)

        # ops definition
        self.mul = P.Mul()
        self.squeeze = P.Squeeze(axis=1)
        self.concat = P.Concat(axis=1)

    def construct(self, user_input, item_input):
        """
        NCF construct method: GMF and MLP branches fused by a final dense layer.
        """
        # Embedding lookups (shape comments assume batch 256, num_factors 16,
        # model_layers[0] == 64).
        embedding_user = self.embedding_user(user_input)  # (256, 1) -> (256, 1, 16 + 32)
        embedding_item = self.embedding_item(item_input)  # (256, 1) -> (256, 1, 16 + 32)

        # GMF part; mf_dim is expected to equal num_factors, so the GMF and
        # MLP slices split each embedding at the same point.
        mf_user_latent = self.squeeze(embedding_user)[:, :self.num_factors]  # (256, 1, 48) -> (256, 16)
        mf_item_latent = self.squeeze(embedding_item)[:, :self.num_factors]  # (256, 1, 48) -> (256, 16)

        # MLP part
        mlp_user_latent = self.squeeze(embedding_user)[:, self.mf_dim:]  # (256, 1, 48) -> (256, 32)
        mlp_item_latent = self.squeeze(embedding_item)[:, self.mf_dim:]  # (256, 1, 48) -> (256, 32)

        # Element-wise multiply for the GMF branch
        mf_vector = self.mul(mf_user_latent, mf_item_latent)  # (256, 16) -> (256, 16)

        # Concatenate the two latent features for the MLP branch
        mlp_vector = self.concat((mlp_user_latent, mlp_item_latent))  # (256, 32) x 2 -> (256, 64)

        # MLP dense layers
        mlp_vector = self.mlp_dense1(mlp_vector)  # (256, 64) -> (256, 32)
        mlp_vector = self.mlp_dense2(mlp_vector)  # (256, 32) -> (256, 16)

        # Concatenate GMF and MLP parts
        predict_vector = self.concat((mf_vector, mlp_vector))  # (256, 16) x 2 -> (256, 32)

        # Final prediction layer
        logits = self.logits_dense(predict_vector)  # (256, 32) -> (256, 1)

        return logits
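
# A hedged construction sketch for NCFModel. The hyperparameter values below
# are assumptions in the spirit of the usual ml-1m recipe, not values taken
# from this file:
#
#   net = NCFModel(num_users=6040, num_items=3706, num_factors=16,
#                  model_layers=[64, 32, 16], mf_regularization=0.0,
#                  mlp_reg_layers=[0.0, 0.0, 0.0], mf_dim=16)
#   logits = net(user_ids, item_ids)  # raw (batch, 1) scores, no sigmoid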


class NetWithLossClass(nn.Cell):
    """
    Wraps the NCF network with a masked softmax cross-entropy loss.
    """
    def __init__(self, network):
        super(NetWithLossClass, self).__init__(auto_prefix=False)
        self.loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
        self.network = network
        self.reducesum = P.ReduceSum(keep_dims=False)
        self.mul = P.Mul()
        self.squeeze = P.Squeeze(axis=1)
        self.zeroslike = P.ZerosLike()
        self.concat = P.Concat(axis=1)
        self.reciprocal = P.Reciprocal()

    def construct(self, batch_users, batch_items, labels, valid_pt_mask):
        predict = self.network(batch_users, batch_items)
        # Expand the single logit z to two-class logits [0, z]; softmax over
        # them equals sigmoid(z), so this is binary cross-entropy in disguise.
        predict = self.concat((self.zeroslike(predict), predict))
        labels = self.squeeze(labels)
        loss = self.loss(predict, labels)
        # Zero out padded points, then average over the valid ones only.
        loss = self.mul(loss, self.squeeze(valid_pt_mask))
        mean_loss = self.mul(self.reducesum(loss), self.reciprocal(self.reducesum(valid_pt_mask)))
        return mean_loss
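
# The same masked loss written out in plain NumPy as a sanity check. This is
# an illustrative sketch only (numpy is not used by the model itself):
#
#   import numpy as np
#   def masked_bce(z, y, mask):
#       p = 1.0 / (1.0 + np.exp(-z))                       # sigmoid(z)
#       loss = -(y * np.log(p) + (1 - y) * np.log(1 - p))  # per-point BCE
#       return (loss * mask).sum() / mask.sum()            # masked mean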


class TrainStepWrap(nn.Cell):
    """
    Training wrapper: runs the loss network, back-propagates with loss
    scaling, and applies the Adam update (all-reducing gradients in
    data-parallel mode).
    """
    def __init__(self, network, total_steps=1, sens=16384.0):
        super(TrainStepWrap, self).__init__(auto_prefix=False)
        self.network = network
        self.network.set_train()
        self.network.add_flags(defer_inline=True)
        self.weights = ParameterTuple(network.trainable_params())

        # Learning-rate schedule; 0.01 and 5000 are the recipe's fixed constants.
        lr = dynamic_lr(0.01, total_steps, 5000)
        self.optimizer = nn.Adam(self.weights,
                                 learning_rate=lr,
                                 beta1=0.9,
                                 beta2=0.999,
                                 eps=1e-8,
                                 loss_scale=sens)

        self.hyper_map = C.HyperMap()
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens

        self.reducer_flag = False
        self.grad_reducer = None
        parallel_mode = _get_parallel_mode()
        if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
            self.reducer_flag = True
        if self.reducer_flag:
            mean = _get_gradients_mean()
            degree = _get_device_num()
            self.grad_reducer = DistributedGradReducer(self.optimizer.parameters, mean, degree)

    def construct(self, batch_users, batch_items, labels, valid_pt_mask):
        weights = self.weights
        loss = self.network(batch_users, batch_items, labels, valid_pt_mask)
        # Feed the loss-scale constant in as the sensitivity of the backward pass.
        sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
        grads = self.grad(self.network, weights)(batch_users, batch_items, labels, valid_pt_mask, sens)
        if self.reducer_flag:
            # apply grad reducer on grads
            grads = self.grad_reducer(grads)
        return F.depend(loss, self.optimizer(grads))
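
# A hedged end-to-end wiring sketch; `dataset` and `total_steps` are assumed
# names for illustration, not defined in this file:
#
#   loss_net = NetWithLossClass(NCFModel(...))
#   train_net = TrainStepWrap(loss_net, total_steps=total_steps)
#   for batch_users, batch_items, labels, valid_pt_mask in dataset:
#       loss = train_net(batch_users, batch_items, labels, valid_pt_mask)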


class PredictWithSigmoid(nn.Cell):
    """
    Evaluation network: scores each user's candidate items and returns the
    top-k indices. The sigmoid itself is skipped: it is monotonic, so it does
    not change the top-k ranking of the raw logits.
    """
    def __init__(self, network, k, num_eval_neg):
        super(PredictWithSigmoid, self).__init__()
        self.network = network
        self.topk = P.TopK(sorted=True)
        self.squeeze = P.Squeeze()
        self.k = k
        self.num_eval_neg = num_eval_neg
        self.gather = P.GatherV2()
        self.reshape = P.Reshape()
        self.reducesum = P.ReduceSum(keep_dims=False)
        self.notequal = P.NotEqual()

    def construct(self, batch_users, batch_items, duplicated_masks):
        predicts = self.network(batch_users, batch_items)  # (bs, 1)
        # Regroup the flat batch into one row per user:
        # 1 positive + num_eval_neg negatives (e.g. 100 columns).
        predicts = self.reshape(predicts, (-1, self.num_eval_neg + 1))  # (num_user, 100)
        batch_items = self.reshape(batch_items, (-1, self.num_eval_neg + 1))  # (num_user, 100)
        duplicated_masks = self.reshape(duplicated_masks, (-1, self.num_eval_neg + 1))  # (num_user, 100)
        # Users whose mask sum equals num_eval_neg are excluded from the metrics.
        masks_sum = self.reducesum(duplicated_masks, 1)
        metric_weights = self.notequal(masks_sum, self.num_eval_neg)  # (num_user,)
        _, indices = self.topk(predicts, self.k)  # (num_user, k)

        return indices, batch_items, metric_weights
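
# A hedged sketch of turning these outputs into a hit ratio. `hr_at_k` is a
# hypothetical helper, and the assumption that the positive item sits in
# column 0 of each row is illustrative, not guaranteed by this file:
#
#   import numpy as np
#   def hr_at_k(indices, weights):
#       hits = (indices == 0).any(axis=1).astype(np.float32)  # top-k hit per user
#       return float((hits * weights).sum() / weights.sum())  # weighted mean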