You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

_cache_ops.py 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """cache_ops"""
  16. from ..._checkparam import Validator as validator
  17. from ...common import dtype as mstype
  18. from ..primitive import PrimitiveWithInfer, prim_attr_register
  19. from .. import signature as sig
  20. class UpdateCache(PrimitiveWithInfer):
  21. """
  22. Update the value fo input_x, similar to ScatterNdUpdate.
  23. The diffirent is that UpdateCache will not update when indices < 0 or indices >= max_num.
  24. Inputs:
  25. - **input_x** (Parameter) - Parameter which is going to be updated.
  26. - **indices** (Tensor) - Update indices of input_x.
  27. - **updates** (Tensor) - The update values.
  28. Outputs:
  29. - **out** (Tensor) - Returns a [1] Tensor, which is not usefull.
  30. """
  31. __mindspore_signature__ = (
  32. sig.make_sig('input_x', sig.sig_rw.RW_WRITE,
  33. dtype=sig.sig_dtype.T),
  34. sig.make_sig('indices', dtype=sig.sig_dtype.T1),
  35. sig.make_sig('updates', dtype=sig.sig_dtype.T),
  36. sig.make_sig('max_num', dtype=sig.sig_dtype.T1)
  37. )
  38. @prim_attr_register
  39. def __init__(self):
  40. """init UpdateCache"""
  41. self.init_prim_io_names(inputs=['input_x', 'indices', 'update', 'max_num'],
  42. outputs=['out'])
  43. def infer_shape(self, input_x_shape, indices_shape, update_shape, max_num_shape):
  44. if len(indices_shape) < 2:
  45. raise ValueError("The dimension of 'indices' in UpdateCache must >= 2, "
  46. "but got %d." % len(indices_shape))
  47. return [1]
  48. def infer_dtype(self, input_x_dtype, indices_dtype, update_dtype, max_num_dtype):
  49. args = {"indices": indices_dtype}
  50. validator.check_tensor_type_same(args, mstype.int_type, self.name)
  51. return input_x_dtype
  52. class SearchCacheIdx(PrimitiveWithInfer):
  53. """
  54. Search the keys of a hashmap, and return the values.
  55. Inputs:
  56. - **hashmap** (Parameter) - The dim of hashmap is (n, 4), which cols represent the `key, value, step, tag`.
  57. `key, value`: Map the indices of big table and cache table.
  58. `step`: The resent step, when searching the key, it will be updated at the same time.
  59. `step` can make sure the indices which are using in the last step will not be deleted in hashmap.
  60. `tag`: We use linear probing(`h(k, i) = (h(k) + i) % m`) to solve hash conflicts.
  61. tag is the count of linear probing times of the key. If `tag == 0`, means that the entry is empty.
  62. The Hash Function is:
  63. `((0.6180339 * key) - floor(0.618033 * key)) * hashmap_length`, in order to avoid data clustering.
  64. - **indices** (Tensor) - The indices which are keys of hashmap.
  65. - **step** (int) - The current step when searching.
  66. - **emb_max_num** (int) - Max length of big table.
  67. To avoid searching when `indices >= emb_max_num`, and make value = `cache_max_num`.
  68. - **cache_max_num** (int) - Max length of cache table.
  69. Outputs:
  70. - **cache_idx** (Tensor) - Result of searched value, if search missed, value = -1.
  71. - **miss_idx** (Tensor) - The index of Tensor indices which search missed.
  72. If search success, miss_idx[i] = -1.
  73. - **miss_emb_idx** (Tensor) - The value of Tensor indices which search missed.
  74. If search success, miss_emb_idx[i] = -1.
  75. Examples:
  76. >>> hashmap = Parameter(Tensor(np.array([[0, 0, 0, 0],
  77. [10, 5, -5, 1],
  78. [2, 1, -5, 1],
  79. [15, 7, -5, 2],
  80. [0, 0, 0, 0],
  81. [0, 0, 0, 0],
  82. [0, 0, 0, 0],
  83. [0, 0, 0, 0],
  84. [3, 3, -5, 1],
  85. [21, 9, -5, 1]], np.int32)), name="hashmap")
  86. >>> indices = Tensor(np.array([10, 2, 25, 5, 3], np.int32))
  87. >>> step = 0, emb_max_num = 25, cache_max_num = 10
  88. >>> ops = P.SearchCacheIdx()
  89. >>> cache_idx, miss_idx, miss_emb_idx = ops(hashmap, indices, step, emb_max_num, cache_max_num)
  90. cache_idx : [5, 1, 10, -1, 3]
  91. miss_idx : [-1, -1, -1, 3, -1]
  92. miss_emb_idx : [-1, -1, -1, 5, -1]
  93. hashmap after search : [[0, 0, 0, 0],
  94. [10, 5, 0, 1],
  95. [2, 1, 0, 1],
  96. [15, 7, -5, 2],
  97. [0, 0, 0, 0],
  98. [0, 0, 0, 0],
  99. [0, 0, 0, 0],
  100. [0, 0, 0, 0],
  101. [3, 3, 0, 1],
  102. [21, 9, -5, 1]]
  103. """
  104. __mindspore_signature__ = (
  105. sig.make_sig('hashmap', sig.sig_rw.RW_WRITE,
  106. dtype=sig.sig_dtype.T),
  107. sig.make_sig('indices', dtype=sig.sig_dtype.T),
  108. sig.make_sig('step', dtype=sig.sig_dtype.T),
  109. sig.make_sig('emb_max_num', dtype=sig.sig_dtype.T),
  110. sig.make_sig('cache_max_num', dtype=sig.sig_dtype.T)
  111. )
  112. @prim_attr_register
  113. def __init__(self):
  114. """init SearchCacheIdx"""
  115. self.init_prim_io_names(inputs=['hashmap', 'indices', 'step', 'emb_max_num', 'cache_max_num'],
  116. outputs=['cache_idx', 'miss_idx', 'miss_emb_idx'])
  117. def infer_shape(self, hashmap_shape, indices_shape, step_shape, emb_max_num_shape, cache_max_num_shape):
  118. if len(hashmap_shape) != 2:
  119. raise ValueError("The dimension of 'hashmap' in SearchCacheIdx must be 2, "
  120. "but got %d." % len(hashmap_shape))
  121. out_shape = (indices_shape, indices_shape, indices_shape)
  122. return out_shape
  123. def infer_dtype(self, hashmap_dtype, indices_dtype, step_dtype, emb_max_num_dtype, cache_max_num_dtype):
  124. args = {"hashmap": hashmap_dtype, "indices": indices_dtype}
  125. validator.check_tensor_type_same(args, mstype.int_type, self.name)
  126. out_dtype = (hashmap_dtype, hashmap_dtype, hashmap_dtype)
  127. return out_dtype
  128. class CacheSwapHashmap(PrimitiveWithInfer):
  129. """
  130. Delete a hashmap entry,and insert a new key to hashmap, return the key and value of delete entry.
  131. Inputs:
  132. - **hashmap** (Parameter) - Same to operation SearchCacheIdx.
  133. - **miss_emb_idx** (Tensor) - The keys which are going to insert, -1 is skipped. It is the result
  134. - **step** (int) - The current step.
  135. Outputs:
  136. - **swap_cache_idx** (Tensor) - Deleted value of entry, -1 is skipped.
  137. - **old_emb_idx** (Tensor) - Deleted key of entry, -1 is skipped.
  138. """
  139. __mindspore_signature__ = (
  140. sig.make_sig('hashmap', sig.sig_rw.RW_WRITE,
  141. dtype=sig.sig_dtype.T),
  142. sig.make_sig('miss_emb_idx', dtype=sig.sig_dtype.T),
  143. sig.make_sig('step', dtype=sig.sig_dtype.T)
  144. )
  145. @prim_attr_register
  146. def __init__(self):
  147. """init CacheSwapHashmap"""
  148. self.init_prim_io_names(inputs=['hashmap', 'miss_emb_idx', 'step'],
  149. outputs=['swap_cache_idx', 'old_emb_idx'])
  150. def infer_shape(self, hashmap_shape, miss_emb_idx_shape, step_shape):
  151. if len(hashmap_shape) != 2:
  152. raise ValueError("The dimension of 'hashmap' in CacheSwapHashmap must be 2, "
  153. "but got %d." % len(hashmap_shape))
  154. out_shape = (miss_emb_idx_shape, miss_emb_idx_shape)
  155. return out_shape
  156. def infer_dtype(self, hashmap_dtype, miss_emb_idx_dtype, step_dtype):
  157. args = {"miss_emb_idx": miss_emb_idx_dtype}
  158. validator.check_tensor_type_same(args, mstype.int_type, self.name)
  159. out_dtype = (miss_emb_idx_dtype, miss_emb_idx_dtype)
  160. return out_dtype
  161. class CacheSwapTable(PrimitiveWithInfer):
  162. """
  163. Delete a hashmap entry,and insert a new key to hashmap, return the key and value of delete entry.
  164. Inputs:
  165. - **cache_table** (Parameter) - The cache table which is on device.
  166. - **swap_cache_idx** (Tensor) - The index of table which need to swap. -1 is skipped.
  167. - **miss_value** (int) - The values which arg going to swap into cache table.
  168. Outputs:
  169. - **old_value** (Tensor) - The values which are swapped out.
  170. """
  171. __mindspore_signature__ = (
  172. sig.make_sig('cache_table', sig.sig_rw.RW_WRITE,
  173. dtype=sig.sig_dtype.T),
  174. sig.make_sig('swap_cache_idx', dtype=sig.sig_dtype.T1),
  175. sig.make_sig('miss_value', dtype=sig.sig_dtype.T)
  176. )
  177. @prim_attr_register
  178. def __init__(self):
  179. """init CacheSwapTable"""
  180. self.init_prim_io_names(inputs=['cache_table', 'swap_cache_idx', 'miss_value'],
  181. outputs=['old_value'])
  182. def infer_shape(self, cache_table_shape, swap_cache_idx_shape, miss_value_shape):
  183. if len(cache_table_shape) != 2:
  184. raise ValueError(
  185. "cache table shape must be 2, but got %d" % len(cache_table_shape))
  186. if swap_cache_idx_shape + cache_table_shape[1:] != miss_value_shape:
  187. raise ValueError(
  188. "swap_cache_idx_shape + cache_table_shape[1:] must equal to miss_value_shape")
  189. return miss_value_shape
  190. def infer_dtype(self, cache_table_dtype, swap_cache_idx_dtype, miss_value_dtype):
  191. args = {"swap_cache_idx": swap_cache_idx_dtype}
  192. validator.check_tensor_type_same(args, mstype.int_type, self.name)
  193. return miss_value_dtype
  194. class MapCacheIdx(PrimitiveWithInfer):
  195. """
  196. MapCacheIdx merge SearchCacheIdx, CacheSwapHashmap, UpdateCache together.
  197. When input an indices tensor, it will output the cache indices which search in hashmap.
  198. """
  199. __mindspore_signature__ = (
  200. sig.make_sig('hashmap', sig.sig_rw.RW_WRITE,
  201. dtype=sig.sig_dtype.T),
  202. sig.make_sig('indices', dtype=sig.sig_dtype.T),
  203. sig.make_sig('step', dtype=sig.sig_dtype.T),
  204. sig.make_sig('emb_max_num', dtype=sig.sig_dtype.T),
  205. sig.make_sig('cache_max_num', dtype=sig.sig_dtype.T)
  206. )
  207. @prim_attr_register
  208. def __init__(self):
  209. """init MapCacheIdx"""
  210. self.init_prim_io_names(inputs=['hashmap', 'indices', 'step', 'emb_max_num', 'cache_max_num'],
  211. outputs=['cache_idx', 'old_emb_idx', 'miss_emb_idx', 'swap_cache_idx'])
  212. def infer_shape(self, hashmap_shape, indices_shape, step_shape, emb_max_num_shape, cache_max_num_shape):
  213. if len(hashmap_shape) != 2:
  214. raise ValueError("The dimension of 'hashmap' in SearchCacheIdx must be 2, "
  215. "but got %d." % len(hashmap_shape))
  216. out_shape = (indices_shape, indices_shape,
  217. indices_shape, indices_shape)
  218. return out_shape
  219. def infer_dtype(self, hashmap_dtype, indices_dtype, step_dtype, emb_max_num_dtype, cache_max_num_dtype):
  220. args = {"hashmap": hashmap_dtype, "indices": indices_dtype}
  221. validator.check_tensor_type_same(args, mstype.int_type, self.name)
  222. out_dtype = (hashmap_dtype, hashmap_dtype,
  223. hashmap_dtype, hashmap_dtype)
  224. return out_dtype