Browse Source

sponge ops nb update others

tags/v1.5.0-rc1
q00596439 4 years ago
parent
commit
44189d25bf
11 changed files with 650 additions and 183 deletions
  1. +345
    -152
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu
  2. +32
    -11
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh
  3. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/total_c6_get_kernel.cc
  4. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/crdmcmap/cal_no_wrap_crd_kernel.cc
  5. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/crdmcmap/refresh_boxmaptimes_kernel.cc
  6. +4
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc
  7. +16
    -15
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h
  8. +49
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc
  9. +179
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h
  10. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/restrain/restrain_energy_kernel.cc
  11. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/restrain/restrain_force_atom_energy_virial_impl_kernel.cc

+ 345
- 152
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu View File

@@ -13,51 +13,46 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
*Note:
* NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh"
#include <vector>
// Element-wise device copy of an int array: list[idx] = origin_list[idx].
// One thread per element; launch with at least element_numbers total threads.
__global__ void Copy_List(const int element_numbers, const int *origin_list, int *list) {
  int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= element_numbers) {
    return;
  }
  list[idx] = origin_list[idx];
}
__global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) {

// common functions

// Element-wise device copy of a float array (float overload); used e.g. to
// snapshot crd into old_crd when the two buffers are viewed as flat floats.
static __global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) {
  int tid = blockDim.x * blockIdx.x + threadIdx.x;
  if (tid >= element_numbers) {
    return;
  }
  list[tid] = origin_list[tid];
}

__global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *crd,
UNSIGNED_INT_VECTOR *uint_crd) {
static __global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *crd,
UNSIGNED_INT_VECTOR *uint_crd) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0];
uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[1];
uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[2];
uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1;
uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1;
uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1;
}
}
INT_VECTOR tempi;
VECTOR temp = crd[atom_i];

__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR translation_vec) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < vector_numbers) {
vec_list[i].x = vec_list[i].x + translation_vec.x;
vec_list[i].y = vec_list[i].y + translation_vec.y;
vec_list[i].z = vec_list[i].z + translation_vec.z;
}
}
__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR *translation_vec) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < vector_numbers) {
vec_list[i].x = vec_list[i].x + translation_vec[0].x;
vec_list[i].y = vec_list[i].y + translation_vec[0].y;
vec_list[i].z = vec_list[i].z + translation_vec[0].z;
temp.x *= scale_factor[0];
temp.y *= scale_factor[1];
temp.z *= scale_factor[2];

tempi.int_x = temp.x;
tempi.int_y = temp.y;
tempi.int_z = temp.z;

uint_crd[atom_i].uint_x = (tempi.int_x << 2);
uint_crd[atom_i].uint_y = (tempi.int_y << 2);
uint_crd[atom_i].uint_z = (tempi.int_z << 2);
}
}
__global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_length) {

static __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_length) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
if (crd[atom_i].x >= 0) {
@@ -77,7 +72,6 @@ __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const floa
} else {
crd[atom_i].y = crd[atom_i].y + box_length[1];
}

if (crd[atom_i].z >= 0) {
if (crd[atom_i].z < box_length[2]) {
} else {
@@ -89,7 +83,8 @@ __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const floa
}
}

__global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket, GRID_BUCKET *bucket) {
static __global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket,
GRID_BUCKET *bucket) {
int grid_serial = blockDim.x * blockIdx.x + threadIdx.x;
if (grid_serial < grid_numbers) {
GRID_BUCKET bucket_i = bucket[grid_serial];
@@ -100,20 +95,12 @@ __global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_g
}
}

// Reset every slot of each atom's neighbor list to the sentinel value
// `atom_numbers` (an index no real atom can take). One thread per atom,
// each serially clearing its max_neighbor_number slots.
__global__ void Clear_Neighbor_List_Serial(const int atom_numbers, int max_neighbor_number, NEIGHBOR_LIST *nl) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i >= atom_numbers) {
    return;
  }
  for (int slot = 0; slot < max_neighbor_number; ++slot) {
    nl[atom_i].atom_serial[slot] = atom_numbers;
  }
}

__global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_inverse, const VECTOR *crd,
const int *grid_N, const int gridxy, int *atom_in_grid_serial) {
static __global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_inverse,
const VECTOR *crd, const int *grid_N, const int gridxy,
int *atom_in_grid_serial) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int Nx = static_cast<float>(crd[atom_i].x) * grid_length_inverse[0]; // crd.x must < boxlength.x
int Nx = static_cast<float>(crd[atom_i].x) * grid_length_inverse[0];
int Ny = static_cast<float>(crd[atom_i].y) * grid_length_inverse[1];
int Nz = static_cast<float>(crd[atom_i].z) * grid_length_inverse[2];
Nx = Nx & ((Nx - grid_N[0]) >> 31);
@@ -123,8 +110,8 @@ __global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *gr
}
}

__global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_serial, GRID_BUCKET *bucket,
int *atom_numbers_in_grid_bucket) {
static __global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_serial,
GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int grid_serial = atom_in_grid_serial[atom_i];
@@ -145,11 +132,12 @@ __global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_
}
}
}
__global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const float *uint_dr_to_dr_cof, const int *atom_in_grid_serial,
const GRID_POINTER *gpointer, const GRID_BUCKET *bucket,
const int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *nl,
const float cutoff_skin_square) {

static __global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const float *uint_dr_to_dr_cof, const int *atom_in_grid_serial,
const GRID_POINTER *gpointer, const GRID_BUCKET *bucket,
const int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *nl,
const float cutoff_skin_square) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int grid_serial = atom_in_grid_serial[atom_i];
@@ -183,30 +171,15 @@ __global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_V
}
}
}
} // 124 grid cycle
nl[atom_i].atom_numbers = atom_numbers_in_nl_lin;
}
}

__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
const float half_skin_square, int *need_refresh_flag) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
VECTOR r1 = crd[i];
VECTOR r2 = old_crd[i];
r1.x = r1.x - r2.x;
r1.y = r1.y - r2.y;
r1.z = r1.z - r2.z;
float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
if (r1_2 > half_skin_square) {
atomicExch(&need_refresh_flag[0], 1);
}
nl[atom_i].atom_numbers = atom_numbers_in_nl_lin;
}
}

__global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl,
const int *excluded_list_start, const int *excluded_list,
const int *excluded_atom_numbers) {
static __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl,
const int *excluded_list_start,
const int *excluded_list,
const int *excluded_atom_numbers) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int excluded_number = excluded_atom_numbers[atom_i];
@@ -236,29 +209,113 @@ __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_num
if (excluded_atom_numbers_count < excluded_atom_numbers_lin) {
} else {
break;
} // break
} // in the range of excluded min to max
} // cycle for neighbors
}
}
}
nl[atom_i].atom_numbers = atomnumbers_in_nl_lin;
} // if need excluded
}
}
}

void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, VECTOR *old_crd,
UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, float *uint_dr_to_dr_cof,
int *atom_in_grid_serial, const float skin, float *box_length, const GRID_POINTER *gpointer,
GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers,
float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
int max_neighbor_number, cudaStream_t stream) {
if (refresh_sign[0] == 1) {
// Wire the flat nl_atom_numbers / nl_atom_serial buffers into the NEIGHBOR_LIST
// structs: entry i takes its count from nl_atom_numbers[i] and points its
// serial array at row i of the (atom_numbers x max_neighbor_numbers) table.
// Grid-stride loop, so any launch configuration covers all atoms.
static __global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
                                                      int *nl_atom_serial, NEIGHBOR_LIST *nl) {
  const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += stride) {
    nl[i].atom_numbers = nl_atom_numbers[i];
    nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
  }
}

// Export NEIGHBOR_LIST back into flat buffers: per-atom counts go to
// nl_atom_numbers, the used serial entries to nl_atom_serial, and any unused
// tail slots are zeroed. Grid-strides in both x (atoms) and y (slots), so it
// also works with a 1D launch where the y loop collapses to a serial scan.
static __global__ void copy_neighbor_list_atom_number(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl,
                                                      int *nl_atom_numbers, int *nl_atom_serial) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
    nl_atom_numbers[i] = nl[i].atom_numbers;
    for (int j = blockIdx.y * blockDim.y + threadIdx.y; j < max_neighbor_numbers; j += gridDim.y * blockDim.y) {
      nl_atom_serial[i * max_neighbor_numbers + j] = (j < nl_atom_numbers[i]) ? nl[i].atom_serial[j] : 0;
    }
  }
}

// dst[k] = src[k] * 0.5 for k in [0, 3); used to scale the 3-component
// crd-to-uint-crd coefficient. Launch with a single block of >= 3 threads.
static __global__ void Mul_half(float *src, float *dst) {
  int index = threadIdx.x;
  if (index < 3) {
    dst[index] = src[index] * 0.5f;  // float literal: avoids promoting the multiply to double
  }
}

// dst[k] = src[k] * 0.25 for k in [0, 3); quarter-scale variant of Mul_half.
// Launch with a single block of >= 3 threads.
static __global__ void Mul_quarter(float *src, float *dst) {
  int index = threadIdx.x;
  if (index < 3) {
    dst[index] = src[index] * 0.25f;  // float literal: avoids promoting the multiply to double
  }
}

// old neighbor list update functions
// Convert float coordinates to unsigned-int coordinates with the "half" scheme:
// uint = (unsigned)(crd * scale) << 1, so the lowest bit is always zero.
// NOTE(review): crd is converted to unsigned directly, which presumably assumes
// non-negative (periodically mapped) coordinates — confirm against callers.
__global__ void Crd_To_Uint_Crd_Half(const int atom_numbers, float *scale_factor, const VECTOR *crd,
                                     UNSIGNED_INT_VECTOR *uint_crd) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i >= atom_numbers) {
    return;
  }
  VECTOR r = crd[atom_i];
  uint_crd[atom_i].uint_x = static_cast<unsigned int>(r.x * scale_factor[0]) << 1;
  uint_crd[atom_i].uint_y = static_cast<unsigned int>(r.y * scale_factor[1]) << 1;
  uint_crd[atom_i].uint_z = static_cast<unsigned int>(r.z * scale_factor[2]) << 1;
}

// Translate every vector in vec_list in place by translation_vec
// (passed by value). One thread per vector.
__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR translation_vec) {
  int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= vector_numbers) {
    return;
  }
  vec_list[idx].x = vec_list[idx].x + translation_vec.x;
  vec_list[idx].y = vec_list[idx].y + translation_vec.y;
  vec_list[idx].z = vec_list[idx].z + translation_vec.z;
}

// Translate every vector in vec_list in place; overload that reads the
// translation from device memory (translation_vec[0]).
__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR *translation_vec) {
  int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= vector_numbers) {
    return;
  }
  VECTOR t = translation_vec[0];
  vec_list[idx].x = vec_list[idx].x + t.x;
  vec_list[idx].y = vec_list[idx].y + t.y;
  vec_list[idx].z = vec_list[idx].z + t.z;
}

// Raise need_refresh_flag[0] to 1 (atomically) if any atom has drifted more
// than sqrt(half_skin_square) from its position at the last neighbor-list
// rebuild. Non-periodic distance: no box wrapping in this overload.
__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
                                                   const float half_skin_square, int *need_refresh_flag) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i >= atom_numbers) {
    return;
  }
  float dx = crd[i].x - old_crd[i].x;
  float dy = crd[i].y - old_crd[i].y;
  float dz = crd[i].z - old_crd[i].z;
  float dist2 = dx * dx + dy * dy + dz * dz;
  if (dist2 > half_skin_square) {
    atomicExch(&need_refresh_flag[0], 1);
  }
}

void Refresh_Neighbor_List_Half(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd,
VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof,
float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin, float *box_length,
const GRID_POINTER *gpointer, GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket,
NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
int *excluded_numbers, float cutoff_skin_square, int grid_numbers,
float *grid_length_inverse, int *grid_N, int nxy, cudaStream_t stream) {
std::vector<int> h_refresh_sign(1);
cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream);
if (h_refresh_sign[0] == 1) {
VECTOR trans_vec = {-skin, -skin, -skin};
Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);

Clear_Neighbor_List_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, max_neighbor_number, d_nl);

Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
trans_vec);

@@ -266,7 +323,7 @@ void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_n
box_length);

Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);

trans_vec.x = -trans_vec.x;
trans_vec.y = -trans_vec.y;
@@ -281,7 +338,7 @@ void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_n
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);

Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
@@ -291,7 +348,7 @@ void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_n
Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
excluded_numbers);
refresh_sign[0] = 0;
h_refresh_sign[0] = 0;
}
}

@@ -301,23 +358,21 @@ void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const
float *box_length, const GRID_POINTER *gpointer, GRID_BUCKET *bucket,
int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
int *excluded_list, int *excluded_numbers, float cutoff_skin_square,
int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
int max_neighbor_number, cudaStream_t stream) {
int grid_numbers, float *grid_length_inverse, int *grid_N, int nxy,
cudaStream_t stream) {
VECTOR trans_vec = {skin, skin, skin};
Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);
Clear_Neighbor_List_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, max_neighbor_number, d_nl);
Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 32), 32, 0, stream>>>(
3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
crd, uint_crd);
Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
@@ -327,52 +382,42 @@ void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const
excluded_numbers);
}

__global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
int *nl_atom_serial, NEIGHBOR_LIST *nl) {
__global__ void copy_neighbor_list_atom_number(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
nl[i].atom_numbers = nl_atom_numbers[i];
nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
nl_atom_numbers[i] = nl[i].atom_numbers;
}
}

void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream) {
void ConstructNeighborListHalf(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream) {
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl);
}

__global__ void copy_neighbor_list_atom_number(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
nl_atom_numbers[i] = nl[i].atom_numbers;
}
}

void CopyNeighborListAtomNumber(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream) {
void CopyNeighborListHalf(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream) {
copy_neighbor_list_atom_number<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, nl,
nl_atom_numbers);
}

void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int Nxy, float cutoff_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket,
float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
VECTOR *crd, VECTOR *old_crd, float *crd_to_uint_crd_cof,
UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer,
NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
int *excluded_numbers, int max_neighbor_number, cudaStream_t stream) {
void Refresh_Neighbor_List_No_Check_Half(int grid_numbers, int atom_numbers, float skin, int nxy,
float cutoff_skin_square, int *grid_N, float *box_length,
int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, VECTOR *crd, VECTOR *old_crd,
float *crd_to_uint_crd_cof, UNSIGNED_INT_VECTOR *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers,
cudaStream_t stream) {
VECTOR trans_vec = {-skin, -skin, -skin};

Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);

Clear_Neighbor_List_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, max_neighbor_number, d_nl);

Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);

Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);

Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
trans_vec.x = -trans_vec.x;
trans_vec.y = -trans_vec.y;
trans_vec.z = -trans_vec.z;
@@ -383,8 +428,8 @@ void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float sk
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);

Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
crd, uint_crd);
Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
@@ -394,21 +439,14 @@ void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float sk
atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
}

__global__ void Mul_half(float *src, float *dst) {
int index = threadIdx.x;
if (index < 3) {
dst[index] = src[index] * 0.5;
}
}

void Neighbor_List_Update(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, int max_neighbor_number, cudaStream_t stream) {
void NeighborListUpdate(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, cudaStream_t stream) {
if (not_first_time) {
if (refresh_interval > 0) {
std::vector<int> refresh_count_list(1);
@@ -418,12 +456,12 @@ void Neighbor_List_Update(int grid_numbers, int atom_numbers, int *d_refresh_cou

if (refresh_count % refresh_interval == 0) {
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length,
atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket,
reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list,
excluded_numbers, max_neighbor_number, stream);
Refresh_Neighbor_List_No_Check_Half(
grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof,
reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start,
excluded_list, excluded_numbers, stream);
}
refresh_count += 1;
cudaMemcpyAsync(d_refresh_count, &refresh_count, sizeof(int), cudaMemcpyHostToDevice, stream);
@@ -432,12 +470,12 @@ void Neighbor_List_Update(int grid_numbers, int atom_numbers, int *d_refresh_cou
atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd), half_skin_square,
is_need_refresh_neighbor_list);
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length,
gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, Nxy,
max_neighbor_number, stream);
Refresh_Neighbor_List_Half(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length,
gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start,
excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers,
grid_length_inverse, grid_N, nxy, stream);
}
} else {
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
@@ -446,6 +484,161 @@ void Neighbor_List_Update(int grid_numbers, int atom_numbers, int *d_refresh_cou
reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), half_crd_to_uint_crd_cof,
uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
excluded_list_start, excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse,
grid_N, Nxy, max_neighbor_number, stream);
grid_N, nxy, stream);
}
}

// new neighbor list update functions

// Minimum-image displacement vec_a - vec_b under a periodic box of size
// box_length: each component is wrapped into [-box_length/2, box_length/2).
__device__ __host__ VECTOR Get_Periodic_Displacement_Update(const VECTOR vec_a, const VECTOR vec_b,
                                                            const VECTOR box_length) {
  VECTOR dr;
  dr.x = vec_a.x - vec_b.x;
  dr.y = vec_a.y - vec_b.y;
  // Bug fix: this line previously wrote the z difference into dr.x, clobbering
  // the x component and leaving dr.z uninitialized before the wrap below.
  dr.z = vec_a.z - vec_b.z;

  dr.x = dr.x - floorf(dr.x / box_length.x + 0.5f) * box_length.x;
  dr.y = dr.y - floorf(dr.y / box_length.y + 0.5f) * box_length.y;
  dr.z = dr.z - floorf(dr.z / box_length.z + 0.5f) * box_length.z;
  return dr;
}

// Periodic-aware refresh check: raise need_refresh_flag[0] to 1 (atomically)
// if any atom's minimum-image displacement since the last rebuild exceeds
// sqrt(half_skin_square). box_length is read from device memory (box_length[0]).
__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
                                                   const VECTOR *box_length, const float half_skin_square,
                                                   int *need_refresh_flag) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i >= atom_numbers) {
    return;
  }
  VECTOR dr = Get_Periodic_Displacement_Update(crd[i], old_crd[i], box_length[0]);
  float dist2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
  if (dist2 > half_skin_square) {
    atomicExch(&need_refresh_flag[0], 1);
  }
}

// Conditionally rebuild the neighbor list: copy the device-side refresh flag
// to the host and, if it is 1, re-bucket all atoms, regenerate neighbor pairs
// within cutoff_skin_square, and strip excluded pairs. `skin` is part of the
// shared signature but unused in this (non-translating) variant.
void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, VECTOR *old_crd,
                           UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, float *uint_dr_to_dr_cof,
                           int *atom_in_grid_serial, const float skin, float *box_length, const GRID_POINTER *gpointer,
                           GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl,
                           int *excluded_list_start, int *excluded_list, int *excluded_numbers,
                           float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int nxy,
                           cudaStream_t stream) {
  std::vector<int> h_refresh_sign(1);
  cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream);
  // Bug fix: the copy above is asynchronous, so the host must wait for it to
  // land before reading h_refresh_sign[0]; previously the value was read
  // immediately, racing the DMA into pageable memory.
  cudaStreamSynchronize(stream);
  if (h_refresh_sign[0] == 1) {
    Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
      grid_numbers, atom_numbers_in_grid_bucket, bucket);

    Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
                                                                                              box_length);

    Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);

    // Snapshot crd into old_crd so the next drift check measures from here.
    Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>(
      3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));

    Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);

    Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

    Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket,
      d_nl, cutoff_skin_square);

    Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
                                                    stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
                                                              excluded_numbers);
    // Bug fix: clear the flag on the DEVICE. The old code zeroed only the local
    // host copy (h_refresh_sign[0] = 0), which had no effect on refresh_sign.
    cudaMemsetAsync(refresh_sign, 0, sizeof(int), stream);
  }
}

// Unconditionally rebuild the neighbor list (no drift check): wrap coordinates
// into the box, re-bucket atoms, snapshot crd into old_crd, regenerate
// neighbors within cutoff_skin_square, then strip excluded pairs.
// `skin` is accepted for signature parity but unused in this variant.
void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int nxy, float cutoff_skin_square,
                                    int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket,
                                    float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
                                    VECTOR *crd, VECTOR *old_crd, float *crd_to_uint_crd_cof,
                                    UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer,
                                    NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
                                    int *excluded_numbers, cudaStream_t stream) {
  const int threads = 32;
  const unsigned int grid_blocks = ceilf(static_cast<float>(grid_numbers) / threads);
  const unsigned int atom_blocks = ceilf(static_cast<float>(atom_numbers) / threads);

  Clear_Grid_Bucket<<<grid_blocks, threads, 0, stream>>>(grid_numbers, atom_numbers_in_grid_bucket, bucket);

  Crd_Periodic_Map<<<atom_blocks, threads, 0, stream>>>(atom_numbers, crd, box_length);

  Find_Atom_In_Grid_Serial<<<atom_blocks, threads, 0, stream>>>(atom_numbers, grid_length_inverse, crd, grid_N, nxy,
                                                                atom_in_grid_serial);

  // Device-to-device snapshot of the current coordinates.
  cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream);

  Put_Atom_In_Grid_Bucket<<<atom_blocks, threads, 0, stream>>>(atom_numbers, atom_in_grid_serial, bucket,
                                                               atom_numbers_in_grid_bucket);

  Crd_To_Uint_Crd<<<atom_blocks, threads, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

  Find_atom_neighbors<<<atom_blocks, threads, 0, stream>>>(atom_numbers, uint_crd, uint_dr_to_dr_cof,
                                                           atom_in_grid_serial, gpointer, bucket,
                                                           atom_numbers_in_grid_bucket, d_nl, cutoff_skin_square);

  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<atom_blocks, threads, 0, stream>>>(
    atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
}

// Host wrapper: export per-atom neighbor counts and serials from the
// NEIGHBOR_LIST structs into the flat nl_atom_numbers / nl_atom_serial buffers.
void CopyNeighborList(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
                      int *nl_atom_serial, cudaStream_t stream) {
  const int threads = 128;
  const unsigned int blocks = ceilf(static_cast<float>(atom_numbers) / threads);
  copy_neighbor_list_atom_number<<<blocks, threads, 0, stream>>>(atom_numbers, max_neighbor_numbers, nl,
                                                                 nl_atom_numbers, nl_atom_serial);
}

// Host wrapper: asynchronously launches construct_neighbor_list_kernel on
// `stream` to wire each NEIGHBOR_LIST entry to its slice of the flat
// nl_atom_numbers / nl_atom_serial buffers (stride max_neighbor_numbers).
void ConstructNeighborList(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
                           NEIGHBOR_LIST *nl, cudaStream_t stream) {
  const int block_dim = 128;                                    // one thread per atom
  const int grid_dim = (atom_numbers + block_dim - 1) / block_dim;  // ceil-div tail guard
  construct_neighbor_list_kernel<<<grid_dim, block_dim, 0, stream>>>(atom_numbers, max_neighbor_numbers,
                                                                     nl_atom_numbers, nl_atom_serial, nl);
}

// Host-side call counter used by the periodic-refresh branch below.
// NOTE(review): mutable global with external linkage, shared by every kernel
// instance/stream and incremented without synchronization — not safe for
// concurrent callers; consider making it static or per-instance state.
int refresh_count = 0;

// Dispatches one of three neighbor-list refresh policies; every kernel is
// enqueued asynchronously on `stream`:
//  - forced_update != 0: rebuild unconditionally;
//  - refresh_interval > 0 && !forced_check: rebuild on every
//    refresh_interval-th call, tracked by the host-side refresh_count above;
//  - otherwise: flag atoms whose displacement since old_crd exceeds
//    half_skin_square on the device, then conditionally rebuild.
// Mul_quarter<<<1, 3>>> scales the 3 components of crd_to_uint_crd_cof into
// half_crd_to_uint_crd_cof before each rebuild.
// NOTE(review): d_refresh_count and not_first_time are not referenced in this
// body — confirm whether they are still needed in the interface.
// NOTE(review): cutoff_square is passed as Refresh_Neighbor_List_No_Check's
// cutoff_skin_square argument (not cutoff_with_skin_square) — confirm intended.
void NeighborListRefresh(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
                         int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
                         int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
                         int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
                         float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
                         float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
                         int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
                         int *is_need_refresh_neighbor_list, int forced_update, int forced_check, cudaStream_t stream) {
  if (forced_update) {
    // Unconditional rebuild requested by the caller.
    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List_No_Check(
      grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
      grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
      reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
      uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, excluded_numbers, stream);

  } else if (refresh_interval > 0 && !forced_check) {
    // Fixed-interval policy: rebuild on calls 0, refresh_interval, 2*refresh_interval, ...
    if (refresh_count % refresh_interval == 0) {
      Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
      Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length,
                                     atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket,
                                     reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
                                     half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                                     uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list,
                                     excluded_numbers, stream);
    }
    refresh_count += 1;
  } else {
    // Displacement-triggered policy: device kernel sets the flag when any atom
    // moved more than half the skin since the last rebuild, then
    // Refresh_Neighbor_List rebuilds conditionally based on that flag.
    Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
      atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
      reinterpret_cast<VECTOR *>(box_length), half_skin_square, is_need_refresh_neighbor_list);
    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
                          reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                          half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer,
                          bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
                          excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, nxy,
                          stream);
  }
}

+ 32
- 11
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh View File

@@ -13,9 +13,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"

struct VECTOR {
float x;
@@ -43,18 +49,33 @@ struct GRID_POINTER {
int *grid_serial;
};

void Construct_Neighbor_List(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream);
void ConstructNeighborList(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream);

void CopyNeighborList(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
int *nl_atom_serial, cudaStream_t stream);

void NeighborListRefresh(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, int forced_update, int forced_check, cudaStream_t stream);

void ConstructNeighborListHalf(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream);

void CopyNeighborListAtomNumber(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream);
void CopyNeighborListHalf(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream);

void Neighbor_List_Update(int grid_numbers, int atom_numbers, int* d_refresh_count, int refresh_interval,
int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, int max_neighbor_number, cudaStream_t stream);
void NeighborListUpdate(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, cudaStream_t stream);

#endif

+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/total_c6_get_kernel.cc View File

@@ -13,13 +13,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* Totalc6get. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/common/total_c6_get_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(
totalc6get,
Totalc6get,
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
TotalC6GetGpuKernel, float, int)
} // namespace kernel


+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/crdmcmap/cal_no_wrap_crd_kernel.cc View File

@@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* CalculateNowrapCrd. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/crdmcmap/cal_no_wrap_crd_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(calculatenowrapcrd,
MS_REG_GPU_KERNEL_TWO(CalculateNowrapCrd,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)


+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/crdmcmap/refresh_boxmaptimes_kernel.cc View File

@@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* RefreshBoxmapTimes. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/crdmcmap/refresh_boxmaptimes_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(refreshboxmaptimes,
MS_REG_GPU_KERNEL_TWO(RefreshBoxmapTimes,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)


+ 4
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc View File

@@ -13,6 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h"



+ 16
- 15
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h View File

@@ -13,6 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_
@@ -38,7 +42,7 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
refresh_interval = static_cast<int>(GetAttr<int64_t>(kernel_node, "refresh_interval"));
not_first_time = static_cast<int>(GetAttr<int64_t>(kernel_node, "not_first_time"));
Nxy = static_cast<int>(GetAttr<int64_t>(kernel_node, "Nxy"));
nxy = static_cast<int>(GetAttr<int64_t>(kernel_node, "nxy"));
excluded_atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "excluded_atom_numbers"));

cutoff_square = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_square"));
@@ -62,7 +66,7 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
auto bucket = GetDeviceAddress<int>(inputs, 1);
auto crd = GetDeviceAddress<float>(inputs, 2);
auto box_length = GetDeviceAddress<float>(inputs, 3);
auto grid_N = GetDeviceAddress<int>(inputs, 4);
auto grid_n = GetDeviceAddress<int>(inputs, 4);
auto grid_length_inverse = GetDeviceAddress<float>(inputs, 5);
auto atom_in_grid_serial = GetDeviceAddress<int>(inputs, 6);
auto old_crd = GetDeviceAddress<float>(inputs, 7);
@@ -83,11 +87,9 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
NEIGHBOR_LIST *nl = GetDeviceAddress<NEIGHBOR_LIST>(workspaces, 2);
float *half_crd_to_uint_crd_cof = GetDeviceAddress<float>(workspaces, 3);

// std::vector<GRID_BUCKET> h_bucket(grid_numbers);
for (size_t i = 0; i < h_bucket.size(); i += 1) {
h_bucket[i].atom_serial = bucket + i * max_atom_in_grid_numbers;
}
// std::vector<GRID_POINTER> h_gpointer(grid_numbers);
for (size_t i = 0; i < h_gpointer.size(); i += 1) {
h_gpointer[i].grid_serial = gpointer + i * 125;
}
@@ -96,16 +98,15 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
reinterpret_cast<cudaStream_t>(stream_ptr));
cudaMemcpyAsync(d_gpointer, h_gpointer.data(), sizeof(GRID_POINTER) * grid_numbers, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr));
Construct_Neighbor_List(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl,
reinterpret_cast<cudaStream_t>(stream_ptr));

Neighbor_List_Update(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, Nxy,
cutoff_square, cutoff_with_skin_square, grid_N, box_length, atom_numbers_in_grid_bucket,
grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof,
half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start,
excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, max_neighbor_numbers,
reinterpret_cast<cudaStream_t>(stream_ptr));
CopyNeighborListAtomNumber(atom_numbers, nl, nl_atom_numbers, reinterpret_cast<cudaStream_t>(stream_ptr));
ConstructNeighborListHalf(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl,
reinterpret_cast<cudaStream_t>(stream_ptr));
NeighborListUpdate(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, nxy,
cutoff_square, cutoff_with_skin_square, grid_n, box_length, atom_numbers_in_grid_bucket,
grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof,
half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start,
excluded_list, excluded_numbers, half_skin_square, need_refresh_flag,
reinterpret_cast<cudaStream_t>(stream_ptr));
CopyNeighborListHalf(atom_numbers, nl, nl_atom_numbers, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}

@@ -151,7 +152,7 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
int atom_numbers;
int grid_numbers;
int refresh_interval;
int Nxy;
int nxy;
int max_atom_in_grid_numbers;
int max_neighbor_numbers;
int excluded_atom_numbers;


+ 49
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc View File

@@ -0,0 +1,49 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* NeighborListRefresh. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h"

namespace mindspore {
namespace kernel {
// Registers the NeighborListRefresh GPU op with 19 inputs and one float32
// output, backed by NeighborListUpdateNewGpuKernel<int, float>. The attribute
// order here must match the input indices read in the kernel's Launch()
// (inputs 0..18) — presumably defined in neighbor_list_update_new_kernel.h;
// verify against that header when changing this list.
MS_REG_GPU_KERNEL_TWO(NeighborListRefresh,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeUInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      NeighborListUpdateNewGpuKernel, int, float)
}  // namespace kernel
}  // namespace mindspore

+ 179
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h View File

@@ -0,0 +1,179 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* NeighborListRefresh. This is an experimental interface that is subject to change and/or deletion.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_NEW_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_NEW_KERNEL_H_

#include <cuda_runtime_api.h>
#include <cufft.h>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh"

namespace mindspore {
namespace kernel {
// GPU kernel wrapper for the SPONGE NeighborListRefresh op. It rebuilds the
// host-side GRID_BUCKET / GRID_POINTER pointer tables over the flat int
// buffers the framework supplies, uploads them, and delegates to the CUDA
// launchers declared in neighbor_list_impl.cuh (ConstructNeighborList,
// NeighborListRefresh, CopyNeighborList). T/T1 are the registered element
// types (int, float) and are not referenced directly in this class body.
template <typename T, typename T1>
class NeighborListUpdateNewGpuKernel : public GpuKernel {
 public:
  // Defaults: skin 2.0, cutoff 9.0, at most 64 atoms per grid bucket and 800
  // neighbors per atom. NOTE(review): skin/cutoff are float members set from
  // double literals; the remaining members are filled in by Init().
  NeighborListUpdateNewGpuKernel() : skin(2.0), cutoff(9.0), max_atom_in_grid_numbers(64), max_neighbor_numbers(800) {}
  ~NeighborListUpdateNewGpuKernel() override = default;
  // Reads all op attributes from the kernel node, sizes the host-side bucket
  // and grid-pointer mirrors (one entry per grid), and fills the size lists.
  // Always returns true; attribute lookup failures surface via GetAttr itself.
  bool Init(const CNodePtr &kernel_node) override {
    grid_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "grid_numbers"));
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    refresh_interval = static_cast<int>(GetAttr<int64_t>(kernel_node, "refresh_interval"));
    not_first_time = static_cast<int>(GetAttr<int64_t>(kernel_node, "not_first_time"));
    nxy = static_cast<int>(GetAttr<int64_t>(kernel_node, "nxy"));
    excluded_atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "excluded_atom_numbers"));

    cutoff_square = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_square"));
    half_skin_square = static_cast<float>(GetAttr<float>(kernel_node, "half_skin_square"));
    cutoff_with_skin = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_with_skin"));
    half_cutoff_with_skin = static_cast<float>(GetAttr<float>(kernel_node, "half_cutoff_with_skin"));
    cutoff_with_skin_square = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_with_skin_square"));
    forced_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "forced_update"));
    forced_check = static_cast<int>(GetAttr<int64_t>(kernel_node, "forced_check"));
    h_bucket.resize(grid_numbers);
    h_gpointer.resize(grid_numbers);
    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  // Wires device addresses (input indices 0-18 must match the .cc registration
  // order), uploads the bucket/grid-pointer tables, then runs the refresh
  // pipeline on stream_ptr. All work is asynchronous; no sync before return.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspaces,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto atom_numbers_in_grid_bucket = GetDeviceAddress<int>(inputs, 0);
    auto bucket = GetDeviceAddress<int>(inputs, 1);
    auto crd = GetDeviceAddress<float>(inputs, 2);
    auto box_length = GetDeviceAddress<float>(inputs, 3);
    auto grid_n = GetDeviceAddress<int>(inputs, 4);
    auto grid_length_inverse = GetDeviceAddress<float>(inputs, 5);
    auto atom_in_grid_serial = GetDeviceAddress<int>(inputs, 6);
    auto old_crd = GetDeviceAddress<float>(inputs, 7);
    auto crd_to_uint_crd_cof = GetDeviceAddress<float>(inputs, 8);
    auto uint_crd = GetDeviceAddress<unsigned int>(inputs, 9);
    auto gpointer = GetDeviceAddress<int>(inputs, 10);
    auto nl_atom_numbers = GetDeviceAddress<int>(inputs, 11);
    auto nl_atom_serial = GetDeviceAddress<int>(inputs, 12);
    auto uint_dr_to_dr_cof = GetDeviceAddress<float>(inputs, 13);
    auto excluded_list_start = GetDeviceAddress<int>(inputs, 14);
    auto excluded_list = GetDeviceAddress<int>(inputs, 15);
    auto excluded_numbers = GetDeviceAddress<int>(inputs, 16);
    auto need_refresh_flag = GetDeviceAddress<int>(inputs, 17);
    auto d_refresh_count = GetDeviceAddress<int>(inputs, 18);

    // Workspaces: struct views over raw int buffers sized in InitSizeLists().
    GRID_BUCKET *d_bucket = reinterpret_cast<GRID_BUCKET *>(GetDeviceAddress<int>(workspaces, 0));
    GRID_POINTER *d_gpointer = reinterpret_cast<GRID_POINTER *>(GetDeviceAddress<int>(workspaces, 1));
    NEIGHBOR_LIST *nl = GetDeviceAddress<NEIGHBOR_LIST>(workspaces, 2);
    float *half_crd_to_uint_crd_cof = GetDeviceAddress<float>(workspaces, 3);

    // Point each per-grid bucket at its slice of the flat device buffer
    // (stride max_atom_in_grid_numbers); pointers are device addresses held in
    // host-side structs until the memcpy below.
    for (size_t i = 0; i < h_bucket.size(); i += 1) {
      h_bucket[i].atom_serial = bucket + i * max_atom_in_grid_numbers;
    }
    // Stride 125 grid serials per grid — presumably the 5x5x5 neighborhood;
    // TODO(review): confirm and name this constant.
    for (size_t i = 0; i < h_gpointer.size(); i += 1) {
      h_gpointer[i].grid_serial = gpointer + i * 125;
    }

    // NOTE(review): cudaMemcpyAsync return codes are not checked here; an
    // earlier sticky CUDA error would go unnoticed until a later sync.
    // h_bucket/h_gpointer are pageable members, so these copies may be staged.
    cudaMemcpyAsync(d_bucket, h_bucket.data(), sizeof(GRID_BUCKET) * grid_numbers, cudaMemcpyHostToDevice,
                    reinterpret_cast<cudaStream_t>(stream_ptr));
    cudaMemcpyAsync(d_gpointer, h_gpointer.data(), sizeof(GRID_POINTER) * grid_numbers, cudaMemcpyHostToDevice,
                    reinterpret_cast<cudaStream_t>(stream_ptr));
    // Build NEIGHBOR_LIST views, run the refresh policy, then copy the
    // resulting neighbor counts/serials back into the flat input buffers.
    ConstructNeighborList(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl,
                          reinterpret_cast<cudaStream_t>(stream_ptr));

    NeighborListRefresh(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, nxy,
                        cutoff_square, cutoff_with_skin_square, grid_n, box_length, atom_numbers_in_grid_bucket,
                        grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof,
                        half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start,
                        excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, forced_update,
                        forced_check, reinterpret_cast<cudaStream_t>(stream_ptr));
    CopyNeighborList(atom_numbers, max_neighbor_numbers, nl, nl_atom_numbers, nl_atom_serial,
                     reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  // Declares the byte sizes of the 19 inputs (same order as Launch), the 4
  // workspaces, and the single float output placeholder.
  void InitSizeLists() override {
    input_size_list_.push_back(sizeof(int) * grid_numbers);
    input_size_list_.push_back(sizeof(int) * max_atom_in_grid_numbers * grid_numbers);
    input_size_list_.push_back(sizeof(VECTOR) * atom_numbers);
    input_size_list_.push_back(sizeof(VECTOR));

    input_size_list_.push_back(sizeof(INT_VECTOR));
    input_size_list_.push_back(sizeof(VECTOR));
    input_size_list_.push_back(sizeof(int) * atom_numbers);

    input_size_list_.push_back(sizeof(VECTOR) * atom_numbers);
    input_size_list_.push_back(sizeof(VECTOR));
    input_size_list_.push_back(sizeof(UNSIGNED_INT_VECTOR) * atom_numbers);

    // 125 grid serials per grid — keep in sync with the stride used in Launch().
    input_size_list_.push_back(sizeof(int) * grid_numbers * 125);
    input_size_list_.push_back(sizeof(int) * atom_numbers);
    input_size_list_.push_back(sizeof(int) * atom_numbers * max_neighbor_numbers);
    input_size_list_.push_back(sizeof(VECTOR));

    input_size_list_.push_back(sizeof(int) * atom_numbers);
    input_size_list_.push_back(sizeof(int) * excluded_atom_numbers);
    input_size_list_.push_back(sizeof(int) * atom_numbers);

    input_size_list_.push_back(sizeof(int));
    input_size_list_.push_back(sizeof(int));

    workspace_size_list_.push_back(sizeof(GRID_BUCKET) * grid_numbers);
    workspace_size_list_.push_back(sizeof(GRID_POINTER) * grid_numbers);
    workspace_size_list_.push_back(sizeof(NEIGHBOR_LIST) * atom_numbers);
    workspace_size_list_.push_back(sizeof(float) * 3);

    output_size_list_.push_back(sizeof(float));
  }

 private:
  float skin;                    // neighbor-list skin width (constructor default 2.0)
  float cutoff;                  // interaction cutoff (constructor default 9.0; not read in this class body)
  int not_first_time;            // attr: nonzero after the first step
  int atom_numbers;              // attr: total atom count
  int grid_numbers;              // attr: total grid (bucket) count
  int refresh_interval;          // attr: rebuild every N calls when > 0
  int nxy;                       // attr: grid count in the x-y plane
  int max_atom_in_grid_numbers;  // bucket capacity (fixed 64)
  int max_neighbor_numbers;      // per-atom neighbor capacity (fixed 800)
  int excluded_atom_numbers;     // attr: length of the flat exclusion list
  float half_skin_square;        // attr: displacement-check threshold
  float cutoff_square;
  float cutoff_with_skin;
  float half_cutoff_with_skin;
  float cutoff_with_skin_square;
  int forced_update;             // attr: nonzero forces an unconditional rebuild
  int forced_check;              // attr: nonzero forces the displacement-check path

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  std::vector<GRID_BUCKET> h_bucket;      // host mirror of per-grid bucket pointers
  std::vector<GRID_POINTER> h_gpointer;   // host mirror of per-grid neighbor-grid pointers
};
} // namespace kernel
} // namespace mindspore

#endif

+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/restrain/restrain_energy_kernel.cc View File

@@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* RestrainEnergy. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/restrain/restrain_energy_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(restrainenergy,
MS_REG_GPU_KERNEL_TWO(RestrainEnergy,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)


+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/restrain/restrain_force_atom_energy_virial_impl_kernel.cc View File

@@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* RestrainForceWithAtomEnergyVirial. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/restrain/restrain_force_atom_energy_virial_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(restrainforcewithatomenergyandvirial,
MS_REG_GPU_KERNEL_TWO(RestrainForceWithAtomEnergyVirial,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)


Loading…
Cancel
Save