Browse Source

!21132 sponge ops 0730

Merge pull request !21132 from jiahongQian/mater2
tags/v1.4.0
i-robot Gitee 4 years ago
parent
commit
dbe8016596
22 changed files with 1359 additions and 139 deletions
  1. +75
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cu
  2. +27
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh
  3. +44
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/get_center_of_mass_impl.cu
  4. +26
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/get_center_of_mass_impl.cuh
  5. +51
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/map_center_of_mass_impl.cu
  6. +26
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/map_center_of_mass_impl.cuh
  7. +147
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cu
  8. +29
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cuh
  9. +139
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cu
  10. +29
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh
  11. +132
    -112
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cu
  12. +13
    -11
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh
  13. +35
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/bond/bond_force_with_atom_energy_and_virial_kernel.cc
  14. +112
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/bond/bond_force_with_atom_energy_and_virial_kernel.h
  15. +31
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/get_center_of_mass_kernel.cc
  16. +101
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/get_center_of_mass_kernel.h
  17. +32
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/map_center_of_mass_kernel.cc
  18. +105
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/map_center_of_mass_kernel.h
  19. +39
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_kernel.cc
  20. +145
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_kernel.h
  21. +3
    -3
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc
  22. +18
    -13
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h

+ 75
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cu View File

@@ -0,0 +1,75 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"

__global__ void BondForceWithAtomEnergyAndVirialKernel(const int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                                       const VECTOR *scaler, const int *atom_a, const int *atom_b,
                                                       const float *bond_k, const float *bond_r0, VECTOR *frc,
                                                       float *atom_energy, float *atom_virial) {
  // One thread per harmonic bond. For bond i between atoms (atom_a[i], atom_b[i])
  // with force constant k and equilibrium length r0:
  //   energy  E = k * (|dr| - r0)^2
  //   force   F = -(2 * k * (|dr| - r0)) * dr / |dr|   (applied +/- to the pair)
  //   virial  contribution = -(2 * k * (|dr| - r0)) * |dr|
  // Accumulation uses atomics because several bonds may share an atom.
  int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (bond_i < bond_numbers) {
    int atom_i = atom_a[bond_i];
    int atom_j = atom_b[bond_i];

    float k = bond_k[bond_i];
    float r0 = bond_r0[bond_i];

    // Minimum-image displacement between the bonded atoms (scaler[0] converts
    // unsigned-int coordinates back to real-space lengths).
    VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
    float abs_r = norm3df(dr.x, dr.y, dr.z);
    float r_1 = 1.0f / abs_r;  // fixed: float literal avoids a double-precision division in a float kernel

    float tempf2 = abs_r - r0;
    float tempf = 2 * tempf2 * k;  // dE/d|dr|
    VECTOR f = tempf * r_1 * dr;

    atomicAdd(&frc[atom_i].x, -f.x);
    atomicAdd(&frc[atom_i].y, -f.y);
    atomicAdd(&frc[atom_i].z, -f.z);

    atomicAdd(&frc[atom_j].x, f.x);
    atomicAdd(&frc[atom_j].y, f.y);
    atomicAdd(&frc[atom_j].z, f.z);

    // Energy and virial for the pair are credited to the first atom only.
    atomicAdd(&atom_virial[atom_i], -tempf * abs_r);
    atomicAdd(&atom_energy[atom_i], k * tempf2 * tempf2);
  }
}

void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
                                      const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
                                      const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v,
                                      cudaStream_t stream) {
  // Host launcher: zero the per-atom accumulators, view the flat float/uint
  // buffers as vector structs, then run one thread per bond.
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_v, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);

  const size_t threads = 128;
  const size_t blocks = ceilf(static_cast<float>(bond_numbers) / 128);

  // The kernel takes const pointers for the read-only inputs, so no
  // const_cast is needed here.
  const UNSIGNED_INT_VECTOR *uint_crd = reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f);
  const VECTOR *scaler = reinterpret_cast<const VECTOR *>(scaler_f);
  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);

  BondForceWithAtomEnergyAndVirialKernel<<<blocks, threads, 0, stream>>>(
    bond_numbers, uint_crd, scaler, atom_a, atom_b, bond_k, bond_r0, frc, atom_energy, atom_v);
}

void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v,
cudaStream_t stream);

+ 27
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh View File

@@ -0,0 +1,27 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_VIRIAL_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_VIRIAL_IMPL_H_

#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"

void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v,
cudaStream_t stream);
#endif

+ 44
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/get_center_of_mass_impl.cu View File

@@ -0,0 +1,44 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/get_center_of_mass_impl.cuh"

__global__ void Get_Center_Of_Mass(int residue_numbers, int *start, int *end, VECTOR *crd, float *atom_mass,
                                   float *residue_mass_inverse, VECTOR *center_of_mass) {
  // Grid-stride loop: each thread processes whole residues. The center of mass
  // of residue r is sum(m_a * crd_a for a in [start[r], end[r])) / M_r, where
  // 1 / M_r is supplied precomputed in residue_mass_inverse.
  const int stride = gridDim.x * blockDim.x;
  for (int res = blockDim.x * blockIdx.x + threadIdx.x; res < residue_numbers; res += stride) {
    VECTOR weighted_sum = {0.0f, 0.0f, 0.0f};
    const int first_atom = start[res];
    const int last_atom = end[res];
    for (int a = first_atom; a < last_atom; ++a) {
      weighted_sum = weighted_sum + atom_mass[a] * crd[a];
    }
    center_of_mass[res] = residue_mass_inverse[res] * weighted_sum;
  }
}

void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
                     float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream) {
  // Clear the output (3 floats per residue), then launch the grid-stride
  // kernel with the original fixed tuning of 20 blocks x 32 threads.
  Reset_List<<<ceilf(static_cast<float>(3. * residue_numbers) / 128), 128, 0, stream>>>(3 * residue_numbers,
                                                                                        center_of_mass_f, 0.);
  VECTOR *coordinates = reinterpret_cast<VECTOR *>(crd_f);
  VECTOR *com = reinterpret_cast<VECTOR *>(center_of_mass_f);
  Get_Center_Of_Mass<<<20, 32, 0, stream>>>(residue_numbers, start, end, coordinates, atom_mass, residue_mass_inverse,
                                            com);
}

void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream);

+ 26
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/get_center_of_mass_impl.cuh View File

@@ -0,0 +1,26 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_

#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"

void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream);

#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_

+ 51
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/map_center_of_mass_impl.cu View File

@@ -0,0 +1,51 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/map_center_of_mass_impl.cuh"

// Translate every residue as a rigid unit so that its (optionally rescaled)
// center of mass lies inside the primary periodic box. Launched with a 2D
// block: threadIdx.x/blockIdx.x index residues (grid-stride), threadIdx.y
// lanes cooperate over the atoms within one residue.
__global__ void Map_Center_Of_Mass(int residue_numbers, int *start, int *end, float scaler, VECTOR *center_of_mass,
                                   VECTOR *box_length, VECTOR *no_wrap_crd, VECTOR *crd) {
  VECTOR trans_vec;
  VECTOR com;
  for (int residue_i = blockDim.x * blockIdx.x + threadIdx.x; residue_i < residue_numbers;
       residue_i += gridDim.x * blockDim.x) {
    com = center_of_mass[residue_i];

    // Center of mass wrapped into [0, box) per component.
    trans_vec.x = com.x - floorf(com.x / box_length[0].x) * box_length[0].x;
    trans_vec.y = com.y - floorf(com.y / box_length[0].y) * box_length[0].y;
    trans_vec.z = com.z - floorf(com.z / box_length[0].z) * box_length[0].z;
    // Net shift applied to every atom of the residue; scaler rescales the
    // wrapped COM (e.g. for barostat box scaling — NOTE(review): confirm
    // intended scaler semantics against the caller).
    trans_vec = scaler * trans_vec - com;

    // Each threadIdx.y lane handles a strided subset of the residue's atoms.
    for (int atom_i = start[residue_i] + threadIdx.y; atom_i < end[residue_i]; atom_i += blockDim.y) {
      crd[atom_i] = no_wrap_crd[atom_i] + trans_vec;
    }
  }
}

void MapCenterOfMass(int residue_numbers, int *start, int *end, float scaler, float *center_of_mass_f,
                     float *box_length_f, float *no_wrap_crd_f, float *crd_f, cudaStream_t stream) {
  // View the flat float buffers as VECTOR triples, then launch with the
  // original 2D tuning: 20 blocks of (32 residue-lanes x 4 atom-lanes).
  VECTOR *wrapped_crd = reinterpret_cast<VECTOR *>(crd_f);
  VECTOR *unwrapped_crd = reinterpret_cast<VECTOR *>(no_wrap_crd_f);
  VECTOR *box = reinterpret_cast<VECTOR *>(box_length_f);
  VECTOR *com = reinterpret_cast<VECTOR *>(center_of_mass_f);
  Map_Center_Of_Mass<<<20, {32, 4}, 0, stream>>>(residue_numbers, start, end, scaler, com, box, unwrapped_crd,
                                                 wrapped_crd);
}

void MapCenterOfMass(int residue_numbers, int *start, int *end, float scaler, float *center_of_mass_f,
float *box_length_f, float *no_wrap_crd_f, float *crd_f, cudaStream_t stream);

+ 26
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/common/map_center_of_mass_impl.cuh View File

@@ -0,0 +1,26 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_

#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"

void MapCenterOfMass(int residue_numbers, int *start, int *end, float scaler, float *center_of_mass_f,
float *box_length_f, float *no_wrap_crd_f, float *crd_f, cudaStream_t stream);

#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_

+ 147
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cu View File

@@ -0,0 +1,147 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"

// Direct-space LJ + PME Coulomb force and per-atom energy within the cutoff.
// One threadIdx.x per atom; threadIdx.y lanes split that atom's neighbor list.
// Forces are accumulated into frc with atomics (Newton's third law applied to
// both partners); the energy of each interacting pair is credited to atom_i.
__global__ void LJ_Direct_CF_Force_With_Atom_Energy_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl,
                                                         const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
                                                         const float *LJ_type_A, const float *LJ_type_B,
                                                         const float cutoff, VECTOR *frc, const float pme_beta,
                                                         const float sqrt_pi, float *atom_energy) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_6;
    float frc_abs = 0.;
    VECTOR frc_lin;
    // Local accumulator for atom_i's force; flushed with one atomicAdd per
    // component at the end to reduce atomic traffic.
    VECTOR frc_record = {0., 0., 0.};

    float charge_i = r1.charge;
    float charge_j;
    float dr_abs;
    float dr_1;
    float beta_dr;
    float frc_cf_abs;

    float ene_lin = 0.;   // accumulated LJ energy
    float ene_lin2 = 0.;  // accumulated direct-space Coulomb energy

    int x, y;
    int atom_pair_LJ_type;
    // threadIdx.y lanes stride over this atom's neighbor list.
    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
      atom_j = nl_i.atom_serial[j];
      r2 = uint_crd[atom_j];
      charge_j = r2.charge;

      // Periodic displacement via unsigned-int wrap-around: the uint
      // subtraction overflows modulo 2^32, which boxlength scales back to a
      // minimum-image real-space vector.
      int_x = r2.uint_x - r1.uint_x;
      int_y = r2.uint_y - r1.uint_y;
      int_z = r2.uint_z - r1.uint_z;
      dr.x = boxlength[0].x * int_x;
      dr.y = boxlength[0].y * int_y;
      dr.z = boxlength[0].z * int_z;
      dr_abs = norm3df(dr.x, dr.y, dr.z);
      if (dr_abs < cutoff) {
        dr_1 = 1. / dr_abs;
        dr_2 = dr_1 * dr_1;
        dr_4 = dr_2 * dr_2;
        dr_8 = dr_4 * dr_4;
        dr_6 = dr_4 * dr_2;

        // Branchless lookup of the pair's LJ table index: from the two atom
        // LJ types compute max/min via bit tricks (y = |type_j - type_i|),
        // then index the triangular A/B coefficient matrix.
        y = (r2.LJ_type - r1.LJ_type);
        x = y >> 31;
        y = (y ^ x) - x;
        x = r2.LJ_type + r1.LJ_type;
        r2.LJ_type = (x + y) >> 1;
        x = (x - y) >> 1;
        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

        // LJ force magnitude / |dr|; A and B are the 12- and 6-term tables.
        frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
        // PME direct-space (erfc-screened) Coulomb force magnitude / |dr|.
        beta_dr = pme_beta * dr_abs;
        frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
        frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
        frc_cf_abs = charge_i * charge_j * frc_cf_abs;

        // Pair energies: screened Coulomb and LJ (A/B tables appear to be
        // pre-scaled by 12 and 6, hence the 1/12 and 1/6 factors here).
        ene_lin2 = ene_lin2 + charge_i * charge_j * erfcf(beta_dr) * dr_1;
        ene_lin =
          ene_lin +
          (0.083333333 * LJ_type_A[atom_pair_LJ_type] * dr_6 - 0.166666666 * LJ_type_B[atom_pair_LJ_type]) * dr_6;

        frc_abs = frc_abs - frc_cf_abs;

        frc_lin.x = frc_abs * dr.x;
        frc_lin.y = frc_abs * dr.y;
        frc_lin.z = frc_abs * dr.z;

        frc_record.x = frc_record.x + frc_lin.x;
        frc_record.y = frc_record.y + frc_lin.y;
        frc_record.z = frc_record.z + frc_lin.z;

        // Equal and opposite force on the neighbor.
        atomicAdd(&frc[atom_j].x, -frc_lin.x);
        atomicAdd(&frc[atom_j].y, -frc_lin.y);
        atomicAdd(&frc[atom_j].z, -frc_lin.z);
      }
    }
    atomicAdd(&frc[atom_i].x, frc_record.x);
    atomicAdd(&frc[atom_i].y, frc_record.y);
    atomicAdd(&frc[atom_i].z, frc_record.z);

    atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);
  }
}

// Host launcher: zero the force buffer, rebuild the NEIGHBOR_LIST view from
// the flat neighbor arrays, pack coordinates + LJ type + charge into
// UINT_VECTOR_LJ_TYPE, then run the direct-space LJ+PME kernel.
void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta,
                                   const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f,
                                   float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
                                   const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy,
                                   cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  // Hard-coded capacity of each atom's neighbor slot; must match the layout
  // of nl_atom_serial produced by the neighbor-list kernels — TODO confirm.
  int max_neighbor_numbers = 800;
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);

  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);

  // thread_LJ is a 2D block dimension defined elsewhere (presumably in
  // common_sponge.cuh); the grid of ceil(atom_numbers / 8) suggests 8 atoms
  // per block — verify against thread_LJ's actual value.
  LJ_Direct_CF_Force_With_Atom_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI,
    atom_energy);
  return;
}

void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta,
const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f,
float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy,
cudaStream_t stream);

+ 29
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cuh View File

@@ -0,0 +1,29 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_PME_DIRECT_FORCE_WITH_ATOM_ENERGY_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_PME_DIRECT_FORCE_WITH_ATOM_ENERGY_IMPL_H_

#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"

void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta,
const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f,
float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy,
cudaStream_t stream);

#endif

+ 139
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cu View File

@@ -0,0 +1,139 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh"

// One thread per 1-4 (dihedral-end) pair: scaled LJ + scaled Coulomb force,
// plus the pair's energy and virial credited to the first atom via atomics.
__global__ void Dihedral14LJCFForceWithAtomEnergyAndVirialKernel(
  const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *scaler, const int *a_14,
  const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor, const float *LJ_type_A,
  const float *LJ_type_B, VECTOR *frc, float *atom_energy, float *atom_virial) {
  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_14_i < dihedral_14_numbers) {
    UINT_VECTOR_LJ_TYPE r1, r2;
    VECTOR dr;
    float dr_abs;
    float dr2;
    float dr_1;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_14;
    float frc_abs = 0.;
    VECTOR temp_frc;

    float ene_lin;   // Coulomb pair energy
    float ene_lin2;  // LJ pair energy

    int x, y;
    int atom_pair_LJ_type;

    int atom_i = a_14[dihedral_14_i];
    int atom_j = b_14[dihedral_14_i];

    r1 = uint_crd[atom_i];
    r2 = uint_crd[atom_j];

    // Minimum-image displacement between the 1-4 pair.
    dr = Get_Periodic_Displacement(r2, r1, scaler[0]);

    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;

    // Inverse powers of |dr| used by both the LJ and Coulomb terms.
    dr_2 = 1.0 / dr2;
    dr_4 = dr_2 * dr_2;
    dr_8 = dr_4 * dr_4;
    dr_14 = dr_8 * dr_4 * dr_2;
    dr_abs = norm3df(dr.x, dr.y, dr.z);
    dr_1 = 1. / dr_abs;

    // CF: scaled Coulomb force magnitude / |dr|.
    float charge_i = r1.charge;
    float charge_j = r2.charge;
    float frc_cf_abs;
    frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
    frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
    // LJ: branchless triangular-matrix index from the two atom LJ types
    // (y = |type_j - type_i| via sign-bit tricks, then max/min recombination).
    y = (r2.LJ_type - r1.LJ_type);
    x = y >> 31;
    y = (y ^ x) - x;
    x = r2.LJ_type + r1.LJ_type;
    r2.LJ_type = (x + y) >> 1;
    x = (x - y) >> 1;
    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;

    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
    frc_abs *= lj_scale_factor[dihedral_14_i];

    frc_abs += frc_cf_abs;
    temp_frc.x = frc_abs * dr.x;
    temp_frc.y = frc_abs * dr.y;
    temp_frc.z = frc_abs * dr.z;

    // Equal and opposite forces on the pair.
    atomicAdd(&frc[atom_j].x, -temp_frc.x);
    atomicAdd(&frc[atom_j].y, -temp_frc.y);
    atomicAdd(&frc[atom_j].z, -temp_frc.z);
    atomicAdd(&frc[atom_i].x, temp_frc.x);
    atomicAdd(&frc[atom_i].y, temp_frc.y);
    atomicAdd(&frc[atom_i].z, temp_frc.z);

    ene_lin = r1.charge * r2.charge * dr_1;
    ene_lin *= cf_scale_factor[dihedral_14_i];
    ene_lin2 = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_4 * dr_8 -
               0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_4 * dr_2;  // the LJ A/B coefficients were pre-multiplied
                                                                        // by 12 and 6, so divide those factors back out
    ene_lin2 *= lj_scale_factor[dihedral_14_i];

    atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);

    // temp_frc * dr appears to be a VECTOR dot product (operator defined in
    // common_sponge.cuh — verify), giving the pair virial -F.dr.
    atomicAdd(&atom_virial[atom_i], -temp_frc * dr);
  }
}

void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers,
                                                const int *uint_crd_f, const int *LJtype, const float *charge,
                                                float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
                                                const int *b_14, const float *lj_scale_factor,
                                                const float *cf_scale_factor, const float *LJ_type_A,
                                                const float *LJ_type_B, float *frc_f, float *atom_energy,
                                                float *atom_virial, cudaStream_t stream) {
  // Host launcher: pack coordinates + LJ type + charge, zero all three output
  // accumulators, then run one thread per 1-4 pair.
  size_t thread_per_block = 128;
  size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));

  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);

  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);

  // Fixed: the original reset atom_energy twice and never reset atom_virial,
  // so the virial accumulated stale data across calls; the per-atom resets
  // also launched a grid sized for 3 * atom_numbers elements.
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_virial, 0.);
  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));

  Dihedral14LJCFForceWithAtomEnergyAndVirialKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
    LJ_type_B, frc, atom_energy, atom_virial);

  return;
}

void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers,
const int *uint_crd_f, const int *LJtype, const float *charge,
float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
const int *b_14, const float *lj_scale_factor,
const float *cf_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, float *frc_f, float *atom_energy,
float *atom_virial, cudaStream_t stream);

+ 29
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh View File

@@ -0,0 +1,29 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H

#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"

void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers,
const int *uint_crd_f, const int *LJtype, const float *charge,
float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
const int *b_14, const float *lj_scale_factor,
const float *cf_scale_factor, const float *LJ_type_A,
const float *LJ_type_B, float *frc_f, float *atom_energy,
float *atom_virial, cudaStream_t stream);
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H

mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu → mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cu View File

@@ -13,8 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh"
#include <stdio.h>
#include <vector>

// Unary minus for VECTOR: component-wise sign flip.
__device__ __host__ VECTOR operator-(const VECTOR &vecb) {
  VECTOR negated;
  negated.x = -vecb.x;
  negated.y = -vecb.y;
  negated.z = -vecb.z;
  return negated;
}

// Minimum-image displacement vec_a - vec_b in a periodic box: each component
// of the raw difference is wrapped into [-box/2, box/2).
__device__ __host__ VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR vec_b, const VECTOR box_length) {
  VECTOR dr;
  dr.x = vec_a.x - vec_b.x;
  dr.y = vec_a.y - vec_b.y;
  dr.z = vec_a.z - vec_b.z;  // fixed: the original assigned the z difference to dr.x, leaving dr.z uninitialized

  dr.x = dr.x - floorf(dr.x / box_length.x + 0.5f) * box_length.x;
  dr.y = dr.y - floorf(dr.y / box_length.y + 0.5f) * box_length.y;
  dr.z = dr.z - floorf(dr.z / box_length.z + 0.5f) * box_length.z;
  return dr;
}

__global__ void Copy_List(const int element_numbers, const int *origin_list, int *list) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < element_numbers) {
@@ -32,15 +55,20 @@ __global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, con
UNSIGNED_INT_VECTOR *uint_crd) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0];
uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[1];
uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[2];
/*uint_crd[atom_i].uint_x = 2 * uint_crd[atom_i].uint_x;
uint_crd[atom_i].uint_y = 2 * uint_crd[atom_i].uint_y;
uint_crd[atom_i].uint_z = 2 * uint_crd[atom_i].uint_z;*/
uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1;
uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1;
uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1;
INT_VECTOR tempi;
VECTOR temp = crd[atom_i];

temp.x *= scale_factor[0];
temp.y *= scale_factor[1];
temp.z *= scale_factor[2];

tempi.int_x = temp.x;
tempi.int_y = temp.y;
tempi.int_z = temp.z;

uint_crd[atom_i].uint_x = (tempi.int_x << 2);
uint_crd[atom_i].uint_y = (tempi.int_y << 2);
uint_crd[atom_i].uint_z = (tempi.int_z << 2);
}
}

@@ -80,7 +108,6 @@ __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const floa
} else {
crd[atom_i].y = crd[atom_i].y + box_length[1];
}

if (crd[atom_i].z >= 0) {
if (crd[atom_i].z < box_length[2]) {
} else {
@@ -198,6 +225,21 @@ __global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const
}
}

// Set need_refresh_flag[0] to 1 if any atom has drifted farther than
// sqrt(half_skin_square) from its position at the last neighbor-list build.
__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
                                                   const VECTOR *box_length, const float half_skin_square,
                                                   int *need_refresh_flag) {
  const int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    VECTOR drift = Get_Periodic_Displacement(crd[i], old_crd[i], box_length[0]);
    const float drift_sq = drift.x * drift.x + drift.y * drift.y + drift.z * drift.z;
    if (drift_sq > half_skin_square) {
      // atomicExch is race-free: any thread may set the flag, the value is
      // always 1.
      atomicExch(&need_refresh_flag[0], 1);
    }
  }
}

__global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl,
const int *excluded_list_start, const int *excluded_list,
const int *excluded_atom_numbers) {
@@ -245,27 +287,18 @@ void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_n
int *excluded_list_start, int *excluded_list, int *excluded_numbers,
float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
cudaStream_t stream) {
if (refresh_sign[0] == 1) {
VECTOR trans_vec = {-skin, -skin, -skin};
std::vector<int> h_refresh_sign(1);
cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream);
if (h_refresh_sign[0] == 1) {
Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);

Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
trans_vec);

Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
box_length);

Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);

trans_vec.x = -trans_vec.x;
trans_vec.y = -trans_vec.y;
trans_vec.z = -trans_vec.z;

Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
trans_vec);

Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>(
3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));

@@ -282,40 +315,10 @@ void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_n
Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
excluded_numbers);
refresh_sign[0] = 0;
h_refresh_sign[0] = 0;
}
}

// One-shot neighbor-list construction for the very first build:
//  1) clear grid buckets, 2) wrap crd into the periodic box, 3) bin atoms into
//  grid cells, 4) translate crd by +skin, 5) snapshot crd into old_crd,
//  6) fill the grid buckets, 7) convert crd to unsigned-int coordinates,
//  8) collect neighbors within cutoff_skin_square, 9) prune excluded pairs.
// NOTE(review): refresh_sign is unused here, and most launches hard-code a block
// size of 32 while the last two use `thread` — confirm this mix is intentional.
// NOTE(review): unlike the refresh path, the +skin translation (step 4) happens
// after grid binning and is never reverted — verify the intended coordinate frame.
void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd,
                                      VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof,
                                      float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin,
                                      float *box_length, const GRID_POINTER *gpointer, GRID_BUCKET *bucket,
                                      int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
                                      int *excluded_list, int *excluded_numbers, float cutoff_skin_square,
                                      int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
                                      cudaStream_t stream) {
  VECTOR trans_vec = {skin, skin, skin};
  Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
    grid_numbers, atom_numbers_in_grid_bucket, bucket);
  Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
  Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
  Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
  // Snapshot the (translated) coordinates so later displacement checks compare against this state.
  Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 32), 32, 0, stream>>>(
    3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
  Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
  Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
                                                                                  crd, uint_crd);

  Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
    atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
    cutoff_skin_square);
  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
                                                  stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
                                                            excluded_numbers);
}

__global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
int *nl_atom_serial, NEIGHBOR_LIST *nl) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
@@ -330,15 +333,39 @@ void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl);
}

__global__ void copy_neighbor_list_atom_number(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
// 2-D grid-stride copy: the x dimension walks atoms, the y dimension walks
// neighbor slots. Flattens the per-atom NEIGHBOR_LIST structs into dense arrays:
//   nl_atom_numbers[i]                           = neighbor count of atom i
//   nl_atom_serial[i * max_neighbor_numbers + j] = j-th neighbor serial,
//                                                  zero-padded past the count.
// Also correct for a 1-D launch, where the j loop runs serially per thread.
__global__ void copy_neighbor_list_atom_number(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl,
                                               int *nl_atom_numbers, int *nl_atom_serial) {
  int i, j;
  for (i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
    // Threads sharing the same i all store the same value here (benign race).
    nl_atom_numbers[i] = nl[i].atom_numbers;
    for (j = blockIdx.y * blockDim.y + threadIdx.y; j < max_neighbor_numbers; j += gridDim.y * blockDim.y) {
      if (j < nl_atom_numbers[i]) {
        nl_atom_serial[i * max_neighbor_numbers + j] = nl[i].atom_serial[j];
      } else {
        // Pad unused slots so the dense output is fully initialized.
        nl_atom_serial[i * max_neighbor_numbers + j] = 0;
      }
    }
  }
}

void CopyNeighborListAtomNumber(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream) {
copy_neighbor_list_atom_number<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, nl,
nl_atom_numbers);
// Fill an int device array with a constant value, one element per thread.
__global__ void Reset_List(const int element_numbers, int *list, const int replace_element) {
  const int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= element_numbers) {
    return;
  }
  list[idx] = replace_element;
}

// Fill a float device array with a constant value, one element per thread.
__global__ void Reset_List(const int element_numbers, float *list, const float replace_element) {
  const int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= element_numbers) {
    return;
  }
  list[idx] = replace_element;
}

// Host wrapper: flatten NEIGHBOR_LIST structs into the dense nl_atom_numbers /
// nl_atom_serial arrays on the given stream (1-D launch; the kernel's y loop
// then runs serially per thread).
void CopyNeighborListAtomNumber(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
                                int *nl_atom_serial, cudaStream_t stream) {
  const int block_size = 128;
  const int grid_size = ceilf(static_cast<float>(atom_numbers) / block_size);
  copy_neighbor_list_atom_number<<<grid_size, block_size, 0, stream>>>(atom_numbers, max_neighbor_numbers, nl,
                                                                       nl_atom_numbers, nl_atom_serial);
}

void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int Nxy, float cutoff_skin_square,
@@ -348,22 +375,13 @@ void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float sk
UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer,
NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
int *excluded_numbers, cudaStream_t stream) {
VECTOR trans_vec = {-skin, -skin, -skin};

Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);

Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);

Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);

Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
trans_vec.x = -trans_vec.x;
trans_vec.y = -trans_vec.y;
trans_vec.z = -trans_vec.z;
Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);

cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream);

Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
@@ -387,51 +405,53 @@ __global__ void Mul_half(float *src, float *dst) {
}
}

void Neighbor_List_Update(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, cudaStream_t stream) {
if (not_first_time) {
if (refresh_interval > 0) {
std::vector<int> refresh_count_list(1);
cudaMemcpyAsync(refresh_count_list.data(), d_refresh_count, sizeof(int), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
int refresh_count = refresh_count_list[0];

if (refresh_count % refresh_interval == 0) {
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length,
atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket,
reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list,
excluded_numbers, stream);
}
refresh_count += 1;
cudaMemcpyAsync(d_refresh_count, &refresh_count, sizeof(int), cudaMemcpyHostToDevice, stream);
} else {
Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd), half_skin_square,
is_need_refresh_neighbor_list);
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length,
gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, Nxy,
stream);
// Elementwise scale of a 3-component coefficient vector: dst = src * 0.25.
// Launched as <<<1, 3>>> to derive the quarter crd-to-uint-crd coefficients.
__global__ void Mul_quarter(float *src, float *dst) {
  int index = threadIdx.x;
  if (index < 3) {
    // Float literal avoids an implicit float->double promotion of the product
    // (0.25 is exactly representable, so the result is unchanged).
    dst[index] = src[index] * 0.25f;
  }
}

// NOTE(review): host-side, file-scope step counter used by Neighbor_List_Update_New
// instead of its d_refresh_count device argument; it is shared by every caller in
// the process and is not thread-safe — confirm this is intentional.
int refresh_count = 0;

// Host-side driver deciding how (or whether) to rebuild the neighbor list this step:
//  - forced_update: rebuild unconditionally, no displacement check;
//  - refresh_interval > 0 && !forced_check: rebuild every refresh_interval calls,
//    counted by the file-scope host variable refresh_count;
//  - otherwise: run the first-time build, then test per-atom displacements against
//    half_skin_square and conditionally rebuild.
// NOTE(review): d_refresh_count and not_first_time are unused — the interval counter
// is the host global refresh_count instead; confirm this is intended.
void Neighbor_List_Update_New(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
                              int not_first_time, float skin, int Nxy, float cutoff_square,
                              float cutoff_with_skin_square, int *grid_N, float *box_length,
                              int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial,
                              GRID_BUCKET *bucket, float *crd, float *old_crd, float *crd_to_uint_crd_cof,
                              float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, float *uint_dr_to_dr_cof,
                              GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
                              int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list,
                              int forced_update, int forced_check, cudaStream_t stream) {
  if (forced_update) {
    // Quarter coefficients feed the uint-coordinate conversion used by the rebuild.
    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List_No_Check(
      grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
      grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
      reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
      uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, excluded_numbers, stream);

  } else if (refresh_interval > 0 && !forced_check) {
    if (refresh_count % refresh_interval == 0) {
      Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
      Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length,
                                     atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket,
                                     reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
                                     half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                                     uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list,
                                     excluded_numbers, stream);
    }
    refresh_count += 1;
  } else {
    // NOTE(review): this branch performs the first-time build and then a
    // displacement-triggered refresh back to back; it may contain leftover lines
    // from the pre-refactor version — verify against the upstream file.
    Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List_First_Time(
      is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
      reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), half_crd_to_uint_crd_cof,
      uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
      excluded_list_start, excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse,
      grid_N, Nxy, stream);
    Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
      atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
      reinterpret_cast<VECTOR *>(box_length), half_skin_square, is_need_refresh_neighbor_list);
    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
                          reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                          half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer,
                          bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
                          excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, Nxy,
                          stream);
  }
}

mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh → mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_NEW_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_NEW_IMPL_H_

struct VECTOR {
float x;
@@ -46,15 +46,17 @@ struct GRID_POINTER {
void Construct_Neighbor_List(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream);

void CopyNeighborListAtomNumber(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream);
void CopyNeighborListAtomNumber(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
int *nl_atom_serial, cudaStream_t stream);

void Neighbor_List_Update(int grid_numbers, int atom_numbers, int* d_refresh_count, int refresh_interval,
int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, cudaStream_t stream);
void Neighbor_List_Update_New(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int Nxy, float cutoff_square,
float cutoff_with_skin_square, int *grid_N, float *box_length,
int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial,
GRID_BUCKET *bucket, float *crd, float *old_crd, float *crd_to_uint_crd_cof,
float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, float *uint_dr_to_dr_cof,
GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list,
int forced_update, int forced_check, cudaStream_t stream);

#endif

+ 35
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/bond/bond_force_with_atom_energy_and_virial_kernel.cc View File

@@ -0,0 +1,35 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/sponge/bond/bond_force_with_atom_energy_and_virial_kernel.h"

namespace mindspore {
namespace kernel {
// Registers the BondForceWithAtomEnergyAndVirial GPU kernel: inputs are uint crd
// (u32), scaler (f32), atom_a/atom_b (i32), bond_k/bond_r0 (f32); outputs are
// force, per-atom energy and per-atom virial (all f32).
MS_REG_GPU_KERNEL_THREE(BondForceWithAtomEnergyAndVirial,
                        KernelAttr()
                          .AddInputAttr(kNumberTypeUInt32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeInt32)
                          .AddInputAttr(kNumberTypeInt32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32),
                        BondForceWithAtomEnergyAndVirialGpuKernel, float, int, unsigned int)

} // namespace kernel
} // namespace mindspore

+ 112
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/bond/bond_force_with_atom_energy_and_virial_kernel.h View File

@@ -0,0 +1,112 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_BOND_BOND_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_BOND_BOND_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_KERNEL_H_

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh"

#include <cuda_runtime_api.h>
#include <map>
#include <string>
#include <vector>

#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"

namespace mindspore {
namespace kernel {
// GPU kernel wrapper that computes bond forces together with per-atom energies
// and per-atom virials. Per the registration: T = float, T1 = int,
// T2 = unsigned int (the unsigned-int coordinate type).
template <typename T, typename T1, typename T2>
class BondForceWithAtomEnergyAndVirialGpuKernel : public GpuKernel {
 public:
  BondForceWithAtomEnergyAndVirialGpuKernel() : ele_uint_crd(1) {}
  ~BondForceWithAtomEnergyAndVirialGpuKernel() override = default;

  // Caches bond/atom counts from node attributes and the flattened element count
  // of every input tensor, then registers the buffer sizes.
  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    bond_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "bond_numbers"));
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));

    auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    auto shape_atom_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
    auto shape_atom_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
    auto shape_bond_k = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
    auto shape_bond_r0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);

    for (const auto &dim : shape_uint_crd) ele_uint_crd *= dim;
    for (const auto &dim : shape_scaler) ele_scaler *= dim;
    for (const auto &dim : shape_atom_a) ele_atom_a *= dim;
    for (const auto &dim : shape_atom_b) ele_atom_b *= dim;
    for (const auto &dim : shape_bond_k) ele_bond_k *= dim;
    for (const auto &dim : shape_bond_r0) ele_bond_r0 *= dim;

    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  // Fetches device pointers for all inputs/outputs and launches the CUDA
  // implementation on the provided stream.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto d_uint_crd = GetDeviceAddress<const T2>(inputs, 0);
    auto d_scaler = GetDeviceAddress<T>(inputs, 1);
    auto d_atom_a = GetDeviceAddress<const T1>(inputs, 2);
    auto d_atom_b = GetDeviceAddress<const T1>(inputs, 3);
    auto d_bond_k = GetDeviceAddress<T>(inputs, 4);
    auto d_bond_r0 = GetDeviceAddress<T>(inputs, 5);

    auto d_frc = GetDeviceAddress<T>(outputs, 0);
    auto d_atom_energy = GetDeviceAddress<T>(outputs, 1);
    auto d_atom_virial = GetDeviceAddress<T>(outputs, 2);

    BondForceWithAtomEnergyAndVirial(bond_numbers, atom_numbers, d_uint_crd, d_scaler, d_atom_a, d_atom_b, d_bond_k,
                                     d_bond_r0, d_frc, d_atom_energy, d_atom_virial,
                                     reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  // Input sizes mirror the cached element counts; outputs are the force vector
  // (3 floats per atom), per-atom energy and per-atom virial.
  void InitSizeLists() override {
    input_size_list_.push_back(ele_uint_crd * sizeof(T2));
    input_size_list_.push_back(ele_scaler * sizeof(T));
    input_size_list_.push_back(ele_atom_a * sizeof(T1));
    input_size_list_.push_back(ele_atom_b * sizeof(T1));
    input_size_list_.push_back(ele_bond_k * sizeof(T));
    input_size_list_.push_back(ele_bond_r0 * sizeof(T));

    output_size_list_.push_back(atom_numbers * 3 * sizeof(T));
    output_size_list_.push_back(atom_numbers * sizeof(T));
    output_size_list_.push_back(atom_numbers * sizeof(T));
  }

 private:
  size_t ele_uint_crd = 1;
  size_t ele_scaler = 1;
  size_t ele_atom_a = 1;
  size_t ele_atom_b = 1;
  size_t ele_bond_k = 1;
  size_t ele_bond_r0 = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int bond_numbers;
  int atom_numbers;
};
} // namespace kernel
} // namespace mindspore
#endif

+ 31
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/get_center_of_mass_kernel.cc View File

@@ -0,0 +1,31 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/sponge/common/get_center_of_mass_kernel.h"

namespace mindspore {
namespace kernel {
// Registers the GetCenterOfMass GPU kernel: inputs are residue start/end indices
// (i32) plus crd, atom_mass and residue_mass_inverse (f32); output is the
// per-residue center of mass (f32).
MS_REG_GPU_KERNEL_TWO(GetCenterOfMass,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      GetCenterOfMassGpuKernel, float, int)
} // namespace kernel
} // namespace mindspore

+ 101
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/get_center_of_mass_kernel.h View File

@@ -0,0 +1,101 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GET_CENTER_OF_MASS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GET_CENTER_OF_MASS_KERNEL_H_

#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/get_center_of_mass_impl.cuh"

namespace mindspore {
namespace kernel {
// GPU kernel wrapper computing the center of mass of each residue from atom
// coordinates and masses. Per the registration: T = float, T1 = int.
template <typename T, typename T1>
class GetCenterOfMassGpuKernel : public GpuKernel {
 public:
  GetCenterOfMassGpuKernel() : ele_start(1) {}
  ~GetCenterOfMassGpuKernel() override = default;

  // Reads the residue_numbers attribute and caches the flattened element count
  // of every input tensor before registering buffer sizes.
  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    residue_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "residue_numbers"));

    auto shape_start = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    auto shape_end = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
    auto shape_atom_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
    auto shape_residue_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);

    for (const auto &dim : shape_start) ele_start *= dim;
    for (const auto &dim : shape_end) ele_end *= dim;
    for (const auto &dim : shape_crd) ele_crd *= dim;
    for (const auto &dim : shape_atom_mass) ele_atom_mass *= dim;
    for (const auto &dim : shape_residue_mass_inverse) ele_residue_mass_inverse *= dim;

    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  // Fetches device pointers and launches the CUDA implementation on the stream.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto d_start = GetDeviceAddress<T1>(inputs, 0);
    auto d_end = GetDeviceAddress<T1>(inputs, 1);
    auto d_crd = GetDeviceAddress<T>(inputs, 2);
    auto d_atom_mass = GetDeviceAddress<T>(inputs, 3);
    auto d_residue_mass_inverse = GetDeviceAddress<T>(inputs, 4);

    auto d_center_of_mass = GetDeviceAddress<T>(outputs, 0);

    GetCenterOfMass(residue_numbers, d_start, d_end, d_crd, d_atom_mass, d_residue_mass_inverse, d_center_of_mass,
                    reinterpret_cast<cudaStream_t>(stream_ptr));

    return true;
  }

 protected:
  // The single output holds 3 coordinates per residue.
  void InitSizeLists() override {
    input_size_list_.push_back(ele_start * sizeof(T1));
    input_size_list_.push_back(ele_end * sizeof(T1));
    input_size_list_.push_back(ele_crd * sizeof(T));
    input_size_list_.push_back(ele_atom_mass * sizeof(T));
    input_size_list_.push_back(ele_residue_mass_inverse * sizeof(T));
    output_size_list_.push_back(3 * sizeof(T) * residue_numbers);
  }

 private:
  size_t ele_start = 1;
  size_t ele_end = 1;
  size_t ele_crd = 1;
  size_t ele_atom_mass = 1;
  size_t ele_residue_mass_inverse = 1;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int residue_numbers;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GET_CENTER_OF_MASS_KERNEL_H_

+ 32
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/map_center_of_mass_kernel.cc View File

@@ -0,0 +1,32 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/sponge/common/map_center_of_mass_kernel.h"

namespace mindspore {
namespace kernel {
// Registers the MapCenterOfMass GPU kernel: inputs are residue start/end indices
// (i32) plus center_of_mass, box_length, no_wrap_crd and crd (f32); the single
// f32 output is a placeholder (crd is updated in place).
MS_REG_GPU_KERNEL_TWO(MapCenterOfMass,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      MapCenterOfMassGpuKernel, float, int)
} // namespace kernel
} // namespace mindspore

+ 105
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/map_center_of_mass_kernel.h View File

@@ -0,0 +1,105 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MAP_CENTER_OF_MASS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MAP_CENTER_OF_MASS_KERNEL_H_

#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/map_center_of_mass_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class MapCenterOfMassGpuKernel : public GpuKernel {
public:
MapCenterOfMassGpuKernel() : ele_start(1) {}
~MapCenterOfMassGpuKernel() override = default;

bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
residue_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "residue_numbers"));
scaler = static_cast<float>(GetAttr<float>(kernel_node, "scaler"));

auto shape_start = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_end = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape_center_of_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto shape_box_length = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto shape_no_wrap_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);

for (size_t i = 0; i < shape_start.size(); i++) ele_start *= shape_start[i];
for (size_t i = 0; i < shape_end.size(); i++) ele_end *= shape_end[i];
for (size_t i = 0; i < shape_center_of_mass.size(); i++) ele_center_of_mass *= shape_center_of_mass[i];
for (size_t i = 0; i < shape_box_length.size(); i++) ele_box_length *= shape_box_length[i];
for (size_t i = 0; i < shape_no_wrap_crd.size(); i++) ele_no_wrap_crd *= shape_no_wrap_crd[i];
for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];

InitSizeLists();
return true;
}

const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

// Fetches the device pointers for the six registered inputs (in registration
// order) and dispatches the MapCenterOfMass CUDA kernel on the given stream.
// The launch is asynchronous; no host-side synchronization happens here.
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
            const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
  auto residue_start = GetDeviceAddress<T1>(inputs, 0);   // per-residue start indices
  auto residue_end = GetDeviceAddress<T1>(inputs, 1);     // per-residue end indices
  auto mass_center = GetDeviceAddress<T>(inputs, 2);      // center-of-mass input
  auto box_len = GetDeviceAddress<T>(inputs, 3);          // box length input
  auto unwrapped_crd = GetDeviceAddress<T>(inputs, 4);    // no_wrap_crd input
  auto wrapped_crd = GetDeviceAddress<T>(inputs, 5);      // crd input

  MapCenterOfMass(residue_numbers, residue_start, residue_end, scaler, mass_center, box_len, unwrapped_crd,
                  wrapped_crd, reinterpret_cast<cudaStream_t>(stream_ptr));

  return true;
}

protected:
// Populates the framework-facing size lists. The push order MUST match the
// input index order used in Init() and Launch() (start, end, center_of_mass,
// box_length, no_wrap_crd, crd).
void InitSizeLists() override {
  input_size_list_.push_back(ele_start * sizeof(T1));
  input_size_list_.push_back(ele_end * sizeof(T1));
  input_size_list_.push_back(ele_center_of_mass * sizeof(T));
  input_size_list_.push_back(ele_box_length * sizeof(T));
  input_size_list_.push_back(ele_no_wrap_crd * sizeof(T));
  input_size_list_.push_back(ele_crd * sizeof(T));
  // Single scalar output; the kernel's useful results are written through the
  // input buffers, and this output appears to be a placeholder — TODO confirm
  // against the op's Python-side registration.
  output_size_list_.push_back(sizeof(T));
}

private:
  // Flattened element counts of each input tensor, computed in Init().
  // All start at 1 and are multiplied by every dimension of the shape.
  size_t ele_start = 1;
  size_t ele_end = 1;
  size_t ele_center_of_mass = 1;
  size_t ele_box_length = 1;
  size_t ele_no_wrap_crd = 1;
  size_t ele_crd = 1;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int residue_numbers;  // attr "residue_numbers"
  float scaler;         // attr "scaler"
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MAP_CENTER_OF_MASS_KERNEL_H_

+ 39
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_kernel.cc View File

@@ -0,0 +1,39 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_kernel.h"

namespace mindspore {
namespace kernel {
// Registers the GPU kernel with T = float, T1 = int. The attribute order below
// must stay aligned with the input/output indices used by the kernel class's
// Init()/Launch()/InitSizeLists() in the matching header.
MS_REG_GPU_KERNEL_TWO(Dihedral14LJCFForceWithAtomEnergyAndVirial,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)   // 0: uint_crd
.AddInputAttr(kNumberTypeInt32)    // 1: LJtype
.AddInputAttr(kNumberTypeFloat32)  // 2: charge
.AddInputAttr(kNumberTypeFloat32)  // 3: boxlength_f
.AddInputAttr(kNumberTypeInt32)    // 4: a_14
.AddInputAttr(kNumberTypeInt32)    // 5: b_14
.AddInputAttr(kNumberTypeFloat32)  // 6: lj_scale_factor
.AddInputAttr(kNumberTypeFloat32)  // 7: cf_scale_factor
.AddInputAttr(kNumberTypeFloat32)  // 8: LJ_type_A
.AddInputAttr(kNumberTypeFloat32)  // 9: LJ_type_B
.AddOutputAttr(kNumberTypeFloat32)   // 0: frc_f
.AddOutputAttr(kNumberTypeFloat32)   // 1: atom_energy
.AddOutputAttr(kNumberTypeFloat32),  // 2: atom_virial
Dihedral14LJCFForceWithAtomEnergyAndVirialGpuKernel, float, int)

} // namespace kernel
} // namespace mindspore

+ 145
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_kernel.h View File

@@ -0,0 +1,145 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_KERNEL_H_

#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh"

namespace mindspore {
namespace kernel {

// GPU kernel wrapper that dispatches the Dihedral14LJCFForceWithAtomEnergyAndVirial
// CUDA implementation. Presumably computes 1-4 dihedral Lennard-Jones + Coulomb
// forces with per-atom energy and virial — confirm against the .cu implementation.
// T is the float type, T1 the integer type (registered as float/int).
template <typename T, typename T1>
class Dihedral14LJCFForceWithAtomEnergyAndVirialGpuKernel : public GpuKernel {
public:
// NOTE(review): the initializer ele_uint_crd(1) is redundant with the in-class
// default "size_t ele_uint_crd = 1;" below — harmless, but one could be removed.
Dihedral14LJCFForceWithAtomEnergyAndVirialGpuKernel() : ele_uint_crd(1) {}
~Dihedral14LJCFForceWithAtomEnergyAndVirialGpuKernel() override = default;

// Reads attributes and the shapes of the ten inputs, accumulating each
// tensor's flat element count for InitSizeLists(). Input index order here
// must match Launch() and the registration macro.
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
dihedral_14_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "dihedral_14_numbers"));
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));

auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7);
auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8);
auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9);

// Flatten each shape into a total element count.
for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i];
for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i];
for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i];
for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i];
for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i];
for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i];
for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i];
for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i];
for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i];
for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i];

InitSizeLists();
return true;
}

// Accessors required by the GpuKernel interface.
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

// Resolves device pointers (same order as registration) and launches the
// CUDA implementation asynchronously on stream_ptr.
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0);
auto LJtype = GetDeviceAddress<const T1>(inputs, 1);
auto charge = GetDeviceAddress<const T>(inputs, 2);
auto boxlength_f = GetDeviceAddress<T>(inputs, 3);
auto a_14 = GetDeviceAddress<const T1>(inputs, 4);
auto b_14 = GetDeviceAddress<const T1>(inputs, 5);
auto lj_scale_factor = GetDeviceAddress<T>(inputs, 6);
auto cf_scale_factor = GetDeviceAddress<T>(inputs, 7);
auto LJ_type_A = GetDeviceAddress<T>(inputs, 8);
auto LJ_type_B = GetDeviceAddress<T>(inputs, 9);
auto frc_f = GetDeviceAddress<T>(outputs, 0);
auto atom_energy = GetDeviceAddress<T>(outputs, 1);
auto atom_virial = GetDeviceAddress<T>(outputs, 2);

// Scratch buffer for the packed per-atom coordinate/LJ-type/charge records.
auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0);

Dihedral14LJCFForceWithAtomEnergyAndVirial(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge,
uint_crd_with_LJ, boxlength_f, a_14, b_14, lj_scale_factor,
cf_scale_factor, LJ_type_A, LJ_type_B, frc_f, atom_energy, atom_virial,
reinterpret_cast<cudaStream_t>(stream_ptr));

return true;
}

protected:
// Push order must match the input indices used in Init()/Launch() and the
// registration macro in the matching .cc file.
void InitSizeLists() override {
input_size_list_.push_back(ele_uint_crd * sizeof(T1));
input_size_list_.push_back(ele_LJtype * sizeof(T1));
input_size_list_.push_back(ele_charge * sizeof(T));
input_size_list_.push_back(ele_boxlength_f * sizeof(T));
input_size_list_.push_back(ele_a_14 * sizeof(T1));
input_size_list_.push_back(ele_b_14 * sizeof(T1));
input_size_list_.push_back(ele_lj_scale_factor * sizeof(T));
input_size_list_.push_back(ele_cf_scale_factor * sizeof(T));
input_size_list_.push_back(ele_LJ_type_A * sizeof(T));
input_size_list_.push_back(ele_LJ_type_B * sizeof(T));
// One packed record per atom for the workspace buffer.
workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));

output_size_list_.push_back(3 * atom_numbers * sizeof(T));  // frc_f: 3 components per atom
output_size_list_.push_back(atom_numbers * sizeof(T));      // atom_energy
output_size_list_.push_back(atom_numbers * sizeof(T));      // atom_virial
}

private:
// Flattened element counts of the inputs, computed in Init().
size_t ele_uint_crd = 1;
size_t ele_LJtype = 1;
size_t ele_charge = 1;
size_t ele_boxlength_f = 1;
size_t ele_a_14 = 1;
size_t ele_b_14 = 1;
size_t ele_lj_scale_factor = 1;
size_t ele_cf_scale_factor = 1;
size_t ele_LJ_type_A = 1;
size_t ele_LJ_type_B = 1;

std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int dihedral_14_numbers;  // attr "dihedral_14_numbers"
int atom_numbers;         // attr "atom_numbers"
// Layout of one workspace record; used here only to size the workspace
// (the device-side layout must match the definition in the .cuh/.cu impl).
struct UINT_VECTOR_LJ_TYPE {
unsigned int uint_x;
unsigned int uint_y;
unsigned int uint_z;
int LJ_type;
float charge;
};
};
} // namespace kernel
} // namespace mindspore
#endif

mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc → mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc View File

@@ -14,11 +14,11 @@
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h"
#include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(NeighborListUpdate,
MS_REG_GPU_KERNEL_TWO(NeighborListUpdateNew,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
@@ -40,6 +40,6 @@ MS_REG_GPU_KERNEL_TWO(NeighborListUpdate,
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat32),
NeighborListUpdateGpuKernel, int, float)
NeighborListUpdateNewGpuKernel, int, float)
} // namespace kernel
} // namespace mindspore

mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h → mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_NEW_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_NEW_KERNEL_H_

#include <cuda_runtime_api.h>
#include <vector>
@@ -24,15 +24,15 @@
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class NeighborListUpdateGpuKernel : public GpuKernel {
class NeighborListUpdateNewGpuKernel : public GpuKernel {
public:
NeighborListUpdateGpuKernel() : skin(2.0), cutoff(10.0), max_atom_in_grid_numbers(64), max_neighbor_numbers(800) {}
~NeighborListUpdateGpuKernel() override = default;
NeighborListUpdateNewGpuKernel() : skin(2.0), cutoff(9.0), max_atom_in_grid_numbers(64), max_neighbor_numbers(800) {}
~NeighborListUpdateNewGpuKernel() override = default;
bool Init(const CNodePtr &kernel_node) override {
grid_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "grid_numbers"));
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
@@ -46,6 +46,8 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
cutoff_with_skin = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_with_skin"));
half_cutoff_with_skin = static_cast<float>(GetAttr<float>(kernel_node, "half_cutoff_with_skin"));
cutoff_with_skin_square = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_with_skin_square"));
forced_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "forced_update"));
forced_check = static_cast<int>(GetAttr<int64_t>(kernel_node, "forced_check"));
h_bucket.resize(grid_numbers);
h_gpointer.resize(grid_numbers);
InitSizeLists();
@@ -99,13 +101,14 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
Construct_Neighbor_List(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl,
reinterpret_cast<cudaStream_t>(stream_ptr));

Neighbor_List_Update(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, nxy,
cutoff_square, cutoff_with_skin_square, grid_n, box_length, atom_numbers_in_grid_bucket,
grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof,
half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start,
excluded_list, excluded_numbers, half_skin_square, need_refresh_flag,
reinterpret_cast<cudaStream_t>(stream_ptr));
CopyNeighborListAtomNumber(atom_numbers, nl, nl_atom_numbers, reinterpret_cast<cudaStream_t>(stream_ptr));
Neighbor_List_Update_New(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, nxy,
cutoff_square, cutoff_with_skin_square, grid_n, box_length, atom_numbers_in_grid_bucket,
grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof,
half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start,
excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, forced_update,
forced_check, reinterpret_cast<cudaStream_t>(stream_ptr));
CopyNeighborListAtomNumber(atom_numbers, max_neighbor_numbers, nl, nl_atom_numbers, nl_atom_serial,
reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}

@@ -160,6 +163,8 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
float cutoff_with_skin;
float half_cutoff_with_skin;
float cutoff_with_skin_square;
int forced_update;
int forced_check;

std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

Loading…
Cancel
Save