Browse Source

sponge ops nb update others

tags/v1.5.0-rc1
q00596439 4 years ago
parent
commit
44189d25bf
11 changed files with 650 additions and 183 deletions
  1. +345
    -152
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu
  2. +32
    -11
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh
  3. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/total_c6_get_kernel.cc
  4. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/crdmcmap/cal_no_wrap_crd_kernel.cc
  5. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/crdmcmap/refresh_boxmaptimes_kernel.cc
  6. +4
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc
  7. +16
    -15
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h
  8. +49
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc
  9. +179
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h
  10. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/restrain/restrain_energy_kernel.cc
  11. +5
    -1
      mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/restrain/restrain_force_atom_energy_virial_impl_kernel.cc

+ 345
- 152
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cu View File

@@ -13,51 +13,46 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
*Note:
* NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh"
#include <vector>
// Element-wise device copy of an int array: list[idx] = origin_list[idx].
// One thread per element; launch with at least element_numbers total threads.
__global__ void Copy_List(const int element_numbers, const int *origin_list, int *list) {
  int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= element_numbers) {
    return;
  }
  list[idx] = origin_list[idx];
}
__global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) {

// common functions

// Element-wise device copy of a float array (float overload); used e.g. to
// snapshot crd into old_crd when the two buffers are viewed as flat floats.
static __global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) {
  int tid = blockDim.x * blockIdx.x + threadIdx.x;
  if (tid >= element_numbers) {
    return;
  }
  list[tid] = origin_list[tid];
}

__global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *crd,
UNSIGNED_INT_VECTOR *uint_crd) {
static __global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *crd,
UNSIGNED_INT_VECTOR *uint_crd) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0];
uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[1];
uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[2];
uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1;
uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1;
uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1;
}
}
INT_VECTOR tempi;
VECTOR temp = crd[atom_i];

__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR translation_vec) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < vector_numbers) {
vec_list[i].x = vec_list[i].x + translation_vec.x;
vec_list[i].y = vec_list[i].y + translation_vec.y;
vec_list[i].z = vec_list[i].z + translation_vec.z;
}
}
__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR *translation_vec) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < vector_numbers) {
vec_list[i].x = vec_list[i].x + translation_vec[0].x;
vec_list[i].y = vec_list[i].y + translation_vec[0].y;
vec_list[i].z = vec_list[i].z + translation_vec[0].z;
temp.x *= scale_factor[0];
temp.y *= scale_factor[1];
temp.z *= scale_factor[2];

tempi.int_x = temp.x;
tempi.int_y = temp.y;
tempi.int_z = temp.z;

uint_crd[atom_i].uint_x = (tempi.int_x << 2);
uint_crd[atom_i].uint_y = (tempi.int_y << 2);
uint_crd[atom_i].uint_z = (tempi.int_z << 2);
}
}
__global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_length) {

static __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_length) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
if (crd[atom_i].x >= 0) {
@@ -77,7 +72,6 @@ __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const floa
} else {
crd[atom_i].y = crd[atom_i].y + box_length[1];
}

if (crd[atom_i].z >= 0) {
if (crd[atom_i].z < box_length[2]) {
} else {
@@ -89,7 +83,8 @@ __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const floa
}
}

__global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket, GRID_BUCKET *bucket) {
static __global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket,
GRID_BUCKET *bucket) {
int grid_serial = blockDim.x * blockIdx.x + threadIdx.x;
if (grid_serial < grid_numbers) {
GRID_BUCKET bucket_i = bucket[grid_serial];
@@ -100,20 +95,12 @@ __global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_g
}
}

// Reset every slot of each atom's neighbor list to the sentinel value
// `atom_numbers` (an index no real atom can take). One thread per atom,
// each serially clearing its max_neighbor_number slots.
__global__ void Clear_Neighbor_List_Serial(const int atom_numbers, int max_neighbor_number, NEIGHBOR_LIST *nl) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i >= atom_numbers) {
    return;
  }
  for (int slot = 0; slot < max_neighbor_number; ++slot) {
    nl[atom_i].atom_serial[slot] = atom_numbers;
  }
}

__global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_inverse, const VECTOR *crd,
const int *grid_N, const int gridxy, int *atom_in_grid_serial) {
static __global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_inverse,
const VECTOR *crd, const int *grid_N, const int gridxy,
int *atom_in_grid_serial) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int Nx = static_cast<float>(crd[atom_i].x) * grid_length_inverse[0]; // crd.x must < boxlength.x
int Nx = static_cast<float>(crd[atom_i].x) * grid_length_inverse[0];
int Ny = static_cast<float>(crd[atom_i].y) * grid_length_inverse[1];
int Nz = static_cast<float>(crd[atom_i].z) * grid_length_inverse[2];
Nx = Nx & ((Nx - grid_N[0]) >> 31);
@@ -123,8 +110,8 @@ __global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *gr
}
}

__global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_serial, GRID_BUCKET *bucket,
int *atom_numbers_in_grid_bucket) {
static __global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_serial,
GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int grid_serial = atom_in_grid_serial[atom_i];
@@ -145,11 +132,12 @@ __global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_
}
}
}
__global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const float *uint_dr_to_dr_cof, const int *atom_in_grid_serial,
const GRID_POINTER *gpointer, const GRID_BUCKET *bucket,
const int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *nl,
const float cutoff_skin_square) {

static __global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
const float *uint_dr_to_dr_cof, const int *atom_in_grid_serial,
const GRID_POINTER *gpointer, const GRID_BUCKET *bucket,
const int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *nl,
const float cutoff_skin_square) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int grid_serial = atom_in_grid_serial[atom_i];
@@ -183,30 +171,15 @@ __global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_V
}
}
}
} // 124 grid cycle
nl[atom_i].atom_numbers = atom_numbers_in_nl_lin;
}
}

__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
const float half_skin_square, int *need_refresh_flag) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < atom_numbers) {
VECTOR r1 = crd[i];
VECTOR r2 = old_crd[i];
r1.x = r1.x - r2.x;
r1.y = r1.y - r2.y;
r1.z = r1.z - r2.z;
float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
if (r1_2 > half_skin_square) {
atomicExch(&need_refresh_flag[0], 1);
}
nl[atom_i].atom_numbers = atom_numbers_in_nl_lin;
}
}

__global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl,
const int *excluded_list_start, const int *excluded_list,
const int *excluded_atom_numbers) {
static __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl,
const int *excluded_list_start,
const int *excluded_list,
const int *excluded_atom_numbers) {
int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
if (atom_i < atom_numbers) {
int excluded_number = excluded_atom_numbers[atom_i];
@@ -236,29 +209,113 @@ __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_num
if (excluded_atom_numbers_count < excluded_atom_numbers_lin) {
} else {
break;
} // break
} // in the range of excluded min to max
} // cycle for neighbors
}
}
}
nl[atom_i].atom_numbers = atomnumbers_in_nl_lin;
} // if need excluded
}
}
}

void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, VECTOR *old_crd,
UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, float *uint_dr_to_dr_cof,
int *atom_in_grid_serial, const float skin, float *box_length, const GRID_POINTER *gpointer,
GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers,
float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
int max_neighbor_number, cudaStream_t stream) {
if (refresh_sign[0] == 1) {
// Wire the flat nl_atom_numbers / nl_atom_serial buffers into the NEIGHBOR_LIST
// structs: entry i takes its count from nl_atom_numbers[i] and points its
// serial array at row i of the (atom_numbers x max_neighbor_numbers) table.
// Grid-stride loop, so any launch configuration covers all atoms.
static __global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
                                                      int *nl_atom_serial, NEIGHBOR_LIST *nl) {
  const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += stride) {
    nl[i].atom_numbers = nl_atom_numbers[i];
    nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
  }
}

// Export NEIGHBOR_LIST back into flat buffers: per-atom counts go to
// nl_atom_numbers, the used serial entries to nl_atom_serial, and any unused
// tail slots are zeroed. Grid-strides in both x (atoms) and y (slots), so it
// also works with a 1D launch where the y loop collapses to a serial scan.
static __global__ void copy_neighbor_list_atom_number(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl,
                                                      int *nl_atom_numbers, int *nl_atom_serial) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
    nl_atom_numbers[i] = nl[i].atom_numbers;
    for (int j = blockIdx.y * blockDim.y + threadIdx.y; j < max_neighbor_numbers; j += gridDim.y * blockDim.y) {
      nl_atom_serial[i * max_neighbor_numbers + j] = (j < nl_atom_numbers[i]) ? nl[i].atom_serial[j] : 0;
    }
  }
}

// dst[k] = src[k] * 0.5 for k in [0, 3); used to scale the 3-component
// crd-to-uint-crd coefficient. Launch with a single block of >= 3 threads.
static __global__ void Mul_half(float *src, float *dst) {
  int index = threadIdx.x;
  if (index < 3) {
    dst[index] = src[index] * 0.5f;  // float literal: avoids promoting the multiply to double
  }
}

// dst[k] = src[k] * 0.25 for k in [0, 3); quarter-scale variant of Mul_half.
// Launch with a single block of >= 3 threads.
static __global__ void Mul_quarter(float *src, float *dst) {
  int index = threadIdx.x;
  if (index < 3) {
    dst[index] = src[index] * 0.25f;  // float literal: avoids promoting the multiply to double
  }
}

// old neighbor list update functions
// Convert float coordinates to unsigned-int coordinates with the "half" scheme:
// uint = (unsigned)(crd * scale) << 1, so the lowest bit is always zero.
// NOTE(review): crd is converted to unsigned directly, which presumably assumes
// non-negative (periodically mapped) coordinates — confirm against callers.
__global__ void Crd_To_Uint_Crd_Half(const int atom_numbers, float *scale_factor, const VECTOR *crd,
                                     UNSIGNED_INT_VECTOR *uint_crd) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i >= atom_numbers) {
    return;
  }
  VECTOR r = crd[atom_i];
  uint_crd[atom_i].uint_x = static_cast<unsigned int>(r.x * scale_factor[0]) << 1;
  uint_crd[atom_i].uint_y = static_cast<unsigned int>(r.y * scale_factor[1]) << 1;
  uint_crd[atom_i].uint_z = static_cast<unsigned int>(r.z * scale_factor[2]) << 1;
}

// Translate every vector in vec_list in place by translation_vec
// (passed by value). One thread per vector.
__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR translation_vec) {
  int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= vector_numbers) {
    return;
  }
  vec_list[idx].x = vec_list[idx].x + translation_vec.x;
  vec_list[idx].y = vec_list[idx].y + translation_vec.y;
  vec_list[idx].z = vec_list[idx].z + translation_vec.z;
}

// Translate every vector in vec_list in place; overload that reads the
// translation from device memory (translation_vec[0]).
__global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR *translation_vec) {
  int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= vector_numbers) {
    return;
  }
  VECTOR t = translation_vec[0];
  vec_list[idx].x = vec_list[idx].x + t.x;
  vec_list[idx].y = vec_list[idx].y + t.y;
  vec_list[idx].z = vec_list[idx].z + t.z;
}

// Raise need_refresh_flag[0] to 1 (atomically) if any atom has drifted more
// than sqrt(half_skin_square) from its position at the last neighbor-list
// rebuild. Non-periodic distance: no box wrapping in this overload.
__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
                                                   const float half_skin_square, int *need_refresh_flag) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i >= atom_numbers) {
    return;
  }
  float dx = crd[i].x - old_crd[i].x;
  float dy = crd[i].y - old_crd[i].y;
  float dz = crd[i].z - old_crd[i].z;
  float dist2 = dx * dx + dy * dy + dz * dz;
  if (dist2 > half_skin_square) {
    atomicExch(&need_refresh_flag[0], 1);
  }
}

void Refresh_Neighbor_List_Half(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd,
VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof,
float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin, float *box_length,
const GRID_POINTER *gpointer, GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket,
NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
int *excluded_numbers, float cutoff_skin_square, int grid_numbers,
float *grid_length_inverse, int *grid_N, int nxy, cudaStream_t stream) {
std::vector<int> h_refresh_sign(1);
cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream);
if (h_refresh_sign[0] == 1) {
VECTOR trans_vec = {-skin, -skin, -skin};
Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);

Clear_Neighbor_List_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, max_neighbor_number, d_nl);

Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
trans_vec);

@@ -266,7 +323,7 @@ void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_n
box_length);

Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);

trans_vec.x = -trans_vec.x;
trans_vec.y = -trans_vec.y;
@@ -281,7 +338,7 @@ void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_n
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);

Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
@@ -291,7 +348,7 @@ void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_n
Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
excluded_numbers);
refresh_sign[0] = 0;
h_refresh_sign[0] = 0;
}
}

@@ -301,23 +358,21 @@ void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const
float *box_length, const GRID_POINTER *gpointer, GRID_BUCKET *bucket,
int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
int *excluded_list, int *excluded_numbers, float cutoff_skin_square,
int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
int max_neighbor_number, cudaStream_t stream) {
int grid_numbers, float *grid_length_inverse, int *grid_N, int nxy,
cudaStream_t stream) {
VECTOR trans_vec = {skin, skin, skin};
Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);
Clear_Neighbor_List_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, max_neighbor_number, d_nl);
Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);
Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);
Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 32), 32, 0, stream>>>(
3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);
Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
crd, uint_crd);
Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
@@ -327,52 +382,42 @@ void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const
excluded_numbers);
}

__global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers,
int *nl_atom_serial, NEIGHBOR_LIST *nl) {
__global__ void copy_neighbor_list_atom_number(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
nl[i].atom_numbers = nl_atom_numbers[i];
nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers;
nl_atom_numbers[i] = nl[i].atom_numbers;
}
}

void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream) {
void ConstructNeighborListHalf(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream) {
construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl);
}

__global__ void copy_neighbor_list_atom_number(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
nl_atom_numbers[i] = nl[i].atom_numbers;
}
}

void CopyNeighborListAtomNumber(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream) {
void CopyNeighborListHalf(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream) {
copy_neighbor_list_atom_number<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, nl,
nl_atom_numbers);
}

void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int Nxy, float cutoff_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket,
float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
VECTOR *crd, VECTOR *old_crd, float *crd_to_uint_crd_cof,
UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer,
NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
int *excluded_numbers, int max_neighbor_number, cudaStream_t stream) {
void Refresh_Neighbor_List_No_Check_Half(int grid_numbers, int atom_numbers, float skin, int nxy,
float cutoff_skin_square, int *grid_N, float *box_length,
int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, VECTOR *crd, VECTOR *old_crd,
float *crd_to_uint_crd_cof, UNSIGNED_INT_VECTOR *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers,
cudaStream_t stream) {
VECTOR trans_vec = {-skin, -skin, -skin};

Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>(
grid_numbers, atom_numbers_in_grid_bucket, bucket);

Clear_Neighbor_List_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, max_neighbor_number, d_nl);

Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec);

Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length);

Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial);
atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);
trans_vec.x = -trans_vec.x;
trans_vec.y = -trans_vec.y;
trans_vec.z = -trans_vec.z;
@@ -383,8 +428,8 @@ void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float sk
Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);

Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof,
crd, uint_crd);
Crd_To_Uint_Crd_Half<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
@@ -394,21 +439,14 @@ void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float sk
atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
}

__global__ void Mul_half(float *src, float *dst) {
int index = threadIdx.x;
if (index < 3) {
dst[index] = src[index] * 0.5;
}
}

void Neighbor_List_Update(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, int max_neighbor_number, cudaStream_t stream) {
void NeighborListUpdate(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, cudaStream_t stream) {
if (not_first_time) {
if (refresh_interval > 0) {
std::vector<int> refresh_count_list(1);
@@ -418,12 +456,12 @@ void Neighbor_List_Update(int grid_numbers, int atom_numbers, int *d_refresh_cou

if (refresh_count % refresh_interval == 0) {
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length,
atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket,
reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list,
excluded_numbers, max_neighbor_number, stream);
Refresh_Neighbor_List_No_Check_Half(
grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof,
reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start,
excluded_list, excluded_numbers, stream);
}
refresh_count += 1;
cudaMemcpyAsync(d_refresh_count, &refresh_count, sizeof(int), cudaMemcpyHostToDevice, stream);
@@ -432,12 +470,12 @@ void Neighbor_List_Update(int grid_numbers, int atom_numbers, int *d_refresh_cou
atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd), half_skin_square,
is_need_refresh_neighbor_list);
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length,
gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, Nxy,
max_neighbor_number, stream);
Refresh_Neighbor_List_Half(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length,
gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start,
excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers,
grid_length_inverse, grid_N, nxy, stream);
}
} else {
Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
@@ -446,6 +484,161 @@ void Neighbor_List_Update(int grid_numbers, int atom_numbers, int *d_refresh_cou
reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), half_crd_to_uint_crd_cof,
uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
excluded_list_start, excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse,
grid_N, Nxy, max_neighbor_number, stream);
grid_N, nxy, stream);
}
}

// new neighbor list update functions

// Minimum-image displacement vec_a - vec_b under a periodic box of size
// box_length: each component is wrapped into [-box_length/2, box_length/2).
__device__ __host__ VECTOR Get_Periodic_Displacement_Update(const VECTOR vec_a, const VECTOR vec_b,
                                                            const VECTOR box_length) {
  VECTOR dr;
  dr.x = vec_a.x - vec_b.x;
  dr.y = vec_a.y - vec_b.y;
  // Bug fix: this line previously wrote the z difference into dr.x, clobbering
  // the x component and leaving dr.z uninitialized before the wrap below.
  dr.z = vec_a.z - vec_b.z;

  dr.x = dr.x - floorf(dr.x / box_length.x + 0.5f) * box_length.x;
  dr.y = dr.y - floorf(dr.y / box_length.y + 0.5f) * box_length.y;
  dr.z = dr.z - floorf(dr.z / box_length.z + 0.5f) * box_length.z;
  return dr;
}

// Periodic-aware refresh check: raise need_refresh_flag[0] to 1 (atomically)
// if any atom's minimum-image displacement since the last rebuild exceeds
// sqrt(half_skin_square). box_length is read from device memory (box_length[0]).
__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
                                                   const VECTOR *box_length, const float half_skin_square,
                                                   int *need_refresh_flag) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i >= atom_numbers) {
    return;
  }
  VECTOR dr = Get_Periodic_Displacement_Update(crd[i], old_crd[i], box_length[0]);
  float dist2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
  if (dist2 > half_skin_square) {
    atomicExch(&need_refresh_flag[0], 1);
  }
}

// Conditionally rebuild the neighbor list: copy the device-side refresh flag
// to the host and, if it is 1, re-bucket all atoms, regenerate neighbor pairs
// within cutoff_skin_square, and strip excluded pairs. `skin` is part of the
// shared signature but unused in this (non-translating) variant.
void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, VECTOR *old_crd,
                           UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, float *uint_dr_to_dr_cof,
                           int *atom_in_grid_serial, const float skin, float *box_length, const GRID_POINTER *gpointer,
                           GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl,
                           int *excluded_list_start, int *excluded_list, int *excluded_numbers,
                           float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int nxy,
                           cudaStream_t stream) {
  std::vector<int> h_refresh_sign(1);
  cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream);
  // Bug fix: the copy above is asynchronous, so the host must wait for it to
  // land before reading h_refresh_sign[0]; previously the value was read
  // immediately, racing the DMA into pageable memory.
  cudaStreamSynchronize(stream);
  if (h_refresh_sign[0] == 1) {
    Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>(
      grid_numbers, atom_numbers_in_grid_bucket, bucket);

    Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd,
                                                                                              box_length);

    Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, grid_length_inverse, crd, grid_N, nxy, atom_in_grid_serial);

    // Snapshot crd into old_crd so the next drift check measures from here.
    Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>(
      3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd));

    Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket);

    Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

    Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(
      atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket,
      d_nl, cutoff_skin_square);

    Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0,
                                                    stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list,
                                                              excluded_numbers);
    // Bug fix: clear the flag on the DEVICE. The old code zeroed only the local
    // host copy (h_refresh_sign[0] = 0), which had no effect on refresh_sign.
    cudaMemsetAsync(refresh_sign, 0, sizeof(int), stream);
  }
}

// Unconditionally rebuild the neighbor list (no drift check): wrap coordinates
// into the box, re-bucket atoms, snapshot crd into old_crd, regenerate
// neighbors within cutoff_skin_square, then strip excluded pairs.
// `skin` is accepted for signature parity but unused in this variant.
void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int nxy, float cutoff_skin_square,
                                    int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket,
                                    float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket,
                                    VECTOR *crd, VECTOR *old_crd, float *crd_to_uint_crd_cof,
                                    UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer,
                                    NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
                                    int *excluded_numbers, cudaStream_t stream) {
  const int threads = 32;
  const unsigned int grid_blocks = ceilf(static_cast<float>(grid_numbers) / threads);
  const unsigned int atom_blocks = ceilf(static_cast<float>(atom_numbers) / threads);

  Clear_Grid_Bucket<<<grid_blocks, threads, 0, stream>>>(grid_numbers, atom_numbers_in_grid_bucket, bucket);

  Crd_Periodic_Map<<<atom_blocks, threads, 0, stream>>>(atom_numbers, crd, box_length);

  Find_Atom_In_Grid_Serial<<<atom_blocks, threads, 0, stream>>>(atom_numbers, grid_length_inverse, crd, grid_N, nxy,
                                                                atom_in_grid_serial);

  // Device-to-device snapshot of the current coordinates.
  cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream);

  Put_Atom_In_Grid_Bucket<<<atom_blocks, threads, 0, stream>>>(atom_numbers, atom_in_grid_serial, bucket,
                                                               atom_numbers_in_grid_bucket);

  Crd_To_Uint_Crd<<<atom_blocks, threads, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);

  Find_atom_neighbors<<<atom_blocks, threads, 0, stream>>>(atom_numbers, uint_crd, uint_dr_to_dr_cof,
                                                           atom_in_grid_serial, gpointer, bucket,
                                                           atom_numbers_in_grid_bucket, d_nl, cutoff_skin_square);

  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<atom_blocks, threads, 0, stream>>>(
    atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
}

// Host wrapper: export per-atom neighbor counts and serials from the
// NEIGHBOR_LIST structs into the flat nl_atom_numbers / nl_atom_serial buffers.
void CopyNeighborList(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
                      int *nl_atom_serial, cudaStream_t stream) {
  const int threads = 128;
  const unsigned int blocks = ceilf(static_cast<float>(atom_numbers) / threads);
  copy_neighbor_list_atom_number<<<blocks, threads, 0, stream>>>(atom_numbers, max_neighbor_numbers, nl,
                                                                 nl_atom_numbers, nl_atom_serial);
}

// Host wrapper: asynchronously launches construct_neighbor_list_kernel on
// `stream` to wire each NEIGHBOR_LIST entry to its slice of the flat
// nl_atom_numbers / nl_atom_serial buffers (stride max_neighbor_numbers).
void ConstructNeighborList(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
                           NEIGHBOR_LIST *nl, cudaStream_t stream) {
  const int block_dim = 128;                                    // one thread per atom
  const int grid_dim = (atom_numbers + block_dim - 1) / block_dim;  // ceil-div tail guard
  construct_neighbor_list_kernel<<<grid_dim, block_dim, 0, stream>>>(atom_numbers, max_neighbor_numbers,
                                                                     nl_atom_numbers, nl_atom_serial, nl);
}

// Host-side call counter used by the periodic-refresh branch below.
// NOTE(review): mutable global with external linkage, shared by every kernel
// instance/stream and incremented without synchronization — not safe for
// concurrent callers; consider making it static or per-instance state.
int refresh_count = 0;

// Dispatches one of three neighbor-list refresh policies; every kernel is
// enqueued asynchronously on `stream`:
//  - forced_update != 0: rebuild unconditionally;
//  - refresh_interval > 0 && !forced_check: rebuild on every
//    refresh_interval-th call, tracked by the host-side refresh_count above;
//  - otherwise: flag atoms whose displacement since old_crd exceeds
//    half_skin_square on the device, then conditionally rebuild.
// Mul_quarter<<<1, 3>>> scales the 3 components of crd_to_uint_crd_cof into
// half_crd_to_uint_crd_cof before each rebuild.
// NOTE(review): d_refresh_count and not_first_time are not referenced in this
// body — confirm whether they are still needed in the interface.
// NOTE(review): cutoff_square is passed as Refresh_Neighbor_List_No_Check's
// cutoff_skin_square argument (not cutoff_with_skin_square) — confirm intended.
void NeighborListRefresh(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
                         int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
                         int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
                         int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
                         float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
                         float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
                         int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
                         int *is_need_refresh_neighbor_list, int forced_update, int forced_check, cudaStream_t stream) {
  if (forced_update) {
    // Unconditional rebuild requested by the caller.
    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List_No_Check(
      grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
      grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
      reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
      uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, excluded_numbers, stream);

  } else if (refresh_interval > 0 && !forced_check) {
    // Fixed-interval policy: rebuild on calls 0, refresh_interval, 2*refresh_interval, ...
    if (refresh_count % refresh_interval == 0) {
      Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
      Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, nxy, cutoff_square, grid_N, box_length,
                                     atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket,
                                     reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
                                     half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                                     uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list,
                                     excluded_numbers, stream);
    }
    refresh_count += 1;
  } else {
    // Displacement-triggered policy: device kernel sets the flag when any atom
    // moved more than half the skin since the last rebuild, then
    // Refresh_Neighbor_List rebuilds conditionally based on that flag.
    Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
      atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
      reinterpret_cast<VECTOR *>(box_length), half_skin_square, is_need_refresh_neighbor_list);
    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
                          reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                          half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer,
                          bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
                          excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, nxy,
                          stream);
  }
}

+ 32
- 11
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh View File

@@ -13,9 +13,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_
#include <curand_kernel.h>
#include "runtime/device/gpu/cuda_common.h"

struct VECTOR {
float x;
@@ -43,18 +49,33 @@ struct GRID_POINTER {
int *grid_serial;
};

void Construct_Neighbor_List(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream);
void ConstructNeighborList(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream);

void CopyNeighborList(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
int *nl_atom_serial, cudaStream_t stream);

void NeighborListRefresh(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, int forced_update, int forced_check, cudaStream_t stream);

void ConstructNeighborListHalf(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial,
NEIGHBOR_LIST *nl, cudaStream_t stream);

void CopyNeighborListAtomNumber(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream);
void CopyNeighborListHalf(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream);

void Neighbor_List_Update(int grid_numbers, int atom_numbers, int* d_refresh_count, int refresh_interval,
int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl,
int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, int max_neighbor_number, cudaStream_t stream);
void NeighborListUpdate(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square,
int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse,
int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd,
float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd,
float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
int *excluded_list, int *excluded_numbers, float half_skin_square,
int *is_need_refresh_neighbor_list, cudaStream_t stream);

#endif

+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/common/total_c6_get_kernel.cc View File

@@ -13,13 +13,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* Totalc6get. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/common/total_c6_get_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(
totalc6get,
Totalc6get,
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
TotalC6GetGpuKernel, float, int)
} // namespace kernel


+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/crdmcmap/cal_no_wrap_crd_kernel.cc View File

@@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* CalculateNowrapCrd. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/crdmcmap/cal_no_wrap_crd_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(calculatenowrapcrd,
MS_REG_GPU_KERNEL_TWO(CalculateNowrapCrd,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)


+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/crdmcmap/refresh_boxmaptimes_kernel.cc View File

@@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* RefreshBoxmapTimes. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/crdmcmap/refresh_boxmaptimes_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(refreshboxmaptimes,
MS_REG_GPU_KERNEL_TWO(RefreshBoxmapTimes,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)


+ 4
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.cc View File

@@ -13,6 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h"



+ 16
- 15
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h View File

@@ -13,6 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* NeighborListUpdate. This is an experimental interface that is subject to change and/or deletion.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_
@@ -38,7 +42,7 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
refresh_interval = static_cast<int>(GetAttr<int64_t>(kernel_node, "refresh_interval"));
not_first_time = static_cast<int>(GetAttr<int64_t>(kernel_node, "not_first_time"));
Nxy = static_cast<int>(GetAttr<int64_t>(kernel_node, "Nxy"));
nxy = static_cast<int>(GetAttr<int64_t>(kernel_node, "nxy"));
excluded_atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "excluded_atom_numbers"));

cutoff_square = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_square"));
@@ -62,7 +66,7 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
auto bucket = GetDeviceAddress<int>(inputs, 1);
auto crd = GetDeviceAddress<float>(inputs, 2);
auto box_length = GetDeviceAddress<float>(inputs, 3);
auto grid_N = GetDeviceAddress<int>(inputs, 4);
auto grid_n = GetDeviceAddress<int>(inputs, 4);
auto grid_length_inverse = GetDeviceAddress<float>(inputs, 5);
auto atom_in_grid_serial = GetDeviceAddress<int>(inputs, 6);
auto old_crd = GetDeviceAddress<float>(inputs, 7);
@@ -83,11 +87,9 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
NEIGHBOR_LIST *nl = GetDeviceAddress<NEIGHBOR_LIST>(workspaces, 2);
float *half_crd_to_uint_crd_cof = GetDeviceAddress<float>(workspaces, 3);

// std::vector<GRID_BUCKET> h_bucket(grid_numbers);
for (size_t i = 0; i < h_bucket.size(); i += 1) {
h_bucket[i].atom_serial = bucket + i * max_atom_in_grid_numbers;
}
// std::vector<GRID_POINTER> h_gpointer(grid_numbers);
for (size_t i = 0; i < h_gpointer.size(); i += 1) {
h_gpointer[i].grid_serial = gpointer + i * 125;
}
@@ -96,16 +98,15 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
reinterpret_cast<cudaStream_t>(stream_ptr));
cudaMemcpyAsync(d_gpointer, h_gpointer.data(), sizeof(GRID_POINTER) * grid_numbers, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr));
Construct_Neighbor_List(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl,
reinterpret_cast<cudaStream_t>(stream_ptr));

Neighbor_List_Update(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, Nxy,
cutoff_square, cutoff_with_skin_square, grid_N, box_length, atom_numbers_in_grid_bucket,
grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof,
half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start,
excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, max_neighbor_numbers,
reinterpret_cast<cudaStream_t>(stream_ptr));
CopyNeighborListAtomNumber(atom_numbers, nl, nl_atom_numbers, reinterpret_cast<cudaStream_t>(stream_ptr));
ConstructNeighborListHalf(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl,
reinterpret_cast<cudaStream_t>(stream_ptr));
NeighborListUpdate(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, nxy,
cutoff_square, cutoff_with_skin_square, grid_n, box_length, atom_numbers_in_grid_bucket,
grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof,
half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start,
excluded_list, excluded_numbers, half_skin_square, need_refresh_flag,
reinterpret_cast<cudaStream_t>(stream_ptr));
CopyNeighborListHalf(atom_numbers, nl, nl_atom_numbers, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}

@@ -151,7 +152,7 @@ class NeighborListUpdateGpuKernel : public GpuKernel {
int atom_numbers;
int grid_numbers;
int refresh_interval;
int Nxy;
int nxy;
int max_atom_in_grid_numbers;
int max_neighbor_numbers;
int excluded_atom_numbers;


+ 49
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc View File

@@ -0,0 +1,49 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* NeighborListRefresh. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h"

namespace mindspore {
namespace kernel {
// Registers the NeighborListRefresh GPU op with 19 inputs and one float32
// output, backed by NeighborListUpdateNewGpuKernel<int, float>. The attribute
// order here must match the input indices read in the kernel's Launch()
// (inputs 0..18) — presumably defined in neighbor_list_update_new_kernel.h;
// verify against that header when changing this list.
MS_REG_GPU_KERNEL_TWO(NeighborListRefresh,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeUInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      NeighborListUpdateNewGpuKernel, int, float)
}  // namespace kernel
}  // namespace mindspore

+ 179
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h View File

@@ -0,0 +1,179 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Note:
* NeighborListRefresh. This is an experimental interface that is subject to change and/or deletion.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_NEW_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_NEW_KERNEL_H_

#include <cuda_runtime_api.h>
#include <cufft.h>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "runtime/device/gpu/cuda_common.h"
#include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh"

namespace mindspore {
namespace kernel {
// GPU kernel wrapper for the SPONGE NeighborListRefresh op. It rebuilds the
// host-side GRID_BUCKET / GRID_POINTER pointer tables over the flat int
// buffers the framework supplies, uploads them, and delegates to the CUDA
// launchers declared in neighbor_list_impl.cuh (ConstructNeighborList,
// NeighborListRefresh, CopyNeighborList). T/T1 are the registered element
// types (int, float) and are not referenced directly in this class body.
template <typename T, typename T1>
class NeighborListUpdateNewGpuKernel : public GpuKernel {
 public:
  // Defaults: skin 2.0, cutoff 9.0, at most 64 atoms per grid bucket and 800
  // neighbors per atom. NOTE(review): skin/cutoff are float members set from
  // double literals; the remaining members are filled in by Init().
  NeighborListUpdateNewGpuKernel() : skin(2.0), cutoff(9.0), max_atom_in_grid_numbers(64), max_neighbor_numbers(800) {}
  ~NeighborListUpdateNewGpuKernel() override = default;
  // Reads all op attributes from the kernel node, sizes the host-side bucket
  // and grid-pointer mirrors (one entry per grid), and fills the size lists.
  // Always returns true; attribute lookup failures surface via GetAttr itself.
  bool Init(const CNodePtr &kernel_node) override {
    grid_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "grid_numbers"));
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));
    refresh_interval = static_cast<int>(GetAttr<int64_t>(kernel_node, "refresh_interval"));
    not_first_time = static_cast<int>(GetAttr<int64_t>(kernel_node, "not_first_time"));
    nxy = static_cast<int>(GetAttr<int64_t>(kernel_node, "nxy"));
    excluded_atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "excluded_atom_numbers"));

    cutoff_square = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_square"));
    half_skin_square = static_cast<float>(GetAttr<float>(kernel_node, "half_skin_square"));
    cutoff_with_skin = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_with_skin"));
    half_cutoff_with_skin = static_cast<float>(GetAttr<float>(kernel_node, "half_cutoff_with_skin"));
    cutoff_with_skin_square = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_with_skin_square"));
    forced_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "forced_update"));
    forced_check = static_cast<int>(GetAttr<int64_t>(kernel_node, "forced_check"));
    h_bucket.resize(grid_numbers);
    h_gpointer.resize(grid_numbers);
    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  // Wires device addresses (input indices 0-18 must match the .cc registration
  // order), uploads the bucket/grid-pointer tables, then runs the refresh
  // pipeline on stream_ptr. All work is asynchronous; no sync before return.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspaces,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto atom_numbers_in_grid_bucket = GetDeviceAddress<int>(inputs, 0);
    auto bucket = GetDeviceAddress<int>(inputs, 1);
    auto crd = GetDeviceAddress<float>(inputs, 2);
    auto box_length = GetDeviceAddress<float>(inputs, 3);
    auto grid_n = GetDeviceAddress<int>(inputs, 4);
    auto grid_length_inverse = GetDeviceAddress<float>(inputs, 5);
    auto atom_in_grid_serial = GetDeviceAddress<int>(inputs, 6);
    auto old_crd = GetDeviceAddress<float>(inputs, 7);
    auto crd_to_uint_crd_cof = GetDeviceAddress<float>(inputs, 8);
    auto uint_crd = GetDeviceAddress<unsigned int>(inputs, 9);
    auto gpointer = GetDeviceAddress<int>(inputs, 10);
    auto nl_atom_numbers = GetDeviceAddress<int>(inputs, 11);
    auto nl_atom_serial = GetDeviceAddress<int>(inputs, 12);
    auto uint_dr_to_dr_cof = GetDeviceAddress<float>(inputs, 13);
    auto excluded_list_start = GetDeviceAddress<int>(inputs, 14);
    auto excluded_list = GetDeviceAddress<int>(inputs, 15);
    auto excluded_numbers = GetDeviceAddress<int>(inputs, 16);
    auto need_refresh_flag = GetDeviceAddress<int>(inputs, 17);
    auto d_refresh_count = GetDeviceAddress<int>(inputs, 18);

    // Workspaces: struct views over raw int buffers sized in InitSizeLists().
    GRID_BUCKET *d_bucket = reinterpret_cast<GRID_BUCKET *>(GetDeviceAddress<int>(workspaces, 0));
    GRID_POINTER *d_gpointer = reinterpret_cast<GRID_POINTER *>(GetDeviceAddress<int>(workspaces, 1));
    NEIGHBOR_LIST *nl = GetDeviceAddress<NEIGHBOR_LIST>(workspaces, 2);
    float *half_crd_to_uint_crd_cof = GetDeviceAddress<float>(workspaces, 3);

    // Point each per-grid bucket at its slice of the flat device buffer
    // (stride max_atom_in_grid_numbers); pointers are device addresses held in
    // host-side structs until the memcpy below.
    for (size_t i = 0; i < h_bucket.size(); i += 1) {
      h_bucket[i].atom_serial = bucket + i * max_atom_in_grid_numbers;
    }
    // Stride 125 grid serials per grid — presumably the 5x5x5 neighborhood;
    // TODO(review): confirm and name this constant.
    for (size_t i = 0; i < h_gpointer.size(); i += 1) {
      h_gpointer[i].grid_serial = gpointer + i * 125;
    }

    // NOTE(review): cudaMemcpyAsync return codes are not checked here; an
    // earlier sticky CUDA error would go unnoticed until a later sync.
    // h_bucket/h_gpointer are pageable members, so these copies may be staged.
    cudaMemcpyAsync(d_bucket, h_bucket.data(), sizeof(GRID_BUCKET) * grid_numbers, cudaMemcpyHostToDevice,
                    reinterpret_cast<cudaStream_t>(stream_ptr));
    cudaMemcpyAsync(d_gpointer, h_gpointer.data(), sizeof(GRID_POINTER) * grid_numbers, cudaMemcpyHostToDevice,
                    reinterpret_cast<cudaStream_t>(stream_ptr));
    // Build NEIGHBOR_LIST views, run the refresh policy, then copy the
    // resulting neighbor counts/serials back into the flat input buffers.
    ConstructNeighborList(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl,
                          reinterpret_cast<cudaStream_t>(stream_ptr));

    NeighborListRefresh(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, nxy,
                        cutoff_square, cutoff_with_skin_square, grid_n, box_length, atom_numbers_in_grid_bucket,
                        grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof,
                        half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start,
                        excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, forced_update,
                        forced_check, reinterpret_cast<cudaStream_t>(stream_ptr));
    CopyNeighborList(atom_numbers, max_neighbor_numbers, nl, nl_atom_numbers, nl_atom_serial,
                     reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  // Declares the byte sizes of the 19 inputs (same order as Launch), the 4
  // workspaces, and the single float output placeholder.
  void InitSizeLists() override {
    input_size_list_.push_back(sizeof(int) * grid_numbers);
    input_size_list_.push_back(sizeof(int) * max_atom_in_grid_numbers * grid_numbers);
    input_size_list_.push_back(sizeof(VECTOR) * atom_numbers);
    input_size_list_.push_back(sizeof(VECTOR));

    input_size_list_.push_back(sizeof(INT_VECTOR));
    input_size_list_.push_back(sizeof(VECTOR));
    input_size_list_.push_back(sizeof(int) * atom_numbers);

    input_size_list_.push_back(sizeof(VECTOR) * atom_numbers);
    input_size_list_.push_back(sizeof(VECTOR));
    input_size_list_.push_back(sizeof(UNSIGNED_INT_VECTOR) * atom_numbers);

    // 125 grid serials per grid — keep in sync with the stride used in Launch().
    input_size_list_.push_back(sizeof(int) * grid_numbers * 125);
    input_size_list_.push_back(sizeof(int) * atom_numbers);
    input_size_list_.push_back(sizeof(int) * atom_numbers * max_neighbor_numbers);
    input_size_list_.push_back(sizeof(VECTOR));

    input_size_list_.push_back(sizeof(int) * atom_numbers);
    input_size_list_.push_back(sizeof(int) * excluded_atom_numbers);
    input_size_list_.push_back(sizeof(int) * atom_numbers);

    input_size_list_.push_back(sizeof(int));
    input_size_list_.push_back(sizeof(int));

    workspace_size_list_.push_back(sizeof(GRID_BUCKET) * grid_numbers);
    workspace_size_list_.push_back(sizeof(GRID_POINTER) * grid_numbers);
    workspace_size_list_.push_back(sizeof(NEIGHBOR_LIST) * atom_numbers);
    workspace_size_list_.push_back(sizeof(float) * 3);

    output_size_list_.push_back(sizeof(float));
  }

 private:
  float skin;                    // neighbor-list skin width (constructor default 2.0)
  float cutoff;                  // interaction cutoff (constructor default 9.0; not read in this class body)
  int not_first_time;            // attr: nonzero after the first step
  int atom_numbers;              // attr: total atom count
  int grid_numbers;              // attr: total grid (bucket) count
  int refresh_interval;          // attr: rebuild every N calls when > 0
  int nxy;                       // attr: grid count in the x-y plane
  int max_atom_in_grid_numbers;  // bucket capacity (fixed 64)
  int max_neighbor_numbers;      // per-atom neighbor capacity (fixed 800)
  int excluded_atom_numbers;     // attr: length of the flat exclusion list
  float half_skin_square;        // attr: displacement-check threshold
  float cutoff_square;
  float cutoff_with_skin;
  float half_cutoff_with_skin;
  float cutoff_with_skin_square;
  int forced_update;             // attr: nonzero forces an unconditional rebuild
  int forced_check;              // attr: nonzero forces the displacement-check path

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  std::vector<GRID_BUCKET> h_bucket;      // host mirror of per-grid bucket pointers
  std::vector<GRID_POINTER> h_gpointer;   // host mirror of per-grid neighbor-grid pointers
};
} // namespace kernel
} // namespace mindspore

#endif

+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/restrain/restrain_energy_kernel.cc View File

@@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* RestrainEnergy. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/restrain/restrain_energy_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(restrainenergy,
MS_REG_GPU_KERNEL_TWO(RestrainEnergy,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)


+ 5
- 1
mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/restrain/restrain_force_atom_energy_virial_impl_kernel.cc View File

@@ -13,12 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*Note:
* RestrainForceWithAtomEnergyVirial. This is an experimental interface that is subject to change and/or deletion.
*/

#include "backend/kernel_compiler/gpu/sponge/restrain/restrain_force_atom_energy_virial_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(restrainforcewithatomenergyandvirial,
MS_REG_GPU_KERNEL_TWO(RestrainForceWithAtomEnergyVirial,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)


Loading…
Cancel
Save