From: @zhangxinfeng3 Reviewed-by: @ljl0711,@wang_zi_dong Signed-off-by: @wang_zi_dongtags/v1.2.0-rc1
| @@ -399,7 +399,8 @@ if(ENABLE_GPU) | |||
| ${CUDNN_LIBRARY_PATH} | |||
| ${CUDA_PATH}/lib64/libcudart.so | |||
| ${CUDA_PATH}/lib64/stubs/libcuda.so | |||
| ${CUDA_PATH}/lib64/libcusolver.so) | |||
| ${CUDA_PATH}/lib64/libcusolver.so | |||
| ${CUDA_PATH}/lib64/libcufft.so) | |||
| if(ENABLE_MPI) | |||
| set_target_properties(_ms_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH}) | |||
| endif() | |||
| @@ -0,0 +1,45 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh" | |||
| __global__ void GetCenterOfGeometryKernel(const int center_numbers, float center_numbers_inverse, | |||
| const int *center_atoms, const VECTOR *crd, VECTOR *center_of_geometry) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < center_numbers) { | |||
| int atom_i = center_atoms[i]; | |||
| VECTOR temp = center_numbers_inverse * crd[atom_i]; | |||
| atomicAdd(¢er_of_geometry[0].x, temp.x); | |||
| atomicAdd(¢er_of_geometry[0].y, temp.y); | |||
| atomicAdd(¢er_of_geometry[0].z, temp.z); | |||
| } | |||
| } | |||
| void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms, | |||
| const float *crd_f, float *center_of_geometry_f, cudaStream_t stream) { | |||
| VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f)); | |||
| VECTOR *center_of_geometry = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(center_of_geometry_f)); | |||
| GetCenterOfGeometryKernel<<<ceilf(static_cast<float>(center_numbers) / 32), 32, 0, stream>>>( | |||
| center_numbers, center_numbers_inverse, center_atoms, crd, center_of_geometry); | |||
| cudaStreamSynchronize(stream); | |||
| return; | |||
| } | |||
| void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms, float *crd_f, | |||
| float *center_of_geometry_f, cudaStream_t stream); | |||
| @@ -0,0 +1,26 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms, | |||
| const float *crd_f, float *center_of_geometry_f, cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_ | |||
| @@ -0,0 +1,51 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh" | |||
| __global__ void MDTemperatureKernel(const int residue_numbers, const int *start, const int *end, const VECTOR *atom_vel, | |||
| const float *atom_mass, float *ek) { | |||
| int residue_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (residue_i < residue_numbers) { | |||
| VECTOR momentum = {0., 0., 0.}; | |||
| float res_mass = 0.; | |||
| int s = start[residue_i]; | |||
| int e = end[residue_i]; | |||
| float mass_lin; | |||
| for (int atom_i = s; atom_i < e; atom_i = atom_i + 1) { | |||
| mass_lin = atom_mass[atom_i]; | |||
| momentum.x = momentum.x + mass_lin * atom_vel[atom_i].x; | |||
| momentum.y = momentum.y + mass_lin * atom_vel[atom_i].y; | |||
| momentum.z = momentum.z + mass_lin * atom_vel[atom_i].z; | |||
| res_mass = res_mass + mass_lin; | |||
| } | |||
| ek[residue_i] = 0.5 * (momentum.x * momentum.x + momentum.y * momentum.y + momentum.z * momentum.z) / res_mass * | |||
| 2. / 3. / CONSTANT_kB / residue_numbers; | |||
| } | |||
| } | |||
| void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f, | |||
| const float *atom_mass, float *ek, cudaStream_t stream) { | |||
| VECTOR *atom_vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(atom_vel_f)); | |||
| MDTemperatureKernel<<<ceilf(static_cast<float>(residue_numbers) / 32), 32, 0, stream>>>(residue_numbers, start, end, | |||
| atom_vel, atom_mass, ek); | |||
| cudaStreamSynchronize(stream); | |||
| return; | |||
| } | |||
| void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f, | |||
| const float *atom_mass, float *ek, cudaStream_t stream); | |||
| @@ -0,0 +1,25 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f, | |||
| const float *atom_mass, float *ek, cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_ | |||
| @@ -14,31 +14,59 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SPONGE_COMMONHW_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SPONGE_COMMONHW_H_ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include <math.h> | |||
| #include <time.h> | |||
| #include <curand_kernel.h> | |||
| #include <cuda_runtime.h> | |||
| #include <device_launch_parameters.h> | |||
| #include <cufft.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #define CONSTANT_Pi 3.1415926535897932 | |||
| #define TWO_DIVIDED_BY_SQRT_PI 1.1283791670218446 | |||
| #define CONSTANT_kB 0.00198716 | |||
| static dim3 thread_LJ(8, 32); | |||
| struct VECTOR { | |||
| float x; | |||
| float y; | |||
| float z; | |||
| }; | |||
| struct INT_VECTOR { | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| }; | |||
| struct UNSIGNED_INT_VECTOR { | |||
| unsigned int uint_x; | |||
| unsigned int uint_y; | |||
| unsigned int uint_z; | |||
| }; | |||
| struct NEIGHBOR_LIST { | |||
| int atom_numbers; | |||
| int *atom_serial; | |||
| }; | |||
| struct UINT_VECTOR_LJ_TYPE { | |||
| unsigned int uint_x; | |||
| unsigned int uint_y; | |||
| unsigned int uint_z; | |||
| int LJ_type; | |||
| float charge; | |||
| }; | |||
| struct GRID_BUCKET { | |||
| int *atom_serial; | |||
| }; | |||
| struct GRID_POINTER { | |||
| int *grid_serial; | |||
| }; | |||
| __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UNSIGNED_INT_VECTOR uvec_a, | |||
| const UNSIGNED_INT_VECTOR uvec_b, | |||
| const VECTOR scaler) { | |||
| @@ -48,6 +76,15 @@ __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UNSIGNE | |||
| dr.z = (static_cast<int>(uvec_a.uint_z - uvec_b.uint_z)) * scaler.z; | |||
| return dr; | |||
| } | |||
| __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UINT_VECTOR_LJ_TYPE uvec_a, | |||
| const UINT_VECTOR_LJ_TYPE uvec_b, | |||
| const VECTOR scaler) { | |||
| VECTOR dr; | |||
| dr.x = (static_cast<int>(uvec_a.uint_x - uvec_b.uint_x)) * scaler.x; | |||
| dr.y = (static_cast<int>(uvec_a.uint_y - uvec_b.uint_y)) * scaler.y; | |||
| dr.z = (static_cast<int>(uvec_a.uint_z - uvec_b.uint_z)) * scaler.z; | |||
| return dr; | |||
| } | |||
| __device__ __host__ static inline VECTOR operator+(const VECTOR &veca, const VECTOR &vecb) { | |||
| VECTOR vec; | |||
| @@ -91,4 +128,124 @@ __device__ __host__ static inline VECTOR operator^(const VECTOR &veca, const VEC | |||
| return vec; | |||
| } | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SPONGE_COMMON_H_ | |||
| __global__ static void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, | |||
| int *nl_atom_serial, NEIGHBOR_LIST *nl) { | |||
| for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) { | |||
| nl[i].atom_numbers = nl_atom_numbers[i]; | |||
| nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers; | |||
| } | |||
| } | |||
| static inline bool Malloc_Safely(void **address, size_t size) { | |||
| address[0] = NULL; | |||
| address[0] = reinterpret_cast<void *>(malloc(size)); | |||
| if (address[0] != NULL) { | |||
| return true; | |||
| } else { | |||
| printf("malloc failed!\n"); | |||
| getchar(); | |||
| return false; | |||
| } | |||
| } | |||
| static inline bool Cuda_Malloc_Safely(void **address, size_t size) { | |||
| cudaError_t cuda_error = cudaMalloc(&address[0], size); | |||
| if (cuda_error == 0) { | |||
| return true; | |||
| } else { | |||
| printf("cudaMalloc failed! error %d\n", cuda_error); | |||
| getchar(); | |||
| return false; | |||
| } | |||
| } | |||
| __global__ static void Copy_Crd_To_New_Crd_Start(const int atom_numbers, const UNSIGNED_INT_VECTOR *crd, | |||
| UINT_VECTOR_LJ_TYPE *new_crd, const int *LJ_type, | |||
| const float *charge) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| new_crd[atom_i].uint_x = crd[atom_i].uint_x; | |||
| new_crd[atom_i].uint_y = crd[atom_i].uint_y; | |||
| new_crd[atom_i].uint_z = crd[atom_i].uint_z; | |||
| new_crd[atom_i].LJ_type = LJ_type[atom_i]; | |||
| new_crd[atom_i].charge = charge[atom_i]; | |||
| } | |||
| } | |||
| __global__ static void Rand_Normal(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state, | |||
| float4 *rand_float4) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < float4_numbers) { | |||
| rand_float4[i] = curand_normal4(&rand_state[i]); | |||
| } | |||
| } | |||
| __global__ static void Setup_Rand_Normal_Kernel(const int float4_numbers, curandStatePhilox4_32_10_t *rand_state, | |||
| const int seed) { | |||
| int id = threadIdx.x + blockIdx.x * blockDim.x; | |||
| /* Each thread gets same seed, a different sequence | |||
| number, no offset */ | |||
| if (id < float4_numbers) { | |||
| curand_init(seed, id, 0, &rand_state[id]); | |||
| } | |||
| } | |||
| __global__ static void Reset_List(const int element_numbers, int *list, const int replace_element) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < element_numbers) { | |||
| list[i] = replace_element; | |||
| } | |||
| } | |||
| __global__ static void Reset_List(const int element_numbers, float *list, const float replace_element) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < element_numbers) { | |||
| list[i] = replace_element; | |||
| } | |||
| } | |||
| __global__ static void Sum_Of_List(const int element_numbers, const float *list, float *sum) { | |||
| if (threadIdx.x == 0) { | |||
| sum[0] = 0.; | |||
| } | |||
| __syncthreads(); | |||
| float lin = 0.; | |||
| for (int i = threadIdx.x; i < element_numbers; i = i + blockDim.x) { | |||
| lin = lin + list[i]; | |||
| } | |||
| atomicAdd(sum, lin); | |||
| } | |||
| __global__ static void Scale_List(const int element_numbers, float *list, float scaler) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < element_numbers) { | |||
| list[i] = list[i] * scaler; | |||
| } | |||
| } | |||
| __global__ static void Copy_List(const int element_numbers, const int *origin_list, int *list) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < element_numbers) { | |||
| list[i] = origin_list[i]; | |||
| } | |||
| } | |||
| __global__ static void Copy_List(const int element_numbers, const float *origin_list, float *list) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < element_numbers) { | |||
| list[i] = origin_list[i]; | |||
| } | |||
| } | |||
| __global__ static void Print(const size_t size, const float *input_x) { | |||
| for (size_t i = 0; i < size; i++) { | |||
| printf("%f\n", input_x[i]); | |||
| } | |||
| return; | |||
| } | |||
| __global__ static void Print(const size_t size, const int *input_x) { | |||
| for (size_t i = 0; i < size; i++) { | |||
| printf("%d\n", input_x[i]); | |||
| } | |||
| return; | |||
| } | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_SPONGE_H_ | |||
| @@ -29,8 +29,6 @@ __global__ void DihedralAtomEnergyKernel(int dihedral_numbers, const UNSIGNED_IN | |||
| int atom_k = atom_c[dihedral_i]; | |||
| int atom_l = atom_d[dihedral_i]; | |||
| int temp_ipn = ipn[dihedral_i]; | |||
| float temp_pk = pk[dihedral_i]; | |||
| float temp_pn = pn[dihedral_i]; | |||
| float temp_gamc = gamc[dihedral_i]; | |||
| @@ -29,8 +29,6 @@ __global__ void DihedralEnergyKernel(int dihedral_numbers, const UNSIGNED_INT_VE | |||
| int atom_k = atom_c[dihedral_i]; | |||
| int atom_l = atom_d[dihedral_i]; | |||
| int temp_ipn = ipn[dihedral_i]; | |||
| float temp_pk = pk[dihedral_i]; | |||
| float temp_pn = pn[dihedral_i]; | |||
| float temp_gamc = gamc[dihedral_i]; | |||
| @@ -31,7 +31,6 @@ __global__ void DihedralForceKernel(int dihedral_numbers, const UNSIGNED_INT_VEC | |||
| int temp_ipn = ipn[dihedral_i]; | |||
| float temp_pk = pk[dihedral_i]; | |||
| float temp_pn = pn[dihedral_i]; | |||
| float temp_gamc = gamc[dihedral_i]; | |||
| float temp_gams = gams[dihedral_i]; | |||
| @@ -0,0 +1,102 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void LJ_Energy_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd, | |||
| const VECTOR *boxlength, const float *LJ_type_A, const float *LJ_type_B, | |||
| const float cutoff_square, float *lj_ene) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| NEIGHBOR_LIST nl_i = nl[atom_i]; | |||
| int N = nl_i.atom_numbers; | |||
| int atom_j; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2; | |||
| VECTOR dr; | |||
| float dr2; | |||
| float dr_2; | |||
| float dr_4; | |||
| float dr_6; | |||
| float ene_lin = 0.; | |||
| int x, y; | |||
| int atom_pair_LJ_type; | |||
| for (int j = threadIdx.y; j < N; j = j + blockDim.y) { | |||
| atom_j = nl_i.atom_serial[j]; | |||
| r2 = uint_crd[atom_j]; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| if (dr2 < cutoff_square) { | |||
| dr_2 = 1. / dr2; | |||
| dr_4 = dr_2 * dr_2; | |||
| dr_6 = dr_4 * dr_2; | |||
| y = (r2.LJ_type - r1.LJ_type); | |||
| x = y >> 31; | |||
| y = (y ^ x) - x; | |||
| x = r2.LJ_type + r1.LJ_type; | |||
| r2.LJ_type = (x + y) >> 1; | |||
| x = (x - y) >> 1; | |||
| atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x; | |||
| dr_2 = (0.083333333 * LJ_type_A[atom_pair_LJ_type] * dr_6 - 0.166666666 * LJ_type_B[atom_pair_LJ_type]) * dr_6; | |||
| ene_lin = ene_lin + dr_2; | |||
| } | |||
| } | |||
| atomicAdd(&lj_ene[atom_i], ene_lin); | |||
| } | |||
| } | |||
| void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, | |||
| int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom, | |||
| cudaStream_t stream) { | |||
| VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f)); | |||
| int max_neighbor_numbers = 800; | |||
| NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl); | |||
| construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>( | |||
| atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a); | |||
| UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ); | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge); | |||
| Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, d_LJ_energy_atom, 0.); | |||
| LJ_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>( | |||
| atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff_square, d_LJ_energy_atom); | |||
| return; | |||
| } | |||
| void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, | |||
| int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom, | |||
| cudaStream_t stream); | |||
| @@ -0,0 +1,27 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_ENERGY_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_ENERGY_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, | |||
| int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom, | |||
| cudaStream_t stream); | |||
| #endif | |||
| @@ -0,0 +1,116 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void LJ_Force_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, const UINT_VECTOR_LJ_TYPE *uint_crd, | |||
| const VECTOR *boxlength, const float *LJ_type_A, const float *LJ_type_B, | |||
| const float cutoff_square, VECTOR *frc) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| NEIGHBOR_LIST nl_i = nl[atom_i]; | |||
| int N = nl_i.atom_numbers; | |||
| int B = ceilf(static_cast<float>(N) / blockDim.y); | |||
| int atom_j; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2; | |||
| VECTOR dr; | |||
| float dr2; | |||
| float dr_2; | |||
| float dr_4; | |||
| float dr_8; | |||
| float dr_14; | |||
| float frc_abs = 0.; | |||
| VECTOR frc_lin; | |||
| VECTOR frc_record = {0., 0., 0.}; | |||
| int x, y; | |||
| int atom_pair_LJ_type; | |||
| for (int j = threadIdx.y * B; j < (threadIdx.y + 1) * B; j = j + 1) { | |||
| if (j < N) { | |||
| atom_j = nl_i.atom_serial[j]; | |||
| r2 = uint_crd[atom_j]; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| if (dr2 < cutoff_square) { | |||
| dr_2 = 1. / dr2; | |||
| dr_4 = dr_2 * dr_2; | |||
| dr_8 = dr_4 * dr_4; | |||
| dr_14 = dr_8 * dr_4 * dr_2; | |||
| y = (r2.LJ_type - r1.LJ_type); | |||
| x = y >> 31; | |||
| y = (y ^ x) - x; | |||
| x = r2.LJ_type + r1.LJ_type; | |||
| r2.LJ_type = (x + y) >> 1; | |||
| x = (x - y) >> 1; | |||
| atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x; | |||
| frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8; | |||
| frc_lin.x = frc_abs * dr.x; | |||
| frc_lin.y = frc_abs * dr.y; | |||
| frc_lin.z = frc_abs * dr.z; | |||
| frc_record.x = frc_record.x + frc_lin.x; | |||
| frc_record.y = frc_record.y + frc_lin.y; | |||
| frc_record.z = frc_record.z + frc_lin.z; | |||
| atomicAdd(&frc[atom_j].x, -frc_lin.x); | |||
| atomicAdd(&frc[atom_j].y, -frc_lin.y); | |||
| atomicAdd(&frc[atom_j].z, -frc_lin.z); | |||
| } | |||
| } | |||
| } | |||
| atomicAdd(&frc[atom_i].x, frc_record.x); | |||
| atomicAdd(&frc[atom_i].y, frc_record.y); | |||
| atomicAdd(&frc[atom_i].z, frc_record.z); | |||
| } | |||
| } | |||
| void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, | |||
| int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, | |||
| cudaStream_t stream) { | |||
| VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f); | |||
| VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f)); | |||
| int max_neighbor_numbers = 800; | |||
| NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl); | |||
| construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>( | |||
| atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a); | |||
| UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ); | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge); | |||
| LJ_Force_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>( | |||
| atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff_square, frc); | |||
| return; | |||
| } | |||
| void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, | |||
| int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, cudaStream_t stream); | |||
| @@ -0,0 +1,27 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, | |||
| int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, cudaStream_t stream); | |||
| #endif | |||
| @@ -0,0 +1,132 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void LJ_Force_With_Direct_CF_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl, | |||
| const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength, | |||
| const float *LJ_type_A, const float *LJ_type_B, const float cutoff, | |||
| VECTOR *frc, const float pme_beta, const float sqrt_pi) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| NEIGHBOR_LIST nl_i = nl[atom_i]; | |||
| int N = nl_i.atom_numbers; | |||
| int atom_j; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2; | |||
| VECTOR dr; | |||
| float dr_2; | |||
| float dr_4; | |||
| float dr_8; | |||
| float dr_6; | |||
| float frc_abs = 0.; | |||
| VECTOR frc_lin; | |||
| VECTOR frc_record = {0., 0., 0.}; | |||
| float charge_i = r1.charge; | |||
| float charge_j; | |||
| float dr_abs; | |||
| float dr_1; | |||
| float beta_dr; | |||
| float frc_cf_abs; | |||
| int x, y; | |||
| int atom_pair_LJ_type; | |||
| for (int j = threadIdx.y; j < N; j = j + blockDim.y) { | |||
| atom_j = nl_i.atom_serial[j]; | |||
| r2 = uint_crd[atom_j]; | |||
| charge_j = r2.charge; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| dr_abs = norm3df(dr.x, dr.y, dr.z); | |||
| if (dr_abs < cutoff) { | |||
| dr_1 = 1. / dr_abs; | |||
| dr_2 = dr_1 * dr_1; | |||
| dr_4 = dr_2 * dr_2; | |||
| dr_8 = dr_4 * dr_4; | |||
| dr_6 = dr_4 * dr_2; | |||
| y = (r2.LJ_type - r1.LJ_type); | |||
| x = y >> 31; | |||
| y = (y ^ x) - x; | |||
| x = r2.LJ_type + r1.LJ_type; | |||
| r2.LJ_type = (x + y) >> 1; | |||
| x = (x - y) >> 1; | |||
| atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x; | |||
| frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8; | |||
| beta_dr = pme_beta * dr_abs; | |||
| frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr); | |||
| frc_cf_abs = frc_cf_abs * dr_2 * dr_1; | |||
| frc_cf_abs = charge_i * charge_j * frc_cf_abs; | |||
| frc_abs = frc_abs - frc_cf_abs; | |||
| frc_lin.x = frc_abs * dr.x; | |||
| frc_lin.y = frc_abs * dr.y; | |||
| frc_lin.z = frc_abs * dr.z; | |||
| frc_record.x = frc_record.x + frc_lin.x; | |||
| frc_record.y = frc_record.y + frc_lin.y; | |||
| frc_record.z = frc_record.z + frc_lin.z; | |||
| atomicAdd(&frc[atom_j].x, -frc_lin.x); | |||
| atomicAdd(&frc[atom_j].y, -frc_lin.y); | |||
| atomicAdd(&frc[atom_j].z, -frc_lin.z); | |||
| } | |||
| } | |||
| atomicAdd(&frc[atom_i].x, frc_record.x); | |||
| atomicAdd(&frc[atom_i].y, frc_record.y); | |||
| atomicAdd(&frc[atom_i].z, frc_record.z); | |||
| } | |||
| } | |||
| void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ, | |||
| int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A, | |||
| const float *d_LJ_B, float *frc_f, cudaStream_t stream) { | |||
| VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f); | |||
| VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f)); | |||
| int max_neighbor_numbers = 800; | |||
| NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl); | |||
| construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>( | |||
| atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a); | |||
| UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ); | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge); | |||
| LJ_Force_With_Direct_CF_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>( | |||
| atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI); | |||
| return; | |||
| } | |||
| void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ, | |||
| int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A, | |||
| const float *d_LJ_B, float *frc_f, cudaStream_t stream); | |||
| @@ -0,0 +1,28 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ, | |||
| int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A, | |||
| const float *d_LJ_B, float *frc_f, cudaStream_t stream); | |||
| #endif | |||
| @@ -0,0 +1,80 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void Dihedral14CFAtomEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd, | |||
| const VECTOR *boxlength, const int *a_14, const int *b_14, | |||
| const float *cf_scale_factor, float *ene) { | |||
| int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (dihedral_14_i < dihedral_14_numbers) { | |||
| int atom_i = a_14[dihedral_14_i]; | |||
| int atom_j = b_14[dihedral_14_i]; | |||
| UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i]; | |||
| UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j]; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| VECTOR dr; | |||
| float r_1; | |||
| float ene_lin = 0.; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| r_1 = rnorm3df(dr.x, dr.y, dr.z); | |||
| ene_lin = r1.charge * r2.charge * r_1; | |||
| ene_lin *= cf_scale_factor[dihedral_14_i]; | |||
| atomicAdd(&ene[atom_i], ene_lin); | |||
| } | |||
| } | |||
| void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream) { | |||
| size_t thread_per_block = 128; | |||
| size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128); | |||
| UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL; | |||
| Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers); | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | |||
| VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.); | |||
| Dihedral14CFAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | |||
| dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene); | |||
| cudaStreamSynchronize(stream); | |||
| return; | |||
| } | |||
| void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream); | |||
| @@ -0,0 +1,25 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_IMPL_H | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H | |||
| @@ -0,0 +1,80 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void Dihedral14CFEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd, | |||
| const VECTOR *boxlength, const int *a_14, const int *b_14, | |||
| const float *cf_scale_factor, float *ene) { | |||
| int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (dihedral_14_i < dihedral_14_numbers) { | |||
| int atom_i = a_14[dihedral_14_i]; | |||
| int atom_j = b_14[dihedral_14_i]; | |||
| UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i]; | |||
| UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j]; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| VECTOR dr; | |||
| float r_1; | |||
| float ene_lin = 0.; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| r_1 = rnorm3df(dr.x, dr.y, dr.z); | |||
| ene_lin = r1.charge * r2.charge * r_1; | |||
| ene_lin *= cf_scale_factor[dihedral_14_i]; | |||
| ene[dihedral_14_i] = ene_lin; | |||
| } | |||
| } | |||
| void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, | |||
| const float *cf_scale_factor, float *ene, cudaStream_t stream) { | |||
| size_t thread_per_block = 128; | |||
| size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128); | |||
| UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL; | |||
| Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers); | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | |||
| VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.); | |||
| Dihedral14CFEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | |||
| dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene); | |||
| cudaStreamSynchronize(stream); | |||
| return; | |||
| } | |||
| void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, | |||
| const float *cf_scale_factor, float *ene, cudaStream_t stream); | |||
| @@ -0,0 +1,25 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *boxlength, const int *a_14, const int *b_14, | |||
| const float *cf_scale_factor, float *ene, cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H | |||
| @@ -0,0 +1,102 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void Dihedral14LJAtomEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd, | |||
| const VECTOR *boxlength, const int *a_14, const int *b_14, | |||
| const float *lj_scale_factor, const float *LJ_type_A, | |||
| const float *LJ_type_B, float *ene) { | |||
| int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (dihedral_14_i < dihedral_14_numbers) { | |||
| int atom_i = a_14[dihedral_14_i]; | |||
| int atom_j = b_14[dihedral_14_i]; | |||
| UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i]; | |||
| UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j]; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| VECTOR dr; | |||
| float dr2; | |||
| float dr_2; | |||
| float dr_4; | |||
| float dr_6; | |||
| float dr_12; | |||
| float ene_lin = 0.; | |||
| int x, y; | |||
| int atom_pair_LJ_type; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| dr_2 = 1. / dr2; | |||
| dr_4 = dr_2 * dr_2; | |||
| dr_6 = dr_4 * dr_2; | |||
| dr_12 = dr_6 * dr_6; | |||
| y = (r2.LJ_type - r1.LJ_type); | |||
| x = y >> 31; | |||
| y = (y ^ x) - x; | |||
| x = r2.LJ_type + r1.LJ_type; | |||
| r2.LJ_type = (x + y) >> 1; | |||
| x = (x - y) >> 1; | |||
| atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x; | |||
| ene_lin = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_12 - | |||
| 0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_6; // LJ的A,B系数已经乘以12和6因此要反乘 | |||
| ene_lin *= lj_scale_factor[dihedral_14_i]; | |||
| atomicAdd(&ene[atom_i], ene_lin); | |||
| } | |||
| } | |||
| void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, | |||
| const float *LJ_type_B, float *ene, cudaStream_t stream) { | |||
| size_t thread_per_block = 128; | |||
| size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128); | |||
| UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL; | |||
| Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers); | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | |||
| VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, ene, 0.); | |||
| Dihedral14LJAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | |||
| dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene); | |||
| cudaStreamSynchronize(stream); | |||
| return; | |||
| } | |||
| void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, | |||
| const float *LJ_type_B, float *ene, cudaStream_t stream); | |||
| @@ -0,0 +1,26 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, | |||
| const float *LJ_type_B, float *ene, cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H | |||
| @@ -0,0 +1,140 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void Dihedral14LJCFForceWithAtomEnergyKernel(const int dihedral_14_numbers, | |||
| const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength, | |||
| const int *a_14, const int *b_14, const float *lj_scale_factor, | |||
| const float *cf_scale_factor, const float *LJ_type_A, | |||
| const float *LJ_type_B, VECTOR *frc, float *atom_energy) { | |||
| int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (dihedral_14_i < dihedral_14_numbers) { | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| UINT_VECTOR_LJ_TYPE r1, r2; | |||
| VECTOR dr; | |||
| float dr_abs; | |||
| float dr2; | |||
| float dr_1; | |||
| float dr_2; | |||
| float dr_4; | |||
| float dr_8; | |||
| float dr_14; | |||
| float frc_abs = 0.; | |||
| VECTOR temp_frc; | |||
| float ene_lin; | |||
| float ene_lin2; | |||
| int x, y; | |||
| int atom_pair_LJ_type; | |||
| int atom_i = a_14[dihedral_14_i]; | |||
| int atom_j = b_14[dihedral_14_i]; | |||
| r1 = uint_crd[atom_i]; | |||
| r2 = uint_crd[atom_j]; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| dr_2 = 1.0 / dr2; | |||
| dr_4 = dr_2 * dr_2; | |||
| dr_8 = dr_4 * dr_4; | |||
| dr_14 = dr_8 * dr_4 * dr_2; | |||
| dr_abs = norm3df(dr.x, dr.y, dr.z); | |||
| dr_1 = 1. / dr_abs; | |||
| float charge_i = r1.charge; | |||
| float charge_j = r2.charge; | |||
| float frc_cf_abs; | |||
| frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1; | |||
| frc_cf_abs = -charge_i * charge_j * frc_cf_abs; | |||
| y = (r2.LJ_type - r1.LJ_type); | |||
| x = y >> 31; | |||
| y = (y ^ x) - x; | |||
| x = r2.LJ_type + r1.LJ_type; | |||
| r2.LJ_type = (x + y) >> 1; | |||
| x = (x - y) >> 1; | |||
| atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x; | |||
| frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8; | |||
| frc_abs *= lj_scale_factor[dihedral_14_i]; | |||
| frc_abs += frc_cf_abs; | |||
| temp_frc.x = frc_abs * dr.x; | |||
| temp_frc.y = frc_abs * dr.y; | |||
| temp_frc.z = frc_abs * dr.z; | |||
| atomicAdd(&frc[atom_j].x, -temp_frc.x); | |||
| atomicAdd(&frc[atom_j].y, -temp_frc.y); | |||
| atomicAdd(&frc[atom_j].z, -temp_frc.z); | |||
| atomicAdd(&frc[atom_i].x, temp_frc.x); | |||
| atomicAdd(&frc[atom_i].y, temp_frc.y); | |||
| atomicAdd(&frc[atom_i].z, temp_frc.z); | |||
| ene_lin = r1.charge * r2.charge * dr_1; | |||
| ene_lin *= cf_scale_factor[dihedral_14_i]; | |||
| ene_lin2 = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_4 * dr_8 - | |||
| 0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_4 * dr_2; // LJ的A,B系数已经乘以12和6因此要反乘 | |||
| ene_lin2 *= lj_scale_factor[dihedral_14_i]; | |||
| atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2); | |||
| } | |||
| } | |||
| void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, | |||
| const int *a_14, const int *b_14, const float *lj_scale_factor, | |||
| const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B, | |||
| float *frc_f, float *atom_energy, cudaStream_t stream) { | |||
| size_t thread_per_block = 128; | |||
| size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128); | |||
| UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL; | |||
| Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers); | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(atom_numbers, atom_energy, 0.); | |||
| VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | |||
| VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f)); | |||
| Dihedral14LJCFForceWithAtomEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | |||
| dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A, | |||
| LJ_type_B, frc, atom_energy); | |||
| cudaStreamSynchronize(stream); | |||
| return; | |||
| } | |||
| void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor, | |||
| const float *LJ_type_A, const float *LJ_type_B, float *frc, float *atom_energy, | |||
| cudaStream_t stream); | |||
| @@ -0,0 +1,27 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, | |||
| const int *a_14, const int *b_14, const float *lj_scale_factor, | |||
| const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B, | |||
| float *frc, float *atom_energy, cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H | |||
| @@ -0,0 +1,102 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void Dihedral14LJEnergyKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd, | |||
| const VECTOR *boxlength, const int *a_14, const int *b_14, | |||
| const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, | |||
| float *ene) { | |||
| int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (dihedral_14_i < dihedral_14_numbers) { | |||
| int atom_i = a_14[dihedral_14_i]; | |||
| int atom_j = b_14[dihedral_14_i]; | |||
| UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i]; | |||
| UINT_VECTOR_LJ_TYPE r2 = uint_crd[atom_j]; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| VECTOR dr; | |||
| float dr2; | |||
| float dr_2; | |||
| float dr_4; | |||
| float dr_6; | |||
| float dr_12; | |||
| float ene_lin = 0.; | |||
| int x, y; | |||
| int atom_pair_LJ_type; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| dr_2 = 1. / dr2; | |||
| dr_4 = dr_2 * dr_2; | |||
| dr_6 = dr_4 * dr_2; | |||
| dr_12 = dr_6 * dr_6; | |||
| y = (r2.LJ_type - r1.LJ_type); | |||
| x = y >> 31; | |||
| y = (y ^ x) - x; | |||
| x = r2.LJ_type + r1.LJ_type; | |||
| r2.LJ_type = (x + y) >> 1; | |||
| x = (x - y) >> 1; | |||
| atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x; | |||
| ene_lin = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_12 - | |||
| 0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_6; // LJ的A,B系数已经乘以12和6因此要反乘 | |||
| ene_lin *= lj_scale_factor[dihedral_14_i]; | |||
| ene[dihedral_14_i] = ene_lin; | |||
| } | |||
| } | |||
| void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, | |||
| const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *ene, | |||
| cudaStream_t stream) { | |||
| size_t thread_per_block = 128; | |||
| size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128); | |||
| UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL; | |||
| Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers); | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(dihedral_14_numbers, ene, 0.); | |||
| VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | |||
| Dihedral14LJEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | |||
| dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene); | |||
| cudaStreamSynchronize(stream); | |||
| return; | |||
| } | |||
| void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, | |||
| const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *ene, | |||
| cudaStream_t stream); | |||
| @@ -0,0 +1,27 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, | |||
| const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *ene, | |||
| cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H | |||
| @@ -0,0 +1,111 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void Dihedral14LJForceKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd, | |||
| const VECTOR *boxlength, const int *a_14, const int *b_14, | |||
| const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, | |||
| VECTOR *frc) { | |||
| int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (dihedral_14_i < dihedral_14_numbers) { | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| UINT_VECTOR_LJ_TYPE r1, r2; | |||
| VECTOR dr; | |||
| float dr2; | |||
| float dr_2; | |||
| float dr_4; | |||
| float dr_8; | |||
| float dr_14; | |||
| float frc_abs = 0.; | |||
| VECTOR temp_frc; | |||
| int x, y; | |||
| int atom_pair_LJ_type; | |||
| int atom_i = a_14[dihedral_14_i]; | |||
| int atom_j = b_14[dihedral_14_i]; | |||
| r1 = uint_crd[atom_i]; | |||
| r2 = uint_crd[atom_j]; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| dr_2 = 1.0 / dr2; | |||
| dr_4 = dr_2 * dr_2; | |||
| dr_8 = dr_4 * dr_4; | |||
| dr_14 = dr_8 * dr_4 * dr_2; | |||
| y = (r2.LJ_type - r1.LJ_type); | |||
| x = y >> 31; | |||
| y = (y ^ x) - x; | |||
| x = r2.LJ_type + r1.LJ_type; | |||
| r2.LJ_type = (x + y) >> 1; | |||
| x = (x - y) >> 1; | |||
| atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x; | |||
| frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8; | |||
| frc_abs *= lj_scale_factor[dihedral_14_i]; | |||
| temp_frc.x = frc_abs * dr.x; | |||
| temp_frc.y = frc_abs * dr.y; | |||
| temp_frc.z = frc_abs * dr.z; | |||
| atomicAdd(&frc[atom_j].x, -temp_frc.x); | |||
| atomicAdd(&frc[atom_j].y, -temp_frc.y); | |||
| atomicAdd(&frc[atom_j].z, -temp_frc.z); | |||
| atomicAdd(&frc[atom_i].x, temp_frc.x); | |||
| atomicAdd(&frc[atom_i].y, temp_frc.y); | |||
| atomicAdd(&frc[atom_i].z, temp_frc.z); | |||
| } | |||
| } | |||
| void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, | |||
| const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f, | |||
| cudaStream_t stream) { | |||
| size_t thread_per_block = 128; | |||
| size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128); | |||
| UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL; | |||
| Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers); | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | |||
| cudaStreamSynchronize(stream); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.); | |||
| VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | |||
| VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f)); | |||
| Dihedral14LJForceKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | |||
| dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, frc); | |||
| cudaStreamSynchronize(stream); | |||
| return; | |||
| } | |||
| void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, | |||
| const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f, | |||
| cudaStream_t stream); | |||
| @@ -0,0 +1,26 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | |||
| const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, | |||
| const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f, | |||
| cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H | |||
| @@ -0,0 +1,124 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void Dihedral14LJForceWithDirectCFKernel(const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd, | |||
| const VECTOR *boxlength, const int *a_14, const int *b_14, | |||
| const float *lj_scale_factor, const float *cf_scale_factor, | |||
| const float *LJ_type_A, const float *LJ_type_B, VECTOR *frc) { | |||
| int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (dihedral_14_i < dihedral_14_numbers) { | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| UINT_VECTOR_LJ_TYPE r1, r2; | |||
| VECTOR dr; | |||
| float dr_abs; | |||
| float dr2; | |||
| float dr_1; | |||
| float dr_2; | |||
| float dr_4; | |||
| float dr_8; | |||
| float dr_14; | |||
| float frc_abs = 0.; | |||
| VECTOR temp_frc; | |||
| int x, y; | |||
| int atom_pair_LJ_type; | |||
| int atom_i = a_14[dihedral_14_i]; | |||
| int atom_j = b_14[dihedral_14_i]; | |||
| r1 = uint_crd[atom_i]; | |||
| r2 = uint_crd[atom_j]; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| dr_2 = 1.0 / dr2; | |||
| dr_4 = dr_2 * dr_2; | |||
| dr_8 = dr_4 * dr_4; | |||
| dr_14 = dr_8 * dr_4 * dr_2; | |||
| dr_abs = norm3df(dr.x, dr.y, dr.z); | |||
| dr_1 = 1. / dr_abs; | |||
| float charge_i = r1.charge; | |||
| float charge_j = r2.charge; | |||
| float frc_cf_abs; | |||
| frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1; | |||
| frc_cf_abs = -charge_i * charge_j * frc_cf_abs; | |||
| // LJ | |||
| y = (r2.LJ_type - r1.LJ_type); | |||
| x = y >> 31; | |||
| y = (y ^ x) - x; | |||
| x = r2.LJ_type + r1.LJ_type; | |||
| r2.LJ_type = (x + y) >> 1; | |||
| x = (x - y) >> 1; | |||
| atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x; | |||
| frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8; | |||
| frc_abs *= lj_scale_factor[dihedral_14_i]; | |||
| frc_abs += frc_cf_abs; | |||
| temp_frc.x = frc_abs * dr.x; | |||
| temp_frc.y = frc_abs * dr.y; | |||
| temp_frc.z = frc_abs * dr.z; | |||
| atomicAdd(&frc[atom_j].x, -temp_frc.x); | |||
| atomicAdd(&frc[atom_j].y, -temp_frc.y); | |||
| atomicAdd(&frc[atom_j].z, -temp_frc.z); | |||
| atomicAdd(&frc[atom_i].x, temp_frc.x); | |||
| atomicAdd(&frc[atom_i].y, temp_frc.y); | |||
| atomicAdd(&frc[atom_i].z, temp_frc.z); | |||
| } | |||
| } | |||
| void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor, | |||
| const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream) { | |||
| size_t thread_per_block = 128; | |||
| size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128); | |||
| UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = NULL; | |||
| Cuda_Malloc_Safely(reinterpret_cast<void **>(&uint_crd_with_LJ), sizeof(UINT_VECTOR_LJ_TYPE) * atom_numbers); | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | |||
| cudaStreamSynchronize(stream); | |||
| VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.); | |||
| VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f)); | |||
| Dihedral14LJForceWithDirectCFKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | |||
| dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A, | |||
| LJ_type_B, frc); | |||
| return; | |||
| } | |||
| void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor, | |||
| const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream); | |||
| @@ -0,0 +1,26 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, | |||
| const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor, | |||
| const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H | |||
| @@ -0,0 +1,419 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh" | |||
| __global__ void Copy_List(const int element_numbers, const int *origin_list, int *list) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < element_numbers) { | |||
| list[i] = origin_list[i]; | |||
| } | |||
| } | |||
| __global__ void Copy_List(const int element_numbers, const float *origin_list, float *list) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < element_numbers) { | |||
| list[i] = origin_list[i]; | |||
| } | |||
| } | |||
| __global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, const VECTOR *crd, | |||
| UNSIGNED_INT_VECTOR *uint_crd) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0]; | |||
| uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[1]; | |||
| uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[2]; | |||
| /*uint_crd[atom_i].uint_x = 2 * uint_crd[atom_i].uint_x; | |||
| uint_crd[atom_i].uint_y = 2 * uint_crd[atom_i].uint_y; | |||
| uint_crd[atom_i].uint_z = 2 * uint_crd[atom_i].uint_z;*/ | |||
| uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1; | |||
| uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1; | |||
| uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1; | |||
| } | |||
| } | |||
| __global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR translation_vec) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < vector_numbers) { | |||
| vec_list[i].x = vec_list[i].x + translation_vec.x; | |||
| vec_list[i].y = vec_list[i].y + translation_vec.y; | |||
| vec_list[i].z = vec_list[i].z + translation_vec.z; | |||
| } | |||
| } | |||
| __global__ void Vector_Translation(const int vector_numbers, VECTOR *vec_list, const VECTOR *translation_vec) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < vector_numbers) { | |||
| vec_list[i].x = vec_list[i].x + translation_vec[0].x; | |||
| vec_list[i].y = vec_list[i].y + translation_vec[0].y; | |||
| vec_list[i].z = vec_list[i].z + translation_vec[0].z; | |||
| } | |||
| } | |||
| __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const float *box_length) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| if (crd[atom_i].x >= 0) { | |||
| if (crd[atom_i].x < box_length[0]) { | |||
| } else { | |||
| crd[atom_i].x = crd[atom_i].x - box_length[0]; | |||
| } | |||
| } else { | |||
| crd[atom_i].x = crd[atom_i].x + box_length[0]; | |||
| } | |||
| if (crd[atom_i].y >= 0) { | |||
| if (crd[atom_i].y < box_length[1]) { | |||
| } else { | |||
| crd[atom_i].y = crd[atom_i].y - box_length[1]; | |||
| } | |||
| } else { | |||
| crd[atom_i].y = crd[atom_i].y + box_length[1]; | |||
| } | |||
| if (crd[atom_i].z >= 0) { | |||
| if (crd[atom_i].z < box_length[2]) { | |||
| } else { | |||
| crd[atom_i].z = crd[atom_i].z - box_length[2]; | |||
| } | |||
| } else { | |||
| crd[atom_i].z = crd[atom_i].z + box_length[2]; | |||
| } | |||
| } | |||
| } | |||
| __global__ void Clear_Grid_Bucket(const int grid_numbers, int *atom_numbers_in_grid_bucket, GRID_BUCKET *bucket) { | |||
| int grid_serial = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (grid_serial < grid_numbers) { | |||
| GRID_BUCKET bucket_i = bucket[grid_serial]; | |||
| for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial]; i = i + 1) { | |||
| bucket_i.atom_serial[i] = -1; | |||
| } | |||
| atom_numbers_in_grid_bucket[grid_serial] = 0; | |||
| } | |||
| } | |||
| __global__ void Find_Atom_In_Grid_Serial(const int atom_numbers, const float *grid_length_inverse, const VECTOR *crd, | |||
| const int *grid_N, const int gridxy, int *atom_in_grid_serial) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| int Nx = static_cast<float>(crd[atom_i].x) * grid_length_inverse[0]; // crd.x must < boxlength.x | |||
| int Ny = static_cast<float>(crd[atom_i].y) * grid_length_inverse[1]; | |||
| int Nz = static_cast<float>(crd[atom_i].z) * grid_length_inverse[2]; | |||
| Nx = Nx & ((Nx - grid_N[0]) >> 31); | |||
| Ny = Ny & ((Ny - grid_N[1]) >> 31); | |||
| Nz = Nz & ((Nz - grid_N[2]) >> 31); | |||
| atom_in_grid_serial[atom_i] = Nz * gridxy + Ny * grid_N[0] + Nx; | |||
| } | |||
| } | |||
| __global__ void Put_Atom_In_Grid_Bucket(const int atom_numbers, const int *atom_in_grid_serial, GRID_BUCKET *bucket, | |||
| int *atom_numbers_in_grid_bucket) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| int grid_serial = atom_in_grid_serial[atom_i]; | |||
| GRID_BUCKET bucket_i = bucket[grid_serial]; | |||
| int a = atom_numbers_in_grid_bucket[grid_serial]; | |||
| atomicCAS(&bucket_i.atom_serial[a], -1, atom_i); | |||
| if (bucket_i.atom_serial[a] != atom_i) { | |||
| while (true) { | |||
| a = a + 1; | |||
| atomicCAS(&bucket_i.atom_serial[a], -1, atom_i); | |||
| if (bucket_i.atom_serial[a] == atom_i) { | |||
| atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1); | |||
| break; | |||
| } | |||
| } | |||
| } else { | |||
| atomicAdd(&atom_numbers_in_grid_bucket[grid_serial], 1); | |||
| } | |||
| } | |||
| } | |||
| __global__ void Find_atom_neighbors(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd, | |||
| const float *uint_dr_to_dr_cof, const int *atom_in_grid_serial, | |||
| const GRID_POINTER *gpointer, const GRID_BUCKET *bucket, | |||
| const int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *nl, | |||
| const float cutoff_skin_square) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| int grid_serial = atom_in_grid_serial[atom_i]; | |||
| int grid_serial2; | |||
| int atom_numbers_in_nl_lin = 0; | |||
| int atom_j; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| UNSIGNED_INT_VECTOR uint_crd_i = uint_crd[atom_i]; | |||
| NEIGHBOR_LIST nl_i = nl[atom_i]; | |||
| GRID_POINTER gpointer_i = gpointer[grid_serial]; | |||
| VECTOR dr; | |||
| float dr2; | |||
| for (int grid_cycle = 0; grid_cycle < 125; grid_cycle = grid_cycle + 1) { | |||
| grid_serial2 = gpointer_i.grid_serial[grid_cycle]; | |||
| GRID_BUCKET bucket_i = bucket[grid_serial2]; | |||
| for (int i = 0; i < atom_numbers_in_grid_bucket[grid_serial2]; i = i + 1) { | |||
| atom_j = bucket_i.atom_serial[i]; | |||
| if (atom_j > atom_i) { | |||
| int_x = uint_crd[atom_j].uint_x - uint_crd_i.uint_x; | |||
| int_y = uint_crd[atom_j].uint_y - uint_crd_i.uint_y; | |||
| int_z = uint_crd[atom_j].uint_z - uint_crd_i.uint_z; | |||
| dr.x = uint_dr_to_dr_cof[0] * int_x; | |||
| dr.y = uint_dr_to_dr_cof[1] * int_y; | |||
| dr.z = uint_dr_to_dr_cof[2] * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| if (dr2 < cutoff_skin_square) { | |||
| nl_i.atom_serial[atom_numbers_in_nl_lin] = atom_j; | |||
| atom_numbers_in_nl_lin = atom_numbers_in_nl_lin + 1; | |||
| } | |||
| } | |||
| } | |||
| } // 124 grid cycle | |||
| nl[atom_i].atom_numbers = atom_numbers_in_nl_lin; | |||
| } | |||
| } | |||
| __global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd, | |||
| const float half_skin_square, int *need_refresh_flag) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < atom_numbers) { | |||
| VECTOR r1 = crd[i]; | |||
| VECTOR r2 = old_crd[i]; | |||
| r1.x = r1.x - r2.x; | |||
| r1.y = r1.y - r2.y; | |||
| r1.z = r1.z - r2.z; | |||
| float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z; | |||
| if (r1_2 > half_skin_square) { | |||
| atomicExch(&need_refresh_flag[0], 1); | |||
| } | |||
| } | |||
| } | |||
| __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl, | |||
| const int *excluded_list_start, const int *excluded_list, | |||
| const int *excluded_atom_numbers) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| int excluded_number = excluded_atom_numbers[atom_i]; | |||
| if (excluded_number > 0) { | |||
| int list_start = excluded_list_start[atom_i]; | |||
| int atom_min = excluded_list[list_start]; | |||
| int list_end = list_start + excluded_number; | |||
| int atom_max = excluded_list[list_end - 1]; | |||
| NEIGHBOR_LIST nl_i = nl[atom_i]; | |||
| int atomnumbers_in_nl_lin = nl_i.atom_numbers; | |||
| int atom_j; | |||
| int excluded_atom_numbers_lin = list_end - list_start; | |||
| int excluded_atom_numbers_count = 0; | |||
| for (int i = 0; i < atomnumbers_in_nl_lin; i = i + 1) { | |||
| atom_j = nl_i.atom_serial[i]; | |||
| if (atom_j < atom_min || atom_j > atom_max) { | |||
| continue; | |||
| } else { | |||
| for (int j = list_start; j < list_end; j = j + 1) { | |||
| if (atom_j == excluded_list[j]) { | |||
| atomnumbers_in_nl_lin = atomnumbers_in_nl_lin - 1; | |||
| nl_i.atom_serial[i] = nl_i.atom_serial[atomnumbers_in_nl_lin]; | |||
| excluded_atom_numbers_count = excluded_atom_numbers_count + 1; | |||
| i = i - 1; | |||
| } | |||
| } | |||
| if (excluded_atom_numbers_count < excluded_atom_numbers_lin) { | |||
| } else { | |||
| break; | |||
| } // break | |||
| } // in the range of excluded min to max | |||
| } // cycle for neighbors | |||
| nl[atom_i].atom_numbers = atomnumbers_in_nl_lin; | |||
| } // if need excluded | |||
| } | |||
| } | |||
| void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, VECTOR *old_crd, | |||
| UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, float *uint_dr_to_dr_cof, | |||
| int *atom_in_grid_serial, const float skin, float *box_length, const GRID_POINTER *gpointer, | |||
| GRID_BUCKET *bucket, int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl, | |||
| int *excluded_list_start, int *excluded_list, int *excluded_numbers, | |||
| float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy, | |||
| cudaStream_t stream) { | |||
| if (refresh_sign[0] == 1) { | |||
| VECTOR trans_vec = {-skin, -skin, -skin}; | |||
| Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>( | |||
| grid_numbers, atom_numbers_in_grid_bucket, bucket); | |||
| Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd, | |||
| trans_vec); | |||
| Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd, | |||
| box_length); | |||
| Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>( | |||
| atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial); | |||
| trans_vec.x = -trans_vec.x; | |||
| trans_vec.y = -trans_vec.y; | |||
| trans_vec.z = -trans_vec.z; | |||
| Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd, | |||
| trans_vec); | |||
| Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>( | |||
| 3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd)); | |||
| Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>( | |||
| atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket); | |||
| Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>( | |||
| atom_numbers, crd_to_uint_crd_cof, crd, uint_crd); | |||
| Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, | |||
| d_nl, cutoff_skin_square); | |||
| Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, | |||
| stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list, | |||
| excluded_numbers); | |||
| refresh_sign[0] = 0; | |||
| } | |||
| } | |||
| void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd, | |||
| VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof, | |||
| float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin, | |||
| float *box_length, const GRID_POINTER *gpointer, GRID_BUCKET *bucket, | |||
| int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl, int *excluded_list_start, | |||
| int *excluded_list, int *excluded_numbers, float cutoff_skin_square, | |||
| int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy, | |||
| cudaStream_t stream) { | |||
| VECTOR trans_vec = {skin, skin, skin}; | |||
| Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>( | |||
| grid_numbers, atom_numbers_in_grid_bucket, bucket); | |||
| Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length); | |||
| Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial); | |||
| Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec); | |||
| Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 32), 32, 0, stream>>>( | |||
| 3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd)); | |||
| Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket); | |||
| Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof, | |||
| crd, uint_crd); | |||
| Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, | |||
| cutoff_skin_square); | |||
| Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, | |||
| stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list, | |||
| excluded_numbers); | |||
| } | |||
| __global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, | |||
| int *nl_atom_serial, NEIGHBOR_LIST *nl) { | |||
| for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) { | |||
| nl[i].atom_numbers = nl_atom_numbers[i]; | |||
| nl[i].atom_serial = nl_atom_serial + i * max_neighbor_numbers; | |||
| } | |||
| } | |||
| void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial, | |||
| NEIGHBOR_LIST *nl, cudaStream_t stream) { | |||
| construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>( | |||
| atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl); | |||
| } | |||
| void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int Nxy, float cutoff_skin_square, | |||
| int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, | |||
| float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket, | |||
| VECTOR *crd, VECTOR *old_crd, float *crd_to_uint_crd_cof, | |||
| UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, | |||
| NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list, | |||
| int *excluded_numbers, cudaStream_t stream) { | |||
| VECTOR trans_vec = {-skin, -skin, -skin}; | |||
| Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>( | |||
| grid_numbers, atom_numbers_in_grid_bucket, bucket); | |||
| Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec); | |||
| Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length); | |||
| Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial); | |||
| trans_vec.x = -trans_vec.x; | |||
| trans_vec.y = -trans_vec.y; | |||
| trans_vec.z = -trans_vec.z; | |||
| Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec); | |||
| cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream); | |||
| Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, atom_in_grid_serial, bucket, atom_numbers_in_grid_bucket); | |||
| Crd_To_Uint_Crd<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof, | |||
| crd, uint_crd); | |||
| Find_atom_neighbors<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, uint_dr_to_dr_cof, atom_in_grid_serial, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, | |||
| cutoff_skin_square); | |||
| Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers); | |||
| } | |||
| __global__ void Mul_half(float *src, float *dst) { | |||
| int index = threadIdx.x; | |||
| if (index < 3) { | |||
| dst[index] = src[index] * 0.5; | |||
| } | |||
| } | |||
| void Neighbor_List_Update(int grid_numbers, int atom_numbers, int refresh_count, int refresh_interval, | |||
| int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square, | |||
| int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse, | |||
| int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd, | |||
| float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, | |||
| float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, | |||
| int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square, | |||
| int *is_need_refresh_neighbor_list, cudaStream_t stream) { | |||
| if (not_first_time) { | |||
| if (refresh_interval > 0) { | |||
| if (refresh_count % refresh_interval == 0) { | |||
| Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof); | |||
| Refresh_Neighbor_List_No_Check( | |||
| grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket, | |||
| grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd), | |||
| reinterpret_cast<VECTOR *>(old_crd), crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), | |||
| uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, excluded_numbers, stream); | |||
| } | |||
| refresh_count += 1; | |||
| } else { | |||
| Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>( | |||
| atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd), half_skin_square, | |||
| is_need_refresh_neighbor_list); | |||
| Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof); | |||
| Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd), | |||
| reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), | |||
| half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, | |||
| gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list, | |||
| excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, Nxy, | |||
| stream); | |||
| } | |||
| } else { | |||
| Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof); | |||
| Refresh_Neighbor_List_First_Time( | |||
| is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd), | |||
| reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), half_crd_to_uint_crd_cof, | |||
| uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, | |||
| excluded_list_start, excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, | |||
| grid_N, Nxy, stream); | |||
| } | |||
| } | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_ | |||
| struct VECTOR { | |||
| float x; | |||
| float y; | |||
| float z; | |||
| }; | |||
| struct INT_VECTOR { | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| }; | |||
| struct UNSIGNED_INT_VECTOR { | |||
| unsigned int uint_x; | |||
| unsigned int uint_y; | |||
| unsigned int uint_z; | |||
| }; | |||
| struct NEIGHBOR_LIST { | |||
| int atom_numbers; | |||
| int *atom_serial; | |||
| }; | |||
| struct GRID_BUCKET { | |||
| int *atom_serial; | |||
| }; | |||
| struct GRID_POINTER { | |||
| int *grid_serial; | |||
| }; | |||
| void Construct_Neighbor_List(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial, | |||
| NEIGHBOR_LIST *nl, cudaStream_t stream); | |||
| void Neighbor_List_Update(int grid_numbers, int atom_numbers, int refresh_count, int refresh_interval, | |||
| int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square, | |||
| int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse, | |||
| int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd, | |||
| float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, | |||
| float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, | |||
| int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square, | |||
| int *is_need_refresh_neighbor_list, cudaStream_t stream); | |||
| #endif | |||
| @@ -0,0 +1,139 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt, | |||
| const float exp_gamma, const float *inverse_mass, | |||
| const float *sqrt_mass_inverse, VECTOR *vel, VECTOR *crd, | |||
| VECTOR *frc, VECTOR *acc, VECTOR *random_frc) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < atom_numbers) { | |||
| acc[i].x = inverse_mass[i] * frc[i].x; | |||
| acc[i].y = inverse_mass[i] * frc[i].y; | |||
| acc[i].z = inverse_mass[i] * frc[i].z; | |||
| vel[i].x = vel[i].x + dt * acc[i].x; | |||
| vel[i].y = vel[i].y + dt * acc[i].y; | |||
| vel[i].z = vel[i].z + dt * acc[i].z; | |||
| crd[i].x = crd[i].x + half_dt * vel[i].x; | |||
| crd[i].y = crd[i].y + half_dt * vel[i].y; | |||
| crd[i].z = crd[i].z + half_dt * vel[i].z; | |||
| vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x; | |||
| vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y; | |||
| vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z; | |||
| crd[i].x = crd[i].x + half_dt * vel[i].x; | |||
| crd[i].y = crd[i].y + half_dt * vel[i].y; | |||
| crd[i].z = crd[i].z + half_dt * vel[i].z; | |||
| frc[i].x = 0.; | |||
| frc[i].y = 0.; | |||
| frc[i].z = 0.; | |||
| } | |||
| } | |||
| __global__ void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity(const int atom_numbers, const float half_dt, | |||
| const float dt, const float exp_gamma, | |||
| const float *inverse_mass, | |||
| const float *sqrt_mass_inverse, VECTOR *vel, | |||
| VECTOR *crd, VECTOR *frc, VECTOR *acc, | |||
| VECTOR *random_frc, const float max_vel) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| float abs_vel; | |||
| if (i < atom_numbers) { | |||
| acc[i].x = inverse_mass[i] * frc[i].x; | |||
| acc[i].y = inverse_mass[i] * frc[i].y; | |||
| acc[i].z = inverse_mass[i] * frc[i].z; | |||
| vel[i].x = vel[i].x + dt * acc[i].x; | |||
| vel[i].y = vel[i].y + dt * acc[i].y; | |||
| vel[i].z = vel[i].z + dt * acc[i].z; | |||
| abs_vel = norm3df(vel[i].x, vel[i].y, vel[i].z); | |||
| if (abs_vel < max_vel) { | |||
| } else { | |||
| abs_vel = max_vel / abs_vel; | |||
| vel[i].x = abs_vel * vel[i].x; | |||
| vel[i].y = abs_vel * vel[i].y; | |||
| vel[i].z = abs_vel * vel[i].z; | |||
| } | |||
| crd[i].x = crd[i].x + half_dt * vel[i].x; | |||
| crd[i].y = crd[i].y + half_dt * vel[i].y; | |||
| crd[i].z = crd[i].z + half_dt * vel[i].z; | |||
| vel[i].x = exp_gamma * vel[i].x + sqrt_mass_inverse[i] * random_frc[i].x; | |||
| vel[i].y = exp_gamma * vel[i].y + sqrt_mass_inverse[i] * random_frc[i].y; | |||
| vel[i].z = exp_gamma * vel[i].z + sqrt_mass_inverse[i] * random_frc[i].z; | |||
| crd[i].x = crd[i].x + half_dt * vel[i].x; | |||
| crd[i].y = crd[i].y + half_dt * vel[i].y; | |||
| crd[i].z = crd[i].z + half_dt * vel[i].z; | |||
| frc[i].x = 0.; | |||
| frc[i].y = 0.; | |||
| frc[i].z = 0.; | |||
| } | |||
| } | |||
| void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt, | |||
| const float exp_gamma, const int is_max_velocity, const float max_velocity, | |||
| const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f, | |||
| float *frc_f, float *acc_f, cudaStream_t stream) { | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, vel_f, 0.); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, crd_f, 0.); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, frc_f, 0.); | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128>>>(3 * atom_numbers, acc_f, 0.); | |||
| VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f)); | |||
| VECTOR *vel = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(vel_f)); | |||
| VECTOR *acc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(acc_f)); | |||
| VECTOR *crd = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(crd_f)); | |||
| curandStatePhilox4_32_10_t *rand_state; | |||
| VECTOR *random_force; | |||
| Cuda_Malloc_Safely(reinterpret_cast<void **>(&random_force), sizeof(float4) * float4_numbers); | |||
| Cuda_Malloc_Safely(reinterpret_cast<void **>(&rand_state), sizeof(curandStatePhilox4_32_10_t) * float4_numbers); | |||
| Setup_Rand_Normal_Kernel<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32>>>(float4_numbers, rand_state, 1); | |||
| Rand_Normal<<<ceilf(static_cast<float>(float4_numbers) / 32.), 32, 0, stream>>>( | |||
| float4_numbers, rand_state, reinterpret_cast<float4 *>(random_force)); | |||
| if (!is_max_velocity) { | |||
| MD_Iteration_Leap_Frog_With_LiuJian<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, half_dt, dt, exp_gamma, d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc, random_force); | |||
| } else { | |||
| MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Velocity<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, | |||
| stream>>>(atom_numbers, half_dt, dt, exp_gamma, | |||
| d_mass_inverse, d_sqrt_mass, vel, crd, frc, acc, | |||
| random_force, max_velocity); | |||
| cudaStreamSynchronize(stream); | |||
| cudaFree(random_force); | |||
| cudaFree(rand_state); | |||
| return; | |||
| } | |||
| } | |||
| void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt, | |||
| const float exp_gamma, const int is_max_velocity, const float max_velocity, | |||
| const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f, | |||
| float *frc_f, float *acc_f, cudaStream_t stream); | |||
| @@ -0,0 +1,27 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void MDIterationLeapFrog(const int float4_numbers, const int atom_numbers, const float half_dt, const float dt, | |||
| const float exp_gamma, const int is_max_velocity, const float max_velocity, | |||
| const float *d_mass_inverse, const float *d_sqrt_mass, float *vel_f, float *crd_f, | |||
| float *frc_f, float *acc_f, cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NVTIT_MD_ITERATION_LEAP_FROG_IMPL_H | |||
| @@ -0,0 +1,230 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_COMMON_H_ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __constant__ float PME_Ma[4] = {1.0 / 6.0, -0.5, 0.5, -1.0 / 6.0}; | |||
| __constant__ float PME_Mb[4] = {0, 0.5, -1, 0.5}; | |||
| __constant__ float PME_Mc[4] = {0, 0.5, 0, -0.5}; | |||
| __constant__ float PME_Md[4] = {0, 1.0 / 6.0, 4.0 / 6.0, 1.0 / 6.0}; | |||
| __constant__ float PME_dMa[4] = {0.5, -1.5, 1.5, -0.5}; | |||
| __constant__ float PME_dMb[4] = {0, 1, -2, 1}; | |||
| __constant__ float PME_dMc[4] = {0, 0.5, 0, -0.5}; | |||
| #define PI 3.1415926 | |||
| const float periodic_factor_inverse = 2.32830643e-10; | |||
| static dim3 thread_PME; | |||
| const float cutoff = 10.0; | |||
| const float tolerance = 0.00001; | |||
| static float M_(float u, int n) { | |||
| if (n == 2) { | |||
| if (u > 2 || u < 0) return 0; | |||
| return 1 - abs(u - 1); | |||
| } else { | |||
| return u / (n - 1) * M_(u, n - 1) + (n - u) / (n - 1) * M_(u - 1, n - 1); | |||
| } | |||
| } | |||
| static float Get_Beta(float cutoff, float tolerance) { | |||
| float beta, low, high, tempf; | |||
| int ilow, ihigh; | |||
| high = 1.0; | |||
| ihigh = 1; | |||
| while (1) { | |||
| tempf = erfc(high * cutoff) / cutoff; | |||
| if (tempf <= tolerance) break; | |||
| high *= 2; | |||
| ihigh++; | |||
| } | |||
| ihigh += 50; | |||
| low = 0.0; | |||
| for (ilow = 1; ilow < ihigh; ilow++) { | |||
| beta = (low + high) / 2; | |||
| tempf = erfc(beta * cutoff) / cutoff; | |||
| if (tempf >= tolerance) | |||
| low = beta; | |||
| else | |||
| high = beta; | |||
| } | |||
| return beta; | |||
| } | |||
| static cufftComplex expc(cufftComplex z) { | |||
| cufftComplex res; | |||
| float t = expf(z.x); | |||
| sincosf(z.y, &res.y, &res.x); | |||
| res.x *= t; | |||
| res.y *= t; | |||
| return res; | |||
| } | |||
| static float getb(int k, int NFFT, int B_order) { | |||
| cufftComplex tempc, tempc2, res; | |||
| float tempf; | |||
| tempc2.x = 0; | |||
| tempc2.y = 0; | |||
| tempc.x = 0; | |||
| tempc.y = 2 * (B_order - 1) * PI * k / NFFT; | |||
| res = expc(tempc); | |||
| for (int kk = 0; kk < (B_order - 1); kk++) { | |||
| tempc.x = 0; | |||
| tempc.y = 2 * PI * k / NFFT * kk; | |||
| tempc = expc(tempc); | |||
| tempf = M_(kk + 1, B_order); | |||
| tempc2.x += tempf * tempc.x; | |||
| tempc2.y += tempf * tempc.y; | |||
| } | |||
| res = cuCdivf(res, tempc2); | |||
| return res.x * res.x + res.y * res.y; | |||
| } | |||
| __global__ static void PME_Atom_Near(const UNSIGNED_INT_VECTOR *uint_crd, int *PME_atom_near, const int PME_Nin, | |||
| const float periodic_factor_inverse_x, const float periodic_factor_inverse_y, | |||
| const float periodic_factor_inverse_z, const int atom_numbers, const int fftx, | |||
| const int ffty, const int fftz, const UNSIGNED_INT_VECTOR *PME_kxyz, | |||
| UNSIGNED_INT_VECTOR *PME_uxyz, VECTOR *PME_frxyz) { | |||
| int atom = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom < atom_numbers) { | |||
| UNSIGNED_INT_VECTOR *temp_uxyz = &PME_uxyz[atom]; | |||
| int k, tempux, tempuy, tempuz; | |||
| float tempf; | |||
| tempf = static_cast<float> (uint_crd[atom].uint_x) * periodic_factor_inverse_x; | |||
| tempux = static_cast<int> (tempf); | |||
| PME_frxyz[atom].x = tempf - tempux; | |||
| tempf = static_cast<float> (uint_crd[atom].uint_y) * periodic_factor_inverse_y; | |||
| tempuy = static_cast<int> (tempf); | |||
| PME_frxyz[atom].y = tempf - tempuy; | |||
| tempf = static_cast<float> (uint_crd[atom].uint_z) * periodic_factor_inverse_z; | |||
| tempuz = static_cast<int> (tempf); | |||
| PME_frxyz[atom].z = tempf - tempuz; | |||
| if (tempux != (*temp_uxyz).uint_x || tempuy != (*temp_uxyz).uint_y || tempuz != (*temp_uxyz).uint_z) { | |||
| (*temp_uxyz).uint_x = tempux; | |||
| (*temp_uxyz).uint_y = tempuy; | |||
| (*temp_uxyz).uint_z = tempuz; | |||
| int *temp_near = PME_atom_near + atom * 64; | |||
| int kx, ky, kz; | |||
| for (k = 0; k < 64; k++) { | |||
| UNSIGNED_INT_VECTOR temp_kxyz = PME_kxyz[k]; | |||
| kx = tempux - temp_kxyz.uint_x; | |||
| if (kx < 0) kx += fftx; | |||
| ky = tempuy - temp_kxyz.uint_y; | |||
| if (ky < 0) ky += ffty; | |||
| kz = tempuz - temp_kxyz.uint_z; | |||
| if (kz < 0) kz += fftz; | |||
| temp_near[k] = kx * PME_Nin + ky * fftz + kz; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| __global__ static void PME_Q_Spread(int *PME_atom_near, const float *charge, const VECTOR *PME_frxyz, float *PME_Q, | |||
| const UNSIGNED_INT_VECTOR *PME_kxyz, const int atom_numbers) { | |||
| int atom = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom < atom_numbers) { | |||
| int k; | |||
| float tempf, tempQ, tempf2; | |||
| int *temp_near = PME_atom_near + atom * 64; | |||
| VECTOR temp_frxyz = PME_frxyz[atom]; | |||
| float tempcharge = charge[atom]; | |||
| UNSIGNED_INT_VECTOR temp_kxyz; | |||
| unsigned int kx; | |||
| for (k = threadIdx.y; k < 64; k = k + blockDim.y) { | |||
| temp_kxyz = PME_kxyz[k]; | |||
| kx = temp_kxyz.uint_x; | |||
| tempf = (temp_frxyz.x); | |||
| tempf2 = tempf * tempf; | |||
| tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; | |||
| tempQ = tempcharge * tempf; | |||
| kx = temp_kxyz.uint_y; | |||
| tempf = (temp_frxyz.y); | |||
| tempf2 = tempf * tempf; | |||
| tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; | |||
| tempQ = tempQ * tempf; | |||
| kx = temp_kxyz.uint_z; | |||
| tempf = (temp_frxyz.z); | |||
| tempf2 = tempf * tempf; | |||
| tempf = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; | |||
| tempQ = tempQ * tempf; | |||
| atomicAdd(&PME_Q[temp_near[k]], tempQ); | |||
| } | |||
| } | |||
| } | |||
| __global__ static void PME_Direct_Energy(const int atom_numbers, const NEIGHBOR_LIST *nl, | |||
| const UNSIGNED_INT_VECTOR *uint_crd, const VECTOR *boxlength, | |||
| const float *charge, const float beta, const float cutoff_square, | |||
| float *direct_ene) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| NEIGHBOR_LIST nl_i = nl[atom_i]; | |||
| int N = nl_i.atom_numbers; | |||
| int atom_j; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2; | |||
| VECTOR dr; | |||
| float dr2; | |||
| float dr_abs; | |||
| // float dr_inverse; | |||
| float ene_temp; | |||
| float charge_i = charge[atom_i]; | |||
| float ene_lin = 0.; | |||
| // int x, y; | |||
| // int atom_pair_LJ_type; | |||
| for (int j = threadIdx.y; j < N; j = j + blockDim.y) { | |||
| atom_j = nl_i.atom_serial[j]; | |||
| r2 = uint_crd[atom_j]; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = boxlength[0].x * int_x; | |||
| dr.y = boxlength[0].y * int_y; | |||
| dr.z = boxlength[0].z * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| if (dr2 < cutoff_square) { | |||
| dr_abs = norm3df(dr.x, dr.y, dr.z); | |||
| ene_temp = charge_i * charge[atom_j] * erfcf(beta * dr_abs) / dr_abs; | |||
| ene_lin = ene_lin + ene_temp; | |||
| } | |||
| } | |||
| atomicAdd(direct_ene, ene_lin); | |||
| } | |||
| } | |||
| #endif | |||
| @@ -0,0 +1,234 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| __global__ void PME_Energy_Product(const int element_number, const float *list1, const float *list2, float *sum) { | |||
| if (threadIdx.x == 0) { | |||
| sum[0] = 0.; | |||
| } | |||
| __syncthreads(); | |||
| float lin = 0.0; | |||
| for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) { | |||
| lin = lin + list1[i] * list2[i]; | |||
| } | |||
| atomicAdd(sum, lin); | |||
| } | |||
| __global__ void PME_Energy_Reciprocal(const int element_number, const cufftComplex *FQ, const float *BC, float *sum) { | |||
| if (threadIdx.x == 0) { | |||
| sum[0] = 0.; | |||
| } | |||
| __syncthreads(); | |||
| float lin = 0.0; | |||
| cufftComplex FQ_i; | |||
| for (int i = threadIdx.x; i < element_number; i = i + blockDim.x) { | |||
| FQ_i = FQ[i]; | |||
| lin = lin + (FQ_i.x * FQ_i.x + FQ_i.y * FQ_i.y) * BC[i]; | |||
| } | |||
| atomicAdd(sum, lin); | |||
| } | |||
| __global__ void PME_Excluded_Energy_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd, | |||
| const VECTOR *sacler, const float *charge, const float pme_beta, | |||
| const float sqrt_pi, const int *excluded_list_start, | |||
| const int *excluded_list, const int *excluded_atom_numbers, float *ene) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| int excluded_number = excluded_atom_numbers[atom_i]; | |||
| if (excluded_number > 0) { | |||
| int list_start = excluded_list_start[atom_i]; | |||
| // int atom_min = excluded_list[list_start]; | |||
| int list_end = list_start + excluded_number; | |||
| int atom_j; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| float charge_i = charge[atom_i]; | |||
| float charge_j; | |||
| float dr_abs; | |||
| float beta_dr; | |||
| UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2; | |||
| VECTOR dr; | |||
| float dr2; | |||
| float ene_lin = 0.; | |||
| for (int i = list_start; i < list_end; i = i + 1) { | |||
| atom_j = excluded_list[i]; | |||
| r2 = uint_crd[atom_j]; | |||
| charge_j = charge[atom_j]; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = sacler[0].x * int_x; | |||
| dr.y = sacler[0].y * int_y; | |||
| dr.z = sacler[0].z * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| dr_abs = sqrtf(dr2); | |||
| beta_dr = pme_beta * dr_abs; | |||
| ene_lin -= charge_i * charge_j * erff(beta_dr) / dr_abs; | |||
| } | |||
| atomicAdd(ene, ene_lin); | |||
| } | |||
| } | |||
| } | |||
| void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *box_length_f, float *PME_BC, | |||
| int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, | |||
| const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, | |||
| const float *scaler_f, const int *excluded_list_start, const int *excluded_list, | |||
| const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, | |||
| float *d_correction_ene, cudaStream_t stream) { | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f)); | |||
| int max_neighbor_numbers = 800; | |||
| NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl); | |||
| construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>( | |||
| atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a); | |||
| std::vector<float> h_box_length(3); | |||
| cudaMemcpyAsync(h_box_length.data(), box_length_f, sizeof(float) * h_box_length.size(), cudaMemcpyDeviceToHost, | |||
| stream); | |||
| cudaStreamSynchronize(stream); | |||
| VECTOR *box_length = reinterpret_cast<VECTOR *>(h_box_length.data()); | |||
| UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_uxyz); | |||
| UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_kxyz); | |||
| VECTOR *PME_frxyz = reinterpret_cast<VECTOR *>(pme_frxyz); | |||
| cufftComplex *PME_FQ = reinterpret_cast<cufftComplex *>(pme_fq); | |||
| cufftHandle PME_plan_r2c; | |||
| cufftHandle PME_plan_c2r; | |||
| cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C); | |||
| cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R); | |||
| cufftSetStream(PME_plan_r2c, stream); | |||
| cufftSetStream(PME_plan_c2r, stream); | |||
| thread_PME.x = 8; | |||
| thread_PME.y = 8; | |||
| int PME_Nin = ffty * fftz; | |||
| int PME_Nfft = fftx * ffty * (fftz / 2 + 1); | |||
| int PME_Nall = fftx * ffty * fftz; | |||
| float volume = box_length[0].x * box_length[0].y * box_length[0].z; | |||
| UNSIGNED_INT_VECTOR *PME_kxyz_cpu; | |||
| Malloc_Safely(reinterpret_cast<void **>(&PME_kxyz_cpu), sizeof(UNSIGNED_INT_VECTOR) * 64); | |||
| int kx, ky, kz, kxrp, kyrp, kzrp, index; | |||
| for (kx = 0; kx < 4; kx++) { | |||
| for (ky = 0; ky < 4; ky++) { | |||
| for (kz = 0; kz < 4; kz++) { | |||
| index = kx * 16 + ky * 4 + kz; | |||
| PME_kxyz_cpu[index].uint_x = kx; | |||
| PME_kxyz_cpu[index].uint_y = ky; | |||
| PME_kxyz_cpu[index].uint_z = kz; | |||
| } | |||
| } | |||
| } | |||
| cudaMemcpyAsync(PME_kxyz, PME_kxyz_cpu, sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice, stream); | |||
| cudaStreamSynchronize(stream); | |||
| free(PME_kxyz_cpu); | |||
| // initial start | |||
| float *B1, *B2, *B3, *PME_BC0; | |||
| B1 = reinterpret_cast<float *>(malloc(sizeof(float) * fftx)); | |||
| B2 = reinterpret_cast<float *>(malloc(sizeof(float) * ffty)); | |||
| B3 = reinterpret_cast<float *>(malloc(sizeof(float) * fftz)); | |||
| PME_BC0 = reinterpret_cast<float *>(malloc(sizeof(float) * PME_Nfft)); | |||
| for (kx = 0; kx < fftx; kx++) { | |||
| B1[kx] = getb(kx, fftx, 4); | |||
| } | |||
| for (ky = 0; ky < ffty; ky++) { | |||
| B2[ky] = getb(ky, ffty, 4); | |||
| } | |||
| for (kz = 0; kz < fftz; kz++) { | |||
| B3[kz] = getb(kz, fftz, 4); | |||
| } | |||
| float mprefactor = PI * PI / -beta / beta; | |||
| float msq; | |||
| for (kx = 0; kx < fftx; kx++) { | |||
| kxrp = kx; | |||
| if (kx > fftx / 2) kxrp = fftx - kx; | |||
| for (ky = 0; ky < ffty; ky++) { | |||
| kyrp = ky; | |||
| if (ky > ffty / 2) kyrp = ffty - ky; | |||
| for (kz = 0; kz <= fftz / 2; kz++) { | |||
| kzrp = kz; | |||
| msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y + | |||
| kzrp * kzrp / box_length[0].z / box_length[0].z; | |||
| index = kx * ffty * (fftz / 2 + 1) + ky * (fftz / 2 + 1) + kz; | |||
| if ((kx + ky + kz) == 0) { | |||
| PME_BC0[index] = 0; | |||
| } else { | |||
| PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume; | |||
| } | |||
| PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz]; | |||
| } | |||
| } | |||
| } | |||
| cudaMemcpyAsync(PME_BC, PME_BC0, sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice, stream); | |||
| cudaStreamSynchronize(stream); | |||
| free(B1); | |||
| free(B2); | |||
| free(B3); | |||
| free(PME_BC0); | |||
| Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast<int *>(PME_uxyz), | |||
| 1 << 30); | |||
| PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>( | |||
| uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty, | |||
| periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz); | |||
| Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0); | |||
| PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q, | |||
| PME_kxyz, atom_numbers); | |||
| cufftExecR2C(PME_plan_r2c, reinterpret_cast<float *>(PME_Q), reinterpret_cast<cufftComplex *>(PME_FQ)); | |||
| PME_Energy_Reciprocal<<<1, 1024, 0, stream>>>(PME_Nfft, PME_FQ, PME_BC, d_reciprocal_ene); | |||
| PME_Energy_Product<<<1, 1024, 0, stream>>>(atom_numbers, charge, charge, d_self_ene); | |||
| Scale_List<<<1, 1, 0, stream>>>(1, d_self_ene, -beta / sqrtf(PI)); | |||
| Reset_List<<<1, 1, 0, stream>>>(1, d_direct_ene, 0.0); | |||
| PME_Direct_Energy<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>( | |||
| atom_numbers, nl_a, uint_crd, scaler, charge, beta, cutoff * cutoff, d_direct_ene); | |||
| Reset_List<<<1, 1, 0, stream>>>(1, d_correction_ene, 0.0); | |||
| PME_Excluded_Energy_Correction<<<atom_numbers / 32 + 1, 32, 0, stream>>>( | |||
| atom_numbers, uint_crd, scaler, charge, beta, sqrtf(PI), excluded_list_start, excluded_list, excluded_atom_numbers, | |||
| d_correction_ene); | |||
| return; | |||
| } | |||
| void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *box_length_f, float *PME_BC, | |||
| int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, | |||
| const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, | |||
| const float *scaler_f, const int *excluded_list_start, const int *excluded_list, | |||
| const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, | |||
| float *d_correction_ene, cudaStream_t stream); | |||
| @@ -0,0 +1,30 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_ENERGY_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include <vector> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *box_length_f, float *PME_BC, | |||
| int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, | |||
| const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, | |||
| const float *scaler_f, const int *excluded_list_start, const int *excluded_list, | |||
| const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, | |||
| float *d_correction_ene, cudaStream_t stream); | |||
| #endif | |||
| @@ -0,0 +1,102 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh" | |||
| __global__ void PME_Excluded_Force_Correction(const int atom_numbers, const UNSIGNED_INT_VECTOR *uint_crd, | |||
| const VECTOR *sacler, const float *charge, const float pme_beta, | |||
| const float sqrt_pi, const int *excluded_list_start, | |||
| const int *excluded_list, const int *excluded_atom_numbers, VECTOR *frc) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| int excluded_numbers = excluded_atom_numbers[atom_i]; | |||
| if (excluded_numbers > 0) { | |||
| int list_start = excluded_list_start[atom_i]; | |||
| // int atom_min = excluded_list[list_start]; | |||
| int list_end = list_start + excluded_numbers; | |||
| int atom_j; | |||
| int int_x; | |||
| int int_y; | |||
| int int_z; | |||
| float charge_i = charge[atom_i]; | |||
| float charge_j; | |||
| float dr_abs; | |||
| float beta_dr; | |||
| UNSIGNED_INT_VECTOR r1 = uint_crd[atom_i], r2; | |||
| VECTOR dr; | |||
| float dr2; | |||
| float frc_abs = 0.; | |||
| VECTOR frc_lin; | |||
| VECTOR frc_record = {0., 0., 0.}; | |||
| for (int i = list_start; i < list_end; i = i + 1) { | |||
| atom_j = excluded_list[i]; | |||
| r2 = uint_crd[atom_j]; | |||
| charge_j = charge[atom_j]; | |||
| int_x = r2.uint_x - r1.uint_x; | |||
| int_y = r2.uint_y - r1.uint_y; | |||
| int_z = r2.uint_z - r1.uint_z; | |||
| dr.x = sacler[0].x * int_x; | |||
| dr.y = sacler[0].y * int_y; | |||
| dr.z = sacler[0].z * int_z; | |||
| dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z; | |||
| dr_abs = sqrtf(dr2); | |||
| beta_dr = pme_beta * dr_abs; | |||
| // sqrt_pi= 2/sqrt(3.141592654); | |||
| frc_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr); | |||
| frc_abs = (frc_abs - 1.) / dr2 / dr_abs; | |||
| frc_abs = -charge_i * charge_j * frc_abs; | |||
| frc_lin.x = frc_abs * dr.x; | |||
| frc_lin.y = frc_abs * dr.y; | |||
| frc_lin.z = frc_abs * dr.z; | |||
| frc_record.x = frc_record.x + frc_lin.x; | |||
| frc_record.y = frc_record.y + frc_lin.y; | |||
| frc_record.z = frc_record.z + frc_lin.z; | |||
| atomicAdd(&frc[atom_j].x, -frc_lin.x); | |||
| atomicAdd(&frc[atom_j].y, -frc_lin.y); | |||
| atomicAdd(&frc[atom_j].z, -frc_lin.z); | |||
| } // atom_j cycle | |||
| atomicAdd(&frc[atom_i].x, frc_record.x); | |||
| atomicAdd(&frc[atom_i].y, frc_record.y); | |||
| atomicAdd(&frc[atom_i].z, frc_record.z); | |||
| } // if need excluded | |||
| } | |||
| } | |||
| void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f, | |||
| const float *charge, const int *excluded_list_start, const int *excluded_list, | |||
| const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream) { | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f); | |||
| VECTOR *sacler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(sacler_f)); | |||
| PME_Excluded_Force_Correction<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>( | |||
| atom_numbers, uint_crd, sacler, charge, pme_beta, TWO_DIVIDED_BY_SQRT_PI, excluded_list_start, excluded_list, | |||
| excluded_atom_numbers, frc); | |||
| return; | |||
| } | |||
| void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f, | |||
| const float *charge, const int *excluded_list_start, const int *excluded_list, | |||
| const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream); | |||
| @@ -0,0 +1,26 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_EXCLUDED_FORCE_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_EXCLUDED_FORCE_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f, | |||
| const float *charge, const int *excluded_list_start, const int *excluded_list, | |||
| const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream); | |||
| #endif | |||
| @@ -0,0 +1,204 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_common.cuh" | |||
| __global__ void PME_BCFQ(cufftComplex *PME_FQ, float *PME_BC, int PME_Nfft) { | |||
| int index = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (index < PME_Nfft) { | |||
| float tempf = PME_BC[index]; | |||
| cufftComplex tempc = PME_FQ[index]; | |||
| PME_FQ[index].x = tempc.x * tempf; | |||
| PME_FQ[index].y = tempc.y * tempf; | |||
| } | |||
| } | |||
| __global__ void PME_Final(int *PME_atom_near, const float *charge, const float *PME_Q, VECTOR *force, | |||
| const VECTOR *PME_frxyz, const UNSIGNED_INT_VECTOR *PME_kxyz, | |||
| const VECTOR PME_inverse_box_vector, const int atom_numbers) { | |||
| int atom = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom < atom_numbers) { | |||
| int k, kx; | |||
| float tempdQx, tempdQy, tempdQz, tempdx, tempdy, tempdz, tempx, tempy, tempz, tempdQf; | |||
| float tempf, tempf2; | |||
| float temp_charge = charge[atom]; | |||
| int *temp_near = PME_atom_near + atom * 64; | |||
| UNSIGNED_INT_VECTOR temp_kxyz; | |||
| VECTOR temp_frxyz = PME_frxyz[atom]; | |||
| for (k = threadIdx.y; k < 64; k = k + blockDim.y) { | |||
| temp_kxyz = PME_kxyz[k]; | |||
| tempdQf = -PME_Q[temp_near[k]] * temp_charge; | |||
| kx = temp_kxyz.uint_x; | |||
| tempf = (temp_frxyz.x); | |||
| tempf2 = tempf * tempf; | |||
| tempx = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; | |||
| tempdx = PME_dMa[kx] * tempf2 + PME_dMb[kx] * tempf + PME_dMc[kx]; | |||
| kx = temp_kxyz.uint_y; | |||
| tempf = (temp_frxyz.y); | |||
| tempf2 = tempf * tempf; | |||
| tempy = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; | |||
| tempdy = PME_dMa[kx] * tempf2 + PME_dMb[kx] * tempf + PME_dMc[kx]; | |||
| kx = temp_kxyz.uint_z; | |||
| tempf = (temp_frxyz.z); | |||
| tempf2 = tempf * tempf; | |||
| tempz = PME_Ma[kx] * tempf * tempf2 + PME_Mb[kx] * tempf2 + PME_Mc[kx] * tempf + PME_Md[kx]; | |||
| tempdz = PME_dMa[kx] * tempf2 + PME_dMb[kx] * tempf + PME_dMc[kx]; | |||
| tempdQx = tempdx * tempy * tempz * PME_inverse_box_vector.x; | |||
| tempdQy = tempdy * tempx * tempz * PME_inverse_box_vector.y; | |||
| tempdQz = tempdz * tempx * tempy * PME_inverse_box_vector.z; | |||
| atomicAdd(&force[atom].x, tempdQf * tempdQx); | |||
| atomicAdd(&force[atom].y, tempdQf * tempdQy); | |||
| atomicAdd(&force[atom].z, tempdQf * tempdQz); | |||
| } | |||
| } | |||
| } | |||
| void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, | |||
| float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, | |||
| const float *box_length_f, const int *uint_crd_f, const float *charge, float *force, | |||
| cudaStream_t stream) { | |||
| UNSIGNED_INT_VECTOR *uint_crd = | |||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | |||
| UNSIGNED_INT_VECTOR *PME_uxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_uxyz); | |||
| UNSIGNED_INT_VECTOR *PME_kxyz = reinterpret_cast<UNSIGNED_INT_VECTOR *>(pme_kxyz); | |||
| Reset_List<<<3 * atom_numbers / 32 + 1, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast<int *>(PME_uxyz), | |||
| 1 << 30); | |||
| VECTOR *PME_frxyz = reinterpret_cast<VECTOR *>(pme_frxyz); | |||
| VECTOR *frc = reinterpret_cast<VECTOR *>(force); | |||
| std::vector<float> h_box_length(3); | |||
| cudaMemcpyAsync(h_box_length.data(), box_length_f, sizeof(float) * h_box_length.size(), cudaMemcpyDeviceToHost, | |||
| stream); | |||
| cudaStreamSynchronize(stream); | |||
| VECTOR *box_length = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(h_box_length.data())); | |||
| cufftComplex *PME_FQ = reinterpret_cast<cufftComplex *>(pme_fq); | |||
| VECTOR PME_inverse_box_vector; | |||
| PME_inverse_box_vector.x = static_cast<float>(fftx) / box_length[0].x; | |||
| PME_inverse_box_vector.y = static_cast<float>(ffty) / box_length[0].y; | |||
| PME_inverse_box_vector.z = static_cast<float>(fftz) / box_length[0].z; | |||
| cufftHandle PME_plan_r2c; | |||
| cufftHandle PME_plan_c2r; | |||
| cufftPlan3d(&PME_plan_r2c, fftx, ffty, fftz, CUFFT_R2C); | |||
| cufftPlan3d(&PME_plan_c2r, fftx, ffty, fftz, CUFFT_C2R); | |||
| cufftSetStream(PME_plan_r2c, stream); | |||
| cufftSetStream(PME_plan_c2r, stream); | |||
| thread_PME.x = 8; | |||
| thread_PME.y = 8; | |||
| int PME_Nin = ffty * fftz; | |||
| int PME_Nfft = fftx * ffty * (fftz / 2 + 1); | |||
| int PME_Nall = fftx * ffty * fftz; | |||
| float volume = box_length[0].x * box_length[0].y * box_length[0].z; | |||
| UNSIGNED_INT_VECTOR *PME_kxyz_cpu; | |||
| Malloc_Safely(reinterpret_cast<void **>(&PME_kxyz_cpu), sizeof(UNSIGNED_INT_VECTOR) * 64); | |||
| int kx, ky, kz, kxrp, kyrp, kzrp, index; | |||
| for (kx = 0; kx < 4; kx++) { | |||
| for (ky = 0; ky < 4; ky++) { | |||
| for (kz = 0; kz < 4; kz++) { | |||
| index = kx * 16 + ky * 4 + kz; | |||
| PME_kxyz_cpu[index].uint_x = kx; | |||
| PME_kxyz_cpu[index].uint_y = ky; | |||
| PME_kxyz_cpu[index].uint_z = kz; | |||
| } | |||
| } | |||
| } | |||
| cudaMemcpyAsync(PME_kxyz, PME_kxyz_cpu, sizeof(UNSIGNED_INT_VECTOR) * 64, cudaMemcpyHostToDevice, stream); | |||
| cudaStreamSynchronize(stream); | |||
| free(PME_kxyz_cpu); | |||
| // initial start | |||
| float *B1, *B2, *B3, *PME_BC0; | |||
| B1 = reinterpret_cast<float *>(malloc(sizeof(float) * fftx)); | |||
| B2 = reinterpret_cast<float *>(malloc(sizeof(float) * ffty)); | |||
| B3 = reinterpret_cast<float *>(malloc(sizeof(float) * fftz)); | |||
| PME_BC0 = reinterpret_cast<float *>(malloc(sizeof(float) * PME_Nfft)); | |||
| for (kx = 0; kx < fftx; kx++) { | |||
| B1[kx] = getb(kx, fftx, 4); | |||
| } | |||
| for (ky = 0; ky < ffty; ky++) { | |||
| B2[ky] = getb(ky, ffty, 4); | |||
| } | |||
| for (kz = 0; kz < fftz; kz++) { | |||
| B3[kz] = getb(kz, fftz, 4); | |||
| } | |||
| float mprefactor = PI * PI / -beta / beta; | |||
| float msq; | |||
| for (kx = 0; kx < fftx; kx++) { | |||
| kxrp = kx; | |||
| if (kx > fftx / 2) kxrp = fftx - kx; | |||
| for (ky = 0; ky < ffty; ky++) { | |||
| kyrp = ky; | |||
| if (ky > ffty / 2) kyrp = ffty - ky; | |||
| for (kz = 0; kz <= fftz / 2; kz++) { | |||
| kzrp = kz; | |||
| msq = kxrp * kxrp / box_length[0].x / box_length[0].x + kyrp * kyrp / box_length[0].y / box_length[0].y + | |||
| kzrp * kzrp / box_length[0].z / box_length[0].z; | |||
| index = kx * ffty * (fftz / 2 + 1) + ky * (fftz / 2 + 1) + kz; | |||
| if ((kx + ky + kz) == 0) { | |||
| PME_BC0[index] = 0; | |||
| } else { | |||
| PME_BC0[index] = 1.0 / PI / msq * exp(mprefactor * msq) / volume; | |||
| } | |||
| PME_BC0[index] *= B1[kx] * B2[ky] * B3[kz]; | |||
| } | |||
| } | |||
| } | |||
| cudaMemcpyAsync(PME_BC, PME_BC0, sizeof(float) * PME_Nfft, cudaMemcpyHostToDevice, stream); | |||
| cudaStreamSynchronize(stream); | |||
| free(B1); | |||
| free(B2); | |||
| free(B3); | |||
| free(PME_BC0); | |||
| // initial end | |||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>( | |||
| 3 * atom_numbers, reinterpret_cast<float *>(frc), 0.); | |||
| PME_Atom_Near<<<atom_numbers / 32 + 1, 32, 0, stream>>>( | |||
| uint_crd, PME_atom_near, PME_Nin, periodic_factor_inverse * fftx, periodic_factor_inverse * ffty, | |||
| periodic_factor_inverse * fftz, atom_numbers, fftx, ffty, fftz, PME_kxyz, PME_uxyz, PME_frxyz); | |||
| Reset_List<<<PME_Nall / 1024 + 1, 1024, 0, stream>>>(PME_Nall, PME_Q, 0); | |||
| PME_Q_Spread<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_frxyz, PME_Q, | |||
| PME_kxyz, atom_numbers); | |||
| cufftExecR2C(PME_plan_r2c, reinterpret_cast<float *>(PME_Q), reinterpret_cast<cufftComplex *>(PME_FQ)); | |||
| PME_BCFQ<<<PME_Nfft / 1024 + 1, 1024, 0, stream>>>(PME_FQ, PME_BC, PME_Nfft); | |||
| cufftExecC2R(PME_plan_c2r, reinterpret_cast<cufftComplex *>(PME_FQ), reinterpret_cast<float *>(PME_Q)); | |||
| PME_Final<<<atom_numbers / thread_PME.x + 1, thread_PME, 0, stream>>>(PME_atom_near, charge, PME_Q, frc, PME_frxyz, | |||
| PME_kxyz, PME_inverse_box_vector, atom_numbers); | |||
| return; | |||
| } | |||
| void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, | |||
| float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, | |||
| const float *box_length_f, const int *uint_crd_f, const float *charge, float *force, | |||
| cudaStream_t stream); | |||
| @@ -0,0 +1,28 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_RECIPROCAL_FORCE_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_PME_PME_RECIPROCAL_FORCE_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include <vector> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, | |||
| float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, | |||
| const float *box_length_f, const int *uint_crd_f, const float *charge, float *force, | |||
| cudaStream_t stream); | |||
| #endif | |||
| @@ -0,0 +1,27 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/common/getcenter_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO( | |||
| GetCenterOfGeometry, | |||
| KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| GetCenterOfGeometryGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,89 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GETCENTER_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GETCENTER_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/getcenter_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class GetCenterOfGeometryGpuKernel : public GpuKernel { | |||
| public: | |||
| GetCenterOfGeometryGpuKernel() : ele_center_atoms(1) {} | |||
| ~GetCenterOfGeometryGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| center_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "center_numbers")); | |||
| center_numbers_inverse = static_cast<int>(GetAttr<float>(kernel_node, "center_numbers_inverse")); | |||
| auto shape_center_atoms = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| for (size_t i = 0; i < shape_center_atoms.size(); i++) ele_center_atoms *= shape_center_atoms[i]; | |||
| for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto center_atoms = GetDeviceAddress<const T1>(inputs, 0); | |||
| auto crd = GetDeviceAddress<const T>(inputs, 1); | |||
| auto center_of_geometry = GetDeviceAddress<T>(outputs, 0); | |||
| GetCenterOfGeometry(center_numbers, center_numbers_inverse, center_atoms, crd, center_of_geometry, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_center_atoms * sizeof(T1)); | |||
| input_size_list_.push_back(ele_crd * sizeof(T)); | |||
| output_size_list_.push_back(3 * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_center_atoms = 1; | |||
| size_t ele_crd = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int center_numbers; | |||
| float center_numbers_inverse; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GETCENTER_KERNEL_H_ | |||
| @@ -0,0 +1,31 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/common/mdtemperature_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(MDTemperature, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| MDTemperatureGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,96 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MDTEMPERATURE_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MDTEMPERATURE_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/mdtemperature_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class MDTemperatureGpuKernel : public GpuKernel { | |||
| public: | |||
| MDTemperatureGpuKernel() : ele_start(1) {} | |||
| ~MDTemperatureGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| residue_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "residue_numbers")); | |||
| auto shape_start = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_end = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_atom_vel = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_atom_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| for (size_t i = 0; i < shape_start.size(); i++) ele_start *= shape_start[i]; | |||
| for (size_t i = 0; i < shape_end.size(); i++) ele_end *= shape_end[i]; | |||
| for (size_t i = 0; i < shape_atom_vel.size(); i++) ele_atom_vel *= shape_atom_vel[i]; | |||
| for (size_t i = 0; i < shape_atom_mass.size(); i++) ele_atom_mass *= shape_atom_mass[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto start = GetDeviceAddress<const T1>(inputs, 0); | |||
| auto end = GetDeviceAddress<const T1>(inputs, 1); | |||
| auto atom_vel_f = GetDeviceAddress<const T>(inputs, 2); | |||
| auto atom_mass = GetDeviceAddress<const T>(inputs, 3); | |||
| auto ek = GetDeviceAddress<T>(outputs, 0); | |||
| MDTemperature(residue_numbers, start, end, atom_vel_f, atom_mass, ek, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_start * sizeof(T1)); | |||
| input_size_list_.push_back(ele_end * sizeof(T1)); | |||
| input_size_list_.push_back(ele_atom_vel * sizeof(T)); | |||
| input_size_list_.push_back(ele_atom_mass * sizeof(T)); | |||
| output_size_list_.push_back(residue_numbers * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_start = 1; | |||
| size_t ele_end = 1; | |||
| size_t ele_atom_vel = 1; | |||
| size_t ele_atom_mass = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int residue_numbers; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MDTEMPERATURE_KERNEL_H_ | |||
| @@ -0,0 +1,34 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/lj/lj_energy_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(LJEnergy, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| LJEnergyGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,130 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_ENERGY_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_ENERGY_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_energy_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class LJEnergyGpuKernel : public GpuKernel { | |||
| public: | |||
| LJEnergyGpuKernel() : ele_uint_crd(1) {} | |||
| ~LJEnergyGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| cutoff_square = static_cast<float>(GetAttr<float_t>(kernel_node, "cutoff_square")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; | |||
| for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; | |||
| for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; | |||
| // for (size_t i = 0; i < shape_nl.size(); i++) ele_nl *= shape_nl[i]; | |||
| for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; | |||
| for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd = GetDeviceAddress<T1>(inputs, 0); | |||
| auto LJtype = GetDeviceAddress<T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<T>(inputs, 2); | |||
| auto scaler = GetDeviceAddress<T>(inputs, 3); | |||
| auto nl_numbers = GetDeviceAddress<T1>(inputs, 4); | |||
| auto nl_serial = GetDeviceAddress<T1>(inputs, 5); | |||
| auto d_LJ_a = GetDeviceAddress<T>(inputs, 6); | |||
| auto d_LJ_b = GetDeviceAddress<T>(inputs, 7); | |||
| auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0); | |||
| auto nl = GetDeviceAddress<T1>(workspace, 1); | |||
| auto d_LJ_energy_atom = GetDeviceAddress<T>(outputs, 0); | |||
| LJEnergy(atom_numbers, cutoff_square, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ, nl_numbers, nl_serial, nl, | |||
| d_LJ_a, d_LJ_b, d_LJ_energy_atom, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T1)); | |||
| input_size_list_.push_back(ele_LJtype * sizeof(T1)); | |||
| input_size_list_.push_back(ele_charge * sizeof(T)); | |||
| input_size_list_.push_back(ele_scaler * sizeof(T)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(max_nl_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(ele_d_LJ_a * sizeof(T)); | |||
| input_size_list_.push_back(ele_d_LJ_b * sizeof(T)); | |||
| workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); | |||
| workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); | |||
| output_size_list_.push_back(atom_numbers * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_LJtype = 1; | |||
| size_t ele_charge = 1; | |||
| size_t ele_scaler = 1; | |||
| size_t ele_nl = 1; | |||
| size_t ele_d_LJ_a = 1; | |||
| size_t ele_d_LJ_b = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int atom_numbers; | |||
| float cutoff_square; | |||
| int max_nl_numbers = 800; | |||
| struct UINT_VECTOR_LJ_TYPE { | |||
| unsigned int uint_x; | |||
| unsigned int uint_y; | |||
| unsigned int uint_z; | |||
| int LJ_type; | |||
| float charge; | |||
| }; | |||
| struct NEIGHBOR_LIST { | |||
| int atom_numbers; | |||
| int *atom_serial; | |||
| }; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -0,0 +1,34 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/lj/lj_force_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(LJForce, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| LJForceGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,129 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class LJForceGpuKernel : public GpuKernel { | |||
| public: | |||
| LJForceGpuKernel() : ele_uint_crd(1) {} | |||
| ~LJForceGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| cutoff_square = static_cast<float>(GetAttr<float_t>(kernel_node, "cutoff_square")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; | |||
| for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; | |||
| for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; | |||
| // for (size_t i = 0; i < shape_nl.size(); i++) ele_nl *= shape_nl[i]; | |||
| for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; | |||
| for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd = GetDeviceAddress<T1>(inputs, 0); | |||
| auto LJtype = GetDeviceAddress<T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<T>(inputs, 2); | |||
| auto scaler = GetDeviceAddress<T>(inputs, 3); | |||
| auto nl_numbers = GetDeviceAddress<T1>(inputs, 4); | |||
| auto nl_serial = GetDeviceAddress<T1>(inputs, 5); | |||
| auto d_LJ_a = GetDeviceAddress<T>(inputs, 6); | |||
| auto d_LJ_b = GetDeviceAddress<T>(inputs, 7); | |||
| auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0); | |||
| auto nl = GetDeviceAddress<T1>(workspace, 1); | |||
| auto frc = GetDeviceAddress<T>(outputs, 0); | |||
| LJForce(atom_numbers, cutoff_square, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ, nl_numbers, nl_serial, nl, | |||
| d_LJ_a, d_LJ_b, frc, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T1)); | |||
| input_size_list_.push_back(ele_LJtype * sizeof(T1)); | |||
| input_size_list_.push_back(ele_charge * sizeof(T)); | |||
| input_size_list_.push_back(ele_scaler * sizeof(T)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(max_nl_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(ele_d_LJ_a * sizeof(T)); | |||
| input_size_list_.push_back(ele_d_LJ_b * sizeof(T)); | |||
| workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); | |||
| workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); | |||
| output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_LJtype = 1; | |||
| size_t ele_charge = 1; | |||
| size_t ele_scaler = 1; | |||
| size_t ele_d_LJ_a = 1; | |||
| size_t ele_d_LJ_b = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int atom_numbers; | |||
| float cutoff_square; | |||
| int max_nl_numbers = 800; | |||
| struct UINT_VECTOR_LJ_TYPE { | |||
| unsigned int uint_x; | |||
| unsigned int uint_y; | |||
| unsigned int uint_z; | |||
| int LJ_type; | |||
| float charge; | |||
| }; | |||
| struct NEIGHBOR_LIST { | |||
| int atom_numbers; | |||
| int *atom_serial; | |||
| }; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -0,0 +1,34 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/lj/lj_force_with_pme_direct_force_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(LJForceWithPMEDirectForce, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| LJForceWithPMEDirectForceGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,133 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_LJ_LJ_FORCE_WITH_PME_DIRECT_FORCE_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class LJForceWithPMEDirectForceGpuKernel : public GpuKernel { | |||
| public: | |||
| LJForceWithPMEDirectForceGpuKernel() : ele_uint_crd(1) {} | |||
| ~LJForceWithPMEDirectForceGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| cutoff = static_cast<float>(GetAttr<float_t>(kernel_node, "cutoff")); | |||
| pme_beta = static_cast<float>(GetAttr<float_t>(kernel_node, "pme_beta")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_nl_numbers = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_nl_serial = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto shape_d_LJ_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| auto shape_d_LJ_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; | |||
| for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; | |||
| for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; | |||
| // for (size_t i = 0; i < shape_nl.size(); i++) ele_nl *= shape_nl[i]; | |||
| for (size_t i = 0; i < shape_d_LJ_a.size(); i++) ele_d_LJ_a *= shape_d_LJ_a[i]; | |||
| for (size_t i = 0; i < shape_d_LJ_b.size(); i++) ele_d_LJ_b *= shape_d_LJ_b[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd = GetDeviceAddress<T1>(inputs, 0); | |||
| auto LJtype = GetDeviceAddress<T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<T>(inputs, 2); | |||
| auto scaler = GetDeviceAddress<T>(inputs, 3); | |||
| auto nl_numbers = GetDeviceAddress<T1>(inputs, 4); | |||
| auto nl_serial = GetDeviceAddress<T1>(inputs, 5); | |||
| auto d_LJ_a = GetDeviceAddress<T>(inputs, 6); | |||
| auto d_LJ_b = GetDeviceAddress<T>(inputs, 7); | |||
| auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0); | |||
| auto nl = GetDeviceAddress<T1>(workspace, 1); | |||
| auto frc = GetDeviceAddress<T>(outputs, 0); | |||
| LJForceWithPMEDirectForce(atom_numbers, cutoff, pme_beta, uint_crd, LJtype, charge, scaler, uint_crd_with_LJ, | |||
| nl_numbers, nl_serial, nl, d_LJ_a, d_LJ_b, frc, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T1)); | |||
| input_size_list_.push_back(ele_LJtype * sizeof(T1)); | |||
| input_size_list_.push_back(ele_charge * sizeof(T)); | |||
| input_size_list_.push_back(ele_scaler * sizeof(T)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(max_nl_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(ele_d_LJ_a * sizeof(T)); | |||
| input_size_list_.push_back(ele_d_LJ_b * sizeof(T)); | |||
| workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); | |||
| workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); | |||
| output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_LJtype = 1; | |||
| size_t ele_charge = 1; | |||
| size_t ele_scaler = 1; | |||
| size_t ele_nl = 1; | |||
| size_t ele_d_LJ_a = 1; | |||
| size_t ele_d_LJ_b = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int atom_numbers; | |||
| float pme_beta; | |||
| float cutoff; | |||
| int max_nl_numbers = 800; | |||
| struct UINT_VECTOR_LJ_TYPE { | |||
| unsigned int uint_x; | |||
| unsigned int uint_y; | |||
| unsigned int uint_z; | |||
| int LJ_type; | |||
| float charge; | |||
| }; | |||
| struct NEIGHBOR_LIST { | |||
| int atom_numbers; | |||
| int *atom_serial; | |||
| }; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -0,0 +1,34 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_atom_energy_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(Dihedral14CFAtomEnergy, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| Dihedral14CFAtomEnergyGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,114 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class Dihedral14CFAtomEnergyGpuKernel : public GpuKernel { | |||
| public: | |||
| Dihedral14CFAtomEnergyGpuKernel() : ele_uint_crd(1) {} | |||
| ~Dihedral14CFAtomEnergyGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| dihedral_14_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "dihedral_14_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; | |||
| for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; | |||
| for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; | |||
| for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; | |||
| for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; | |||
| for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0); | |||
| auto LJtype = GetDeviceAddress<const T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<const T>(inputs, 2); | |||
| auto boxlength_f = GetDeviceAddress<T>(inputs, 3); | |||
| auto a_14 = GetDeviceAddress<const T1>(inputs, 4); | |||
| auto b_14 = GetDeviceAddress<const T1>(inputs, 5); | |||
| auto cf_scale_factor = GetDeviceAddress<T>(inputs, 6); | |||
| auto ene = GetDeviceAddress<T>(outputs, 0); | |||
| Dihedral14CFAtomEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, | |||
| cf_scale_factor, ene, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T1)); | |||
| input_size_list_.push_back(ele_LJtype * sizeof(T1)); | |||
| input_size_list_.push_back(ele_charge * sizeof(T)); | |||
| input_size_list_.push_back(ele_boxlength_f * sizeof(T)); | |||
| input_size_list_.push_back(ele_a_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_b_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); | |||
| output_size_list_.push_back(atom_numbers * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_LJtype = 1; | |||
| size_t ele_charge = 1; | |||
| size_t ele_boxlength_f = 1; | |||
| size_t ele_a_14 = 1; | |||
| size_t ele_b_14 = 1; | |||
| size_t ele_cf_scale_factor = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int dihedral_14_numbers; | |||
| int atom_numbers; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ATOM_ENERGY_KERNEL_H_ | |||
| @@ -0,0 +1,34 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_cf_energy_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(Dihedral14CFEnergy, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| Dihedral14CFEnergyGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,114 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ENERGY_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_CF_ENERGY_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class Dihedral14CFEnergyGpuKernel : public GpuKernel { | |||
| public: | |||
| Dihedral14CFEnergyGpuKernel() : ele_uint_crd(1) {} | |||
| ~Dihedral14CFEnergyGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| dihedral_14_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "dihedral_14_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; | |||
| for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; | |||
| for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; | |||
| for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; | |||
| for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; | |||
| for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0); | |||
| auto LJtype = GetDeviceAddress<const T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<const T>(inputs, 2); | |||
| auto boxlength_f = GetDeviceAddress<T>(inputs, 3); | |||
| auto a_14 = GetDeviceAddress<const T1>(inputs, 4); | |||
| auto b_14 = GetDeviceAddress<const T1>(inputs, 5); | |||
| auto cf_scale_factor = GetDeviceAddress<T>(inputs, 6); | |||
| auto ene = GetDeviceAddress<T>(outputs, 0); | |||
| Dihedral14CFEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, | |||
| cf_scale_factor, ene, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T1)); | |||
| input_size_list_.push_back(ele_LJtype * sizeof(T1)); | |||
| input_size_list_.push_back(ele_charge * sizeof(T)); | |||
| input_size_list_.push_back(ele_boxlength_f * sizeof(T)); | |||
| input_size_list_.push_back(ele_a_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_b_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); | |||
| output_size_list_.push_back(atom_numbers * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_LJtype = 1; | |||
| size_t ele_charge = 1; | |||
| size_t ele_boxlength_f = 1; | |||
| size_t ele_a_14 = 1; | |||
| size_t ele_b_14 = 1; | |||
| size_t ele_cf_scale_factor = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int dihedral_14_numbers; | |||
| int atom_numbers; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_KERNEL_H_ | |||
| @@ -0,0 +1,36 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_atom_energy_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(Dihedral14LJAtomEnergy, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| Dihedral14LJAtomEnergyGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,123 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class Dihedral14LJAtomEnergyGpuKernel : public GpuKernel { | |||
| public: | |||
| Dihedral14LJAtomEnergyGpuKernel() : ele_uint_crd(1) {} | |||
| ~Dihedral14LJAtomEnergyGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| dihedral_14_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "dihedral_14_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); | |||
| auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; | |||
| for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; | |||
| for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; | |||
| for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; | |||
| for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; | |||
| for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; | |||
| for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; | |||
| for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0); | |||
| auto LJtype = GetDeviceAddress<const T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<const T>(inputs, 2); | |||
| auto boxlength_f = GetDeviceAddress<T>(inputs, 3); | |||
| auto a_14 = GetDeviceAddress<const T1>(inputs, 4); | |||
| auto b_14 = GetDeviceAddress<const T1>(inputs, 5); | |||
| auto lj_scale_factor = GetDeviceAddress<T>(inputs, 6); | |||
| auto LJ_type_A = GetDeviceAddress<T>(inputs, 7); | |||
| auto LJ_type_B = GetDeviceAddress<T>(inputs, 8); | |||
| auto ene = GetDeviceAddress<T>(outputs, 0); | |||
| Dihedral14LJAtomEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, | |||
| lj_scale_factor, LJ_type_A, LJ_type_B, ene, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T1)); | |||
| input_size_list_.push_back(ele_LJtype * sizeof(T1)); | |||
| input_size_list_.push_back(ele_charge * sizeof(T)); | |||
| input_size_list_.push_back(ele_boxlength_f * sizeof(T)); | |||
| input_size_list_.push_back(ele_a_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_b_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); | |||
| input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); | |||
| input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); | |||
| output_size_list_.push_back(atom_numbers * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_LJtype = 1; | |||
| size_t ele_charge = 1; | |||
| size_t ele_boxlength_f = 1; | |||
| size_t ele_a_14 = 1; | |||
| size_t ele_b_14 = 1; | |||
| size_t ele_lj_scale_factor = 1; | |||
| size_t ele_LJ_type_A = 1; | |||
| size_t ele_LJ_type_B = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int dihedral_14_numbers; | |||
| int atom_numbers; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_KERNEL_H_ | |||
| @@ -0,0 +1,38 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(Dihedral14LJCFForceWithAtomEnergy, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| Dihedral14LJCFForceWithAtomEnergyGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,132 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class Dihedral14LJCFForceWithAtomEnergyGpuKernel : public GpuKernel { | |||
| public: | |||
| Dihedral14LJCFForceWithAtomEnergyGpuKernel() : ele_uint_crd(1) {} | |||
| ~Dihedral14LJCFForceWithAtomEnergyGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| dihedral_14_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "dihedral_14_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); | |||
| auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); | |||
| auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; | |||
| for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; | |||
| for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; | |||
| for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; | |||
| for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; | |||
| for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; | |||
| for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; | |||
| for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; | |||
| for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0); | |||
| auto LJtype = GetDeviceAddress<const T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<const T>(inputs, 2); | |||
| auto boxlength_f = GetDeviceAddress<T>(inputs, 3); | |||
| auto a_14 = GetDeviceAddress<const T1>(inputs, 4); | |||
| auto b_14 = GetDeviceAddress<const T1>(inputs, 5); | |||
| auto lj_scale_factor = GetDeviceAddress<T>(inputs, 6); | |||
| auto cf_scale_factor = GetDeviceAddress<T>(inputs, 7); | |||
| auto LJ_type_A = GetDeviceAddress<T>(inputs, 8); | |||
| auto LJ_type_B = GetDeviceAddress<T>(inputs, 9); | |||
| auto frc_f = GetDeviceAddress<T>(outputs, 0); | |||
| auto atom_energy = GetDeviceAddress<T>(outputs, 1); | |||
| Dihedral14LJCFForceWithAtomEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, | |||
| b_14, lj_scale_factor, cf_scale_factor, LJ_type_A, LJ_type_B, frc_f, atom_energy, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T1)); | |||
| input_size_list_.push_back(ele_LJtype * sizeof(T1)); | |||
| input_size_list_.push_back(ele_charge * sizeof(T)); | |||
| input_size_list_.push_back(ele_boxlength_f * sizeof(T)); | |||
| input_size_list_.push_back(ele_a_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_b_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); | |||
| input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); | |||
| input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); | |||
| input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); | |||
| output_size_list_.push_back(3 * atom_numbers * sizeof(T)); | |||
| output_size_list_.push_back(atom_numbers * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_LJtype = 1; | |||
| size_t ele_charge = 1; | |||
| size_t ele_boxlength_f = 1; | |||
| size_t ele_a_14 = 1; | |||
| size_t ele_b_14 = 1; | |||
| size_t ele_lj_scale_factor = 1; | |||
| size_t ele_cf_scale_factor = 1; | |||
| size_t ele_LJ_type_A = 1; | |||
| size_t ele_LJ_type_B = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int dihedral_14_numbers; | |||
| int atom_numbers; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_KERNEL_H_ | |||
| @@ -0,0 +1,36 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_energy_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(Dihedral14LJEnergy, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| Dihedral14LJEnergyGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,124 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ENERGY_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ENERGY_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class Dihedral14LJEnergyGpuKernel : public GpuKernel { | |||
| public: | |||
| Dihedral14LJEnergyGpuKernel() : ele_uint_crd(1) {} | |||
| ~Dihedral14LJEnergyGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| dihedral_14_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "dihedral_14_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); | |||
| auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; | |||
| for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; | |||
| for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; | |||
| for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; | |||
| for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; | |||
| for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; | |||
| for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; | |||
| for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0); | |||
| auto LJtype = GetDeviceAddress<const T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<const T>(inputs, 2); | |||
| auto boxlength_f = GetDeviceAddress<T>(inputs, 3); | |||
| auto a_14 = GetDeviceAddress<const T1>(inputs, 4); | |||
| auto b_14 = GetDeviceAddress<const T1>(inputs, 5); | |||
| auto lj_scale_factor = GetDeviceAddress<T>(inputs, 6); | |||
| auto LJ_type_A = GetDeviceAddress<T>(inputs, 7); | |||
| auto LJ_type_B = GetDeviceAddress<T>(inputs, 8); | |||
| auto ene = GetDeviceAddress<T>(outputs, 0); | |||
| Dihedral14LJEnergy(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, | |||
| lj_scale_factor, LJ_type_A, LJ_type_B, ene, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T1)); | |||
| input_size_list_.push_back(ele_LJtype * sizeof(T1)); | |||
| input_size_list_.push_back(ele_charge * sizeof(T)); | |||
| input_size_list_.push_back(ele_boxlength_f * sizeof(T)); | |||
| input_size_list_.push_back(ele_a_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_b_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); | |||
| input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); | |||
| input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); | |||
| output_size_list_.push_back(atom_numbers * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_LJtype = 1; | |||
| size_t ele_charge = 1; | |||
| size_t ele_boxlength_f = 1; | |||
| size_t ele_a_14 = 1; | |||
| size_t ele_b_14 = 1; | |||
| size_t ele_lj_scale_factor = 1; | |||
| size_t ele_LJ_type_A = 1; | |||
| size_t ele_LJ_type_B = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int dihedral_14_numbers; | |||
| int atom_numbers; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_ENERGY_KERNEL_H_ | |||
| @@ -0,0 +1,36 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_gpu_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(Dihedral14LJForce, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| Dihedral14LJForceGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,122 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class Dihedral14LJForceGpuKernel : public GpuKernel { | |||
| public: | |||
| Dihedral14LJForceGpuKernel() : ele_uint_crd(1) {} | |||
| ~Dihedral14LJForceGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| dihedral_14_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "dihedral_14_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); | |||
| auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; | |||
| for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; | |||
| for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; | |||
| for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; | |||
| for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; | |||
| for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; | |||
| for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; | |||
| for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0); | |||
| auto LJtype = GetDeviceAddress<const T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<const T>(inputs, 2); | |||
| auto boxlength_f = GetDeviceAddress<T>(inputs, 3); | |||
| auto a_14 = GetDeviceAddress<const T1>(inputs, 4); | |||
| auto b_14 = GetDeviceAddress<const T1>(inputs, 5); | |||
| auto lj_scale_factor = GetDeviceAddress<T>(inputs, 6); | |||
| auto LJ_type_A = GetDeviceAddress<T>(inputs, 7); | |||
| auto LJ_type_B = GetDeviceAddress<T>(inputs, 8); | |||
| auto frc_f = GetDeviceAddress<T>(outputs, 0); | |||
| Dihedral14LJForce(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, b_14, | |||
| lj_scale_factor, LJ_type_A, LJ_type_B, frc_f, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T1)); | |||
| input_size_list_.push_back(ele_LJtype * sizeof(T1)); | |||
| input_size_list_.push_back(ele_charge * sizeof(T)); | |||
| input_size_list_.push_back(ele_boxlength_f * sizeof(T)); | |||
| input_size_list_.push_back(ele_a_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_b_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); | |||
| input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); | |||
| input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); | |||
| output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_LJtype = 1; | |||
| size_t ele_charge = 1; | |||
| size_t ele_boxlength_f = 1; | |||
| size_t ele_a_14 = 1; | |||
| size_t ele_b_14 = 1; | |||
| size_t ele_lj_scale_factor = 1; | |||
| size_t ele_LJ_type_A = 1; | |||
| size_t ele_LJ_type_B = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int dihedral_14_numbers; | |||
| int atom_numbers; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_KERNEL_H_ | |||
| @@ -0,0 +1,37 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_force_with_direct_cf_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(Dihedral14LJForceWithDirectCF, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| Dihedral14LJForceWithDirectCFGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,130 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class Dihedral14LJForceWithDirectCFGpuKernel : public GpuKernel { | |||
| public: | |||
| Dihedral14LJForceWithDirectCFGpuKernel() : ele_uint_crd(1) {} | |||
| ~Dihedral14LJForceWithDirectCFGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| dihedral_14_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "dihedral_14_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_LJtype = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_charge = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_boxlength_f = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_a_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_b_14 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| auto shape_lj_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6); | |||
| auto shape_cf_scale_factor = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 7); | |||
| auto shape_LJ_type_A = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 8); | |||
| auto shape_LJ_type_B = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_LJtype.size(); i++) ele_LJtype *= shape_LJtype[i]; | |||
| for (size_t i = 0; i < shape_charge.size(); i++) ele_charge *= shape_charge[i]; | |||
| for (size_t i = 0; i < shape_boxlength_f.size(); i++) ele_boxlength_f *= shape_boxlength_f[i]; | |||
| for (size_t i = 0; i < shape_a_14.size(); i++) ele_a_14 *= shape_a_14[i]; | |||
| for (size_t i = 0; i < shape_b_14.size(); i++) ele_b_14 *= shape_b_14[i]; | |||
| for (size_t i = 0; i < shape_lj_scale_factor.size(); i++) ele_lj_scale_factor *= shape_lj_scale_factor[i]; | |||
| for (size_t i = 0; i < shape_cf_scale_factor.size(); i++) ele_cf_scale_factor *= shape_cf_scale_factor[i]; | |||
| for (size_t i = 0; i < shape_LJ_type_A.size(); i++) ele_LJ_type_A *= shape_LJ_type_A[i]; | |||
| for (size_t i = 0; i < shape_LJ_type_B.size(); i++) ele_LJ_type_B *= shape_LJ_type_B[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0); | |||
| auto LJtype = GetDeviceAddress<const T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<const T>(inputs, 2); | |||
| auto boxlength_f = GetDeviceAddress<T>(inputs, 3); | |||
| auto a_14 = GetDeviceAddress<const T1>(inputs, 4); | |||
| auto b_14 = GetDeviceAddress<const T1>(inputs, 5); | |||
| auto lj_scale_factor = GetDeviceAddress<T>(inputs, 6); | |||
| auto cf_scale_factor = GetDeviceAddress<T>(inputs, 7); | |||
| auto LJ_type_A = GetDeviceAddress<T>(inputs, 8); | |||
| auto LJ_type_B = GetDeviceAddress<T>(inputs, 9); | |||
| auto frc_f = GetDeviceAddress<T>(outputs, 0); | |||
| Dihedral14LJForceWithDirectCF(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge, boxlength_f, a_14, | |||
| b_14, lj_scale_factor, cf_scale_factor, LJ_type_A, LJ_type_B, frc_f, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T1)); | |||
| input_size_list_.push_back(ele_LJtype * sizeof(T1)); | |||
| input_size_list_.push_back(ele_charge * sizeof(T)); | |||
| input_size_list_.push_back(ele_boxlength_f * sizeof(T)); | |||
| input_size_list_.push_back(ele_a_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_b_14 * sizeof(T1)); | |||
| input_size_list_.push_back(ele_lj_scale_factor * sizeof(T)); | |||
| input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); | |||
| input_size_list_.push_back(ele_LJ_type_A * sizeof(T)); | |||
| input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); | |||
| output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_LJtype = 1; | |||
| size_t ele_charge = 1; | |||
| size_t ele_boxlength_f = 1; | |||
| size_t ele_a_14 = 1; | |||
| size_t ele_b_14 = 1; | |||
| size_t ele_lj_scale_factor = 1; | |||
| size_t ele_cf_scale_factor = 1; | |||
| size_t ele_LJ_type_A = 1; | |||
| size_t ele_LJ_type_B = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int dihedral_14_numbers; | |||
| int atom_numbers; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_KERNEL_H_ | |||
| @@ -0,0 +1,45 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(NeighborListUpdate, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| NeighborListUpdateGpuKernel, int, float) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,170 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class NeighborListUpdateGpuKernel : public GpuKernel { | |||
| public: | |||
| NeighborListUpdateGpuKernel() : skin(2.0), cutoff(10.0), max_atom_in_grid_numbers(64), max_neighbor_numbers(800) {} | |||
| ~NeighborListUpdateGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| grid_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "grid_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| refresh_count = static_cast<int>(GetAttr<int64_t>(kernel_node, "refresh_count")); | |||
| refresh_interval = static_cast<int>(GetAttr<int64_t>(kernel_node, "refresh_interval")); | |||
| not_first_time = static_cast<int>(GetAttr<int64_t>(kernel_node, "not_first_time")); | |||
| Nxy = static_cast<int>(GetAttr<int64_t>(kernel_node, "Nxy")); | |||
| excluded_atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "excluded_atom_numbers")); | |||
| cutoff_square = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_square")); | |||
| half_skin_square = static_cast<float>(GetAttr<float>(kernel_node, "half_skin_square")); | |||
| cutoff_with_skin = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_with_skin")); | |||
| half_cutoff_with_skin = static_cast<float>(GetAttr<float>(kernel_node, "half_cutoff_with_skin")); | |||
| cutoff_with_skin_square = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_with_skin_square")); | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspaces, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto atom_numbers_in_grid_bucket = GetDeviceAddress<int>(inputs, 0); | |||
| auto bucket = GetDeviceAddress<int>(inputs, 1); | |||
| auto crd = GetDeviceAddress<float>(inputs, 2); | |||
| auto box_length = GetDeviceAddress<float>(inputs, 3); | |||
| auto grid_N = GetDeviceAddress<int>(inputs, 4); | |||
| auto grid_length_inverse = GetDeviceAddress<float>(inputs, 5); | |||
| auto atom_in_grid_serial = GetDeviceAddress<int>(inputs, 6); | |||
| auto old_crd = GetDeviceAddress<float>(inputs, 7); | |||
| auto crd_to_uint_crd_cof = GetDeviceAddress<float>(inputs, 8); | |||
| auto uint_crd = GetDeviceAddress<unsigned int>(inputs, 9); | |||
| auto gpointer = GetDeviceAddress<int>(inputs, 10); | |||
| auto nl_atom_numbers = GetDeviceAddress<int>(inputs, 11); | |||
| auto nl_atom_serial = GetDeviceAddress<int>(inputs, 12); | |||
| auto uint_dr_to_dr_cof = GetDeviceAddress<float>(inputs, 13); | |||
| auto excluded_list_start = GetDeviceAddress<int>(inputs, 14); | |||
| auto excluded_list = GetDeviceAddress<int>(inputs, 15); | |||
| auto excluded_numbers = GetDeviceAddress<int>(inputs, 16); | |||
| auto need_refresh_flag = GetDeviceAddress<int>(inputs, 17); | |||
| GRID_BUCKET *d_bucket = reinterpret_cast<GRID_BUCKET *>(GetDeviceAddress<int>(workspaces, 0)); | |||
| GRID_POINTER *d_gpointer = reinterpret_cast<GRID_POINTER *>(GetDeviceAddress<int>(workspaces, 1)); | |||
| NEIGHBOR_LIST *nl = GetDeviceAddress<NEIGHBOR_LIST>(workspaces, 2); | |||
| float *half_crd_to_uint_crd_cof = GetDeviceAddress<float>(workspaces, 3); | |||
| std::vector<GRID_BUCKET> h_bucket(grid_numbers); | |||
| for (size_t i = 0; i < h_bucket.size(); i += 1) { | |||
| h_bucket[i].atom_serial = bucket + i * max_atom_in_grid_numbers; | |||
| } | |||
| std::vector<GRID_POINTER> h_gpointer(grid_numbers); | |||
| for (size_t i = 0; i < h_gpointer.size(); i += 1) { | |||
| h_gpointer[i].grid_serial = gpointer + i * 125; | |||
| } | |||
| cudaMemcpyAsync(d_bucket, h_bucket.data(), sizeof(GRID_BUCKET) * grid_numbers, cudaMemcpyHostToDevice, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| cudaMemcpyAsync(d_gpointer, h_gpointer.data(), sizeof(GRID_POINTER) * grid_numbers, cudaMemcpyHostToDevice, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| Construct_Neighbor_List(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| Neighbor_List_Update(grid_numbers, atom_numbers, refresh_count, refresh_interval, not_first_time, skin, Nxy, | |||
| cutoff_square, cutoff_with_skin_square, grid_N, box_length, atom_numbers_in_grid_bucket, | |||
| grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof, | |||
| half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start, | |||
| excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(sizeof(int) * grid_numbers); | |||
| input_size_list_.push_back(sizeof(int) * max_atom_in_grid_numbers * grid_numbers); | |||
| input_size_list_.push_back(sizeof(VECTOR) * atom_numbers); | |||
| input_size_list_.push_back(sizeof(VECTOR)); | |||
| input_size_list_.push_back(sizeof(INT_VECTOR)); | |||
| input_size_list_.push_back(sizeof(VECTOR)); | |||
| input_size_list_.push_back(sizeof(int) * atom_numbers); | |||
| input_size_list_.push_back(sizeof(VECTOR) * atom_numbers); | |||
| input_size_list_.push_back(sizeof(VECTOR)); | |||
| input_size_list_.push_back(sizeof(UNSIGNED_INT_VECTOR) * atom_numbers); | |||
| input_size_list_.push_back(sizeof(int) * grid_numbers * 125); | |||
| input_size_list_.push_back(sizeof(int) * atom_numbers); | |||
| input_size_list_.push_back(sizeof(int) * atom_numbers * max_neighbor_numbers); | |||
| input_size_list_.push_back(sizeof(VECTOR)); | |||
| input_size_list_.push_back(sizeof(int) * atom_numbers); | |||
| input_size_list_.push_back(sizeof(int) * excluded_atom_numbers); | |||
| input_size_list_.push_back(sizeof(int) * atom_numbers); | |||
| input_size_list_.push_back(sizeof(int)); | |||
| workspace_size_list_.push_back(sizeof(GRID_BUCKET) * grid_numbers); | |||
| workspace_size_list_.push_back(sizeof(GRID_POINTER) * grid_numbers); | |||
| workspace_size_list_.push_back(sizeof(NEIGHBOR_LIST) * atom_numbers); | |||
| workspace_size_list_.push_back(sizeof(float) * 3); | |||
| output_size_list_.push_back(sizeof(float)); | |||
| } | |||
| private: | |||
| float skin; | |||
| float cutoff; | |||
| int not_first_time; | |||
| int atom_numbers; | |||
| int grid_numbers; | |||
| int refresh_count; | |||
| int refresh_interval; | |||
| int Nxy; | |||
| int max_atom_in_grid_numbers; | |||
| int max_neighbor_numbers; | |||
| int excluded_atom_numbers; | |||
| float half_skin_square; | |||
| float cutoff_square; | |||
| float cutoff_with_skin; | |||
| float half_cutoff_with_skin; | |||
| float cutoff_with_skin_square; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/nvtit/md_iteration_leap_frog_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_ONE(MDIterationLeapFrog, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| MDIterationLeapFrogGpuKernel, float) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,115 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| class MDIterationLeapFrogGpuKernel : public GpuKernel { | |||
| public: | |||
| MDIterationLeapFrogGpuKernel() : ele_mass_inverse(1) {} | |||
| ~MDIterationLeapFrogGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| float4_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "float4_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| half_dt = static_cast<float>(GetAttr<float>(kernel_node, "half_dt")); | |||
| dt = static_cast<float>(GetAttr<float>(kernel_node, "dt")); | |||
| exp_gamma = static_cast<float>(GetAttr<float>(kernel_node, "exp_gamma")); | |||
| is_max_velocity = static_cast<int>(GetAttr<int64_t>(kernel_node, "is_max_velocity")); | |||
| max_velocity = static_cast<float>(GetAttr<float>(kernel_node, "max_velocity")); | |||
| // printf("float4_numbers: %d", float4_numbers); | |||
| // printf("atom_numbers: %d", atom_numbers); | |||
| // printf("half_dt: %f", half_dt); | |||
| // printf("dt: %f", dt); | |||
| // printf("exp_gamma: %f", exp_gamma); | |||
| // printf("is_max_velocity: %d", is_max_velocity); | |||
| // printf("max_velocity: %f", max_velocity); | |||
| auto shape_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_qrt_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| for (size_t i = 0; i < shape_mass_inverse.size(); i++) ele_mass_inverse *= shape_mass_inverse[i]; | |||
| for (size_t i = 0; i < shape_qrt_mass.size(); i++) ele_sqrt_mass *= shape_qrt_mass[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto d_mass_inverse = GetDeviceAddress<const T>(inputs, 0); | |||
| auto d_sqrt_mass = GetDeviceAddress<const T>(inputs, 1); | |||
| auto vel_f = GetDeviceAddress<T>(outputs, 0); | |||
| auto crd_f = GetDeviceAddress<T>(outputs, 1); | |||
| auto frc_f = GetDeviceAddress<T>(outputs, 2); | |||
| auto acc_f = GetDeviceAddress<T>(outputs, 3); | |||
| MDIterationLeapFrog(float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity, | |||
| d_mass_inverse, d_sqrt_mass, vel_f, crd_f, frc_f, acc_f, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_mass_inverse * sizeof(T)); | |||
| input_size_list_.push_back(ele_sqrt_mass * sizeof(T)); | |||
| output_size_list_.push_back(3 * atom_numbers * sizeof(T)); | |||
| output_size_list_.push_back(3 * atom_numbers * sizeof(T)); | |||
| output_size_list_.push_back(3 * atom_numbers * sizeof(T)); | |||
| output_size_list_.push_back(3 * atom_numbers * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_mass_inverse = 1; | |||
| size_t ele_sqrt_mass = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int float4_numbers; | |||
| int atom_numbers; | |||
| float half_dt; | |||
| float dt; | |||
| float exp_gamma; | |||
| int is_max_velocity; | |||
| float max_velocity; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_NVTIT_MD_ITERATION_LEAP_FROG_KERNEL_H_ | |||
| @@ -0,0 +1,38 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/pme/pme_energy_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(PMEEnergy, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| PMEEnergyGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,147 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_ENERGY_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <cufft.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_energy_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class PMEEnergyGpuKernel : public GpuKernel { | |||
| public: | |||
| PMEEnergyGpuKernel() : ele_uint_crd(1) {} | |||
| ~PMEEnergyGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| beta = static_cast<float>(GetAttr<float_t>(kernel_node, "beta")); | |||
| fftx = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftx")); | |||
| ffty = static_cast<int>(GetAttr<int64_t>(kernel_node, "ffty")); | |||
| fftz = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftz")); | |||
| PME_Nall = fftx * ffty * fftz; | |||
| PME_Nfft = fftx * ffty * (fftz / 2 + 1); | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto boxlength = GetDeviceAddress<T>(inputs, 0); | |||
| auto uint_crd = GetDeviceAddress<T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<T>(inputs, 2); | |||
| auto nl_numbers = GetDeviceAddress<T1>(inputs, 3); | |||
| auto nl_serial = GetDeviceAddress<T1>(inputs, 4); | |||
| auto scaler = GetDeviceAddress<T>(inputs, 5); | |||
| auto excluded_list_start = GetDeviceAddress<int>(inputs, 6); | |||
| auto excluded_list = GetDeviceAddress<int>(inputs, 7); | |||
| auto excluded_atom_numbers = GetDeviceAddress<int>(inputs, 8); | |||
| auto pme_uxyz = GetDeviceAddress<int>(workspace, 0); // workspace | |||
| auto pme_frxyz = GetDeviceAddress<float>(workspace, 1); // workspace | |||
| auto pme_q = GetDeviceAddress<T>(workspace, 2); // workspace | |||
| auto pme_fq = GetDeviceAddress<float>(workspace, 3); // workspace | |||
| auto pme_atom_near = GetDeviceAddress<int>(workspace, 4); // workspace | |||
| auto pme_bc = GetDeviceAddress<float>(workspace, 5); // workspace | |||
| auto pme_kxyz = GetDeviceAddress<int>(workspace, 6); // workspace | |||
| auto nl = GetDeviceAddress<T1>(workspace, 7); | |||
| auto reciprocal_ene = GetDeviceAddress<T>(outputs, 0); | |||
| auto self_ene = GetDeviceAddress<T>(outputs, 1); | |||
| auto direct_ene = GetDeviceAddress<T>(outputs, 2); | |||
| auto correction_ene = GetDeviceAddress<T>(outputs, 3); | |||
| PMEEnergy(fftx, ffty, fftz, atom_numbers, beta, boxlength, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, | |||
| pme_atom_near, pme_kxyz, uint_crd, charge, nl_numbers, nl_serial, nl, scaler, excluded_list_start, | |||
| excluded_list, excluded_atom_numbers, reciprocal_ene, self_ene, direct_ene, correction_ene, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(sizeof(VECTOR)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(max_nl_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T1)); | |||
| workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); | |||
| workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR)); | |||
| workspace_size_list_.push_back(PME_Nall * sizeof(T)); | |||
| workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex)); | |||
| workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int)); | |||
| workspace_size_list_.push_back(PME_Nfft * sizeof(float)); | |||
| workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR)); | |||
| workspace_size_list_.push_back(atom_numbers * max_nl_numbers * sizeof(T1)); | |||
| output_size_list_.push_back(sizeof(T)); | |||
| output_size_list_.push_back(sizeof(T)); | |||
| output_size_list_.push_back(sizeof(T)); | |||
| output_size_list_.push_back(sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int atom_numbers; | |||
| int max_nl_numbers = 800; | |||
| int fftx; | |||
| int ffty; | |||
| int fftz; | |||
| float beta; | |||
| int PME_Nall; | |||
| int PME_Nfft; | |||
| struct VECTOR { | |||
| float x; | |||
| float y; | |||
| float z; | |||
| }; | |||
| struct UNSIGNED_INT_VECTOR { | |||
| unsigned int uint_x; | |||
| unsigned int uint_y; | |||
| unsigned int uint_z; | |||
| }; | |||
| struct NEIGHBOR_LIST { | |||
| int atom_numbers; | |||
| int *atom_serial; | |||
| }; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/pme/pme_excluded_force_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(PMEExcludedForce, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| PMEExcludedForceGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,95 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_EXCLUDED_FORCE_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <cufft.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class PMEExcludedForceGpuKernel : public GpuKernel { | |||
| public: | |||
| PMEExcludedForceGpuKernel() : ele_uint_crd(1) {} | |||
| ~PMEExcludedForceGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| beta = static_cast<float>(GetAttr<float_t>(kernel_node, "beta")); | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd = GetDeviceAddress<int>(inputs, 0); | |||
| auto sacler = GetDeviceAddress<T>(inputs, 1); | |||
| auto charge = GetDeviceAddress<T>(inputs, 2); | |||
| auto excluded_list_start = GetDeviceAddress<int>(inputs, 3); | |||
| auto excluded_list = GetDeviceAddress<int>(inputs, 4); | |||
| auto excluded_atom_numbers = GetDeviceAddress<int>(inputs, 5); | |||
| auto force = GetDeviceAddress<T>(outputs, 0); | |||
| PMEExcludedForce(atom_numbers, beta, uint_crd, sacler, charge, excluded_list_start, excluded_list, | |||
| excluded_atom_numbers, force, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T1)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(T1)); | |||
| output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int atom_numbers; | |||
| float beta; | |||
| struct VECTOR { | |||
| float x; | |||
| float y; | |||
| float z; | |||
| }; | |||
| struct UNSIGNED_INT_VECTOR { | |||
| unsigned int uint_x; | |||
| unsigned int uint_y; | |||
| unsigned int uint_z; | |||
| }; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -0,0 +1,29 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/pme/pme_reciprocal_force_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(PMEReciprocalForce, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeUInt32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| PMEReciprocalForceGpuKernel, float, int) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,119 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_PME_PME_RECIPROCAL_FORCE_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <cufft.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class PMEReciprocalForceGpuKernel : public GpuKernel { | |||
| public: | |||
| PMEReciprocalForceGpuKernel() : ele_uint_crd(1) {} | |||
| ~PMEReciprocalForceGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| beta = static_cast<float>(GetAttr<float_t>(kernel_node, "beta")); | |||
| fftx = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftx")); | |||
| ffty = static_cast<int>(GetAttr<int64_t>(kernel_node, "ffty")); | |||
| fftz = static_cast<int>(GetAttr<int64_t>(kernel_node, "fftz")); | |||
| PME_Nall = fftx * ffty * fftz; | |||
| PME_Nfft = fftx * ffty * (fftz / 2 + 1); | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto boxlength = GetDeviceAddress<T>(inputs, 0); | |||
| auto uint_crd = GetDeviceAddress<const T1>(inputs, 1); | |||
| auto charge = GetDeviceAddress<T>(inputs, 2); | |||
| auto pme_uxyz = GetDeviceAddress<int>(workspace, 0); // workspace | |||
| auto pme_frxyz = GetDeviceAddress<float>(workspace, 1); // workspace | |||
| auto pme_q = GetDeviceAddress<T>(workspace, 2); // workspace | |||
| auto pme_fq = GetDeviceAddress<float>(workspace, 3); // workspace | |||
| auto pme_atom_near = GetDeviceAddress<int>(workspace, 4); // workspace | |||
| auto pme_bc = GetDeviceAddress<float>(workspace, 5); // workspace | |||
| auto pme_kxyz = GetDeviceAddress<int>(workspace, 6); // workspace | |||
| auto force = GetDeviceAddress<T>(outputs, 0); | |||
| PMEReciprocalForce(fftx, ffty, fftz, atom_numbers, beta, pme_bc, pme_uxyz, pme_frxyz, pme_q, pme_fq, pme_atom_near, | |||
| pme_kxyz, boxlength, uint_crd, charge, force, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(sizeof(VECTOR)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); | |||
| input_size_list_.push_back(atom_numbers * sizeof(VECTOR)); | |||
| workspace_size_list_.push_back(atom_numbers * sizeof(UNSIGNED_INT_VECTOR)); | |||
| workspace_size_list_.push_back(atom_numbers * sizeof(VECTOR)); | |||
| workspace_size_list_.push_back(PME_Nall * sizeof(T)); | |||
| workspace_size_list_.push_back(PME_Nfft * sizeof(cufftComplex)); | |||
| workspace_size_list_.push_back(atom_numbers * 64 * sizeof(int)); | |||
| workspace_size_list_.push_back(PME_Nfft * sizeof(float)); | |||
| workspace_size_list_.push_back(64 * sizeof(UNSIGNED_INT_VECTOR)); | |||
| output_size_list_.push_back(atom_numbers * sizeof(VECTOR)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int atom_numbers; | |||
| int fftx; | |||
| int ffty; | |||
| int fftz; | |||
| float beta; | |||
| int PME_Nall; | |||
| int PME_Nfft; | |||
| struct VECTOR { | |||
| float x; | |||
| float y; | |||
| float z; | |||
| }; | |||
| struct UNSIGNED_INT_VECTOR { | |||
| unsigned int uint_x; | |||
| unsigned int uint_y; | |||
| unsigned int uint_z; | |||
| }; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -114,7 +114,8 @@ if(ENABLE_GPU) | |||
| ${CUDNN_LIBRARY_PATH} | |||
| ${CUDA_PATH}/lib64/libcudart.so | |||
| ${CUDA_PATH}/lib64/stubs/libcuda.so | |||
| ${CUDA_PATH}/lib64/libcusolver.so) | |||
| ${CUDA_PATH}/lib64/libcusolver.so | |||
| ${CUDA_PATH}/lib64/libcufft.so) | |||
| endif() | |||
| if(CMAKE_SYSTEM_NAME MATCHES "Linux") | |||
| @@ -100,8 +100,13 @@ from ._embedding_cache_ops import (CacheSwapHashmap, SearchCacheIdx, CacheSwapTa | |||
| MapUniform, DynamicAssign, PadAndShift) | |||
| from .quantum_ops import PQC, Evolution | |||
| from .sponge_ops import (BondForce, BondEnergy, BondAtomEnergy, BondForceWithAtomEnergy, BondForceWithAtomVirial, | |||
| DihedralForce, DihedralEnergy, DihedralAtomEnergy, DihedralForceWithAtomEnergy, | |||
| AngleForce, AngleEnergy, AngleAtomEnergy, AngleForceWithAtomEnergy) | |||
| DihedralForce, DihedralEnergy, DihedralAtomEnergy, DihedralForceWithAtomEnergy, AngleForce, | |||
| AngleEnergy, AngleAtomEnergy, AngleForceWithAtomEnergy, PMEReciprocalForce, | |||
| LJForce, LJEnergy, LJForceWithPMEDirectForce, PMEExcludedForce, PMEEnergy, Dihedral14LJForce, | |||
| Dihedral14LJForceWithDirectCF, Dihedral14LJEnergy, Dihedral14LJCFForceWithAtomEnergy, | |||
| Dihedral14LJAtomEnergy, Dihedral14CFEnergy, Dihedral14CFAtomEnergy, MDIterationLeapFrog, | |||
| GetCenterOfGeometry, MDTemperature, NeighborListUpdate) | |||
| __all__ = [ | |||
| 'Unique', | |||
| @@ -438,6 +443,24 @@ __all__ = [ | |||
| "AngleEnergy", | |||
| "AngleAtomEnergy", | |||
| "AngleForceWithAtomEnergy", | |||
| 'PMEReciprocalForce', | |||
| 'LJForce', | |||
| 'LJForceWithPMEDirectForce', | |||
| 'LJEnergy', | |||
| 'PMEExcludedForce', | |||
| 'PMEEnergy', | |||
| "Dihedral14LJForce", | |||
| "Dihedral14LJEnergy", | |||
| "Dihedral14LJForceWithDirectCF", | |||
| "Dihedral14LJCFForceWithAtomEnergy", | |||
| "Dihedral14LJAtomEnergy", | |||
| "Dihedral14CFEnergy", | |||
| "MDIterationLeapFrog", | |||
| "Dihedral14CFAtomEnergy", | |||
| "GetCenterOfGeometry", | |||
| "MDTemperature", | |||
| "NeighborListUpdate", | |||
| ] | |||
| __all__.sort() | |||