| @@ -0,0 +1,75 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/util.cuh" | |||
// One thread per bond: for harmonic bond energy E = k * (|dr| - r0)^2,
// computes the pair force F = dE/dr * dr/|dr|, atomically accumulates -F on
// atom_a and +F on atom_b, and adds the bond energy and the virial
// contribution (-dE/dr * |dr|) onto atom_a only.
// Launch with a 1-D grid covering at least bond_numbers threads.
__global__ void BondForceWithAtomEnergyAndVirialKernel(const int bond_numbers, const UNSIGNED_INT_VECTOR *uint_crd,
                                                       const VECTOR *scaler, const int *atom_a, const int *atom_b,
                                                       const float *bond_k, const float *bond_r0, VECTOR *frc,
                                                       float *atom_energy, float *atom_virial) {
  int bond_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (bond_i < bond_numbers) {
    int atom_i = atom_a[bond_i];
    int atom_j = atom_b[bond_i];
    float k = bond_k[bond_i];
    float r0 = bond_r0[bond_i];
    // Minimum-image displacement between the two bonded atoms.
    VECTOR dr = Get_Periodic_Displacement(uint_crd[atom_i], uint_crd[atom_j], scaler[0]);
    float abs_r = norm3df(dr.x, dr.y, dr.z);
    float r_1 = 1.0f / abs_r;  // float literal: avoid silent double-precision division
    float tempf2 = abs_r - r0;
    float tempf = 2.0f * tempf2 * k;  // dE/dr for E = k * (r - r0)^2
    VECTOR f = tempf * r_1 * dr;
    atomicAdd(&frc[atom_i].x, -f.x);
    atomicAdd(&frc[atom_i].y, -f.y);
    atomicAdd(&frc[atom_i].z, -f.z);
    atomicAdd(&frc[atom_j].x, f.x);
    atomicAdd(&frc[atom_j].y, f.y);
    atomicAdd(&frc[atom_j].z, f.z);
    // Whole-bond energy/virial are credited to atom_i (per-atom decomposition
    // convention used by the per-bond accumulation in this module).
    atomicAdd(&atom_virial[atom_i], -tempf * abs_r);
    atomicAdd(&atom_energy[atom_i], k * tempf2 * tempf2);
  }
}
// Host-side launcher: zeroes the three accumulation buffers, reinterprets the
// flat float/uint buffers as packed vector types, then runs one thread per
// bond on the given stream.
void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
                                      const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
                                      const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v,
                                      cudaStream_t stream) {
  // Clear force (3 floats per atom), virial, and energy before accumulation.
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_v, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
  UNSIGNED_INT_VECTOR *crd_vec =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  VECTOR *frc_vec = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  VECTOR *scaler_vec = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  const size_t threads_per_block = 128;
  const size_t blocks_per_grid = ceilf(static_cast<float>(bond_numbers) / 128);
  BondForceWithAtomEnergyAndVirialKernel<<<blocks_per_grid, threads_per_block, 0, stream>>>(
    bond_numbers, crd_vec, scaler_vec, atom_a, atom_b, bond_k, bond_r0, frc_vec, atom_energy, atom_v);
}
| void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, | |||
| const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k, | |||
| const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v, | |||
| cudaStream_t stream); | |||
| @@ -0,0 +1,27 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_VIRIAL_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_BOND_BOND_FORCE_WITH_ATOM_VIRIAL_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, | |||
| const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k, | |||
| const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v, | |||
| cudaStream_t stream); | |||
| #endif | |||
| @@ -0,0 +1,44 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/get_center_of_mass_impl.cuh" | |||
// Grid-stride loop, one thread per residue: sums mass-weighted coordinates
// over the residue's atom index range [start[i], end[i]), then multiplies by
// the precomputed inverse residue mass to yield the center of mass.
__global__ void Get_Center_Of_Mass(int residue_numbers, int *start, int *end, VECTOR *crd, float *atom_mass,
                                   float *residue_mass_inverse, VECTOR *center_of_mass) {
  const int stride = gridDim.x * blockDim.x;
  for (int res = blockDim.x * blockIdx.x + threadIdx.x; res < residue_numbers; res += stride) {
    VECTOR weighted_sum = {0.0f, 0.0f, 0.0f};
    const int first_atom = start[res];
    const int last_atom = end[res];
    for (int atom = first_atom; atom < last_atom; ++atom) {
      weighted_sum = weighted_sum + atom_mass[atom] * crd[atom];
    }
    center_of_mass[res] = residue_mass_inverse[res] * weighted_sum;
  }
}
// Host-side launcher: zeroes the output buffer (3 floats per residue), then
// computes per-residue centers of mass on the given stream.
void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
                     float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * residue_numbers) / 128), 128, 0, stream>>>(3 * residue_numbers,
                                                                                        center_of_mass_f, 0.);
  VECTOR *crd_vec = reinterpret_cast<VECTOR *>(crd_f);
  VECTOR *com_vec = reinterpret_cast<VECTOR *>(center_of_mass_f);
  // Fixed 20x32 launch; the kernel's grid-stride loop covers any residue count.
  Get_Center_Of_Mass<<<20, 32, 0, stream>>>(residue_numbers, start, end, crd_vec, atom_mass, residue_mass_inverse,
                                            com_vec);
}
| void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass, | |||
| float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream); | |||
| @@ -0,0 +1,26 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass, | |||
| float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream); | |||
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTEROFMASS_IMPL_H_
| @@ -0,0 +1,51 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/map_center_of_mass_impl.cuh" | |||
// Wraps each residue back into the primary periodic box as a rigid unit.
// Thread layout: x-dimension indexes residues (grid-stride), y-dimension
// parallelizes over atoms within a residue, so launch with a 2-D block.
// For each residue the center of mass is mapped into [0, box) per axis; the
// resulting translation (scaled by `scaler`) is applied to every atom of the
// residue, producing wrapped coordinates in `crd` from `no_wrap_crd`.
__global__ void Map_Center_Of_Mass(int residue_numbers, int *start, int *end, float scaler, VECTOR *center_of_mass,
                                   VECTOR *box_length, VECTOR *no_wrap_crd, VECTOR *crd) {
  VECTOR trans_vec;
  VECTOR com;
  for (int residue_i = blockDim.x * blockIdx.x + threadIdx.x; residue_i < residue_numbers;
       residue_i += gridDim.x * blockDim.x) {
    com = center_of_mass[residue_i];
    // Center of mass folded into the box: com - floor(com / L) * L.
    trans_vec.x = com.x - floorf(com.x / box_length[0].x) * box_length[0].x;
    trans_vec.y = com.y - floorf(com.y / box_length[0].y) * box_length[0].y;
    trans_vec.z = com.z - floorf(com.z / box_length[0].z) * box_length[0].z;
    // Translation that moves the residue's COM to its wrapped position.
    trans_vec = scaler * trans_vec - com;
    // All threadIdx.y lanes of this residue apply the same translation.
    for (int atom_i = start[residue_i] + threadIdx.y; atom_i < end[residue_i]; atom_i += blockDim.y) {
      crd[atom_i] = no_wrap_crd[atom_i] + trans_vec;
    }
  }
}
// Host-side launcher: reinterprets the flat float buffers as packed VECTOR
// triples and launches the residue-wrapping kernel on the given stream.
void MapCenterOfMass(int residue_numbers, int *start, int *end, float scaler, float *center_of_mass_f,
                     float *box_length_f, float *no_wrap_crd_f, float *crd_f, cudaStream_t stream) {
  VECTOR *wrapped_crd = reinterpret_cast<VECTOR *>(crd_f);
  VECTOR *unwrapped_crd = reinterpret_cast<VECTOR *>(no_wrap_crd_f);
  VECTOR *box = reinterpret_cast<VECTOR *>(box_length_f);
  VECTOR *com = reinterpret_cast<VECTOR *>(center_of_mass_f);
  // 20 blocks of 32x4 threads: x spans residues (grid-stride in the kernel),
  // y spans atoms inside a residue.
  Map_Center_Of_Mass<<<20, {32, 4}, 0, stream>>>(residue_numbers, start, end, scaler, com, box, unwrapped_crd,
                                                 wrapped_crd);
}
| void MapCenterOfMass(int residue_numbers, int *start, int *end, float scaler, float *center_of_mass_f, | |||
| float *box_length_f, float *no_wrap_crd_f, float *crd_f, cudaStream_t stream); | |||
| @@ -0,0 +1,26 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void MapCenterOfMass(int residue_numbers, int *start, int *end, float scaler, float *center_of_mass_f, | |||
| float *box_length_f, float *no_wrap_crd_f, float *crd_f, cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_ | |||
| @@ -0,0 +1,147 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
// Lennard-Jones + PME direct-space Coulomb force with per-atom energy.
// Thread layout: x-dimension indexes atoms, y-dimension parallelizes over each
// atom's neighbor list, so launch with a 2-D block (e.g. 8 x N).
// For each neighbor pair within `cutoff`, accumulates the LJ 12-6 force
// (A/B tables indexed by a triangular pair-type index) plus the erfc-screened
// direct-space Coulomb force. The pair force is atomically subtracted from the
// neighbor and the running sum is atomically added to atom_i at the end; the
// pair energy (LJ + direct CF) is credited to atom_i only.
// NOTE(review): no reduction over threadIdx.y before the trailing atomicAdds —
// presumably each y-lane's partial contributes independently via atomics.
__global__ void LJ_Direct_CF_Force_With_Atom_Energy_CUDA(const int atom_numbers, const NEIGHBOR_LIST *nl,
                                                         const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *boxlength,
                                                         const float *LJ_type_A, const float *LJ_type_B,
                                                         const float cutoff, VECTOR *frc, const float pme_beta,
                                                         const float sqrt_pi, float *atom_energy) {
  int atom_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (atom_i < atom_numbers) {
    NEIGHBOR_LIST nl_i = nl[atom_i];
    int N = nl_i.atom_numbers;  // number of neighbors of atom_i
    int atom_j;
    int int_x;
    int int_y;
    int int_z;
    UINT_VECTOR_LJ_TYPE r1 = uint_crd[atom_i], r2;
    VECTOR dr;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_6;
    float frc_abs = 0.;
    VECTOR frc_lin;
    VECTOR frc_record = {0., 0., 0.};  // per-thread force accumulator for atom_i
    float charge_i = r1.charge;
    float charge_j;
    float dr_abs;
    float dr_1;
    float beta_dr;
    float frc_cf_abs;
    float ene_lin = 0.;   // LJ energy accumulator
    float ene_lin2 = 0.;  // direct-space Coulomb energy accumulator
    int x, y;
    int atom_pair_LJ_type;
    // Each threadIdx.y lane walks a strided slice of the neighbor list.
    for (int j = threadIdx.y; j < N; j = j + blockDim.y) {
      atom_j = nl_i.atom_serial[j];
      r2 = uint_crd[atom_j];
      charge_j = r2.charge;
      // Unsigned-int coordinate difference; wraparound of the subtraction
      // implements the periodic minimum image once scaled by boxlength.
      int_x = r2.uint_x - r1.uint_x;
      int_y = r2.uint_y - r1.uint_y;
      int_z = r2.uint_z - r1.uint_z;
      dr.x = boxlength[0].x * int_x;
      dr.y = boxlength[0].y * int_y;
      dr.z = boxlength[0].z * int_z;
      dr_abs = norm3df(dr.x, dr.y, dr.z);
      if (dr_abs < cutoff) {
        dr_1 = 1. / dr_abs;
        dr_2 = dr_1 * dr_1;
        dr_4 = dr_2 * dr_2;
        dr_8 = dr_4 * dr_4;
        dr_6 = dr_4 * dr_2;
        // Branchless |LJ_type difference| via sign-bit trick, then min/max of
        // the two types, then the triangular-matrix pair index:
        // index = max*(max+1)/2 + min.
        y = (r2.LJ_type - r1.LJ_type);
        x = y >> 31;
        y = (y ^ x) - x;
        x = r2.LJ_type + r1.LJ_type;
        r2.LJ_type = (x + y) >> 1;
        x = (x - y) >> 1;
        atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
        // LJ force magnitude over r: (-A/r^6 + B)/r^8 (A, B pre-scaled tables).
        frc_abs = (-LJ_type_A[atom_pair_LJ_type] * dr_6 + LJ_type_B[atom_pair_LJ_type]) * dr_8;
        // PME direct-space Coulomb: d/dr of erfc(beta*r)/r, divided by r.
        beta_dr = pme_beta * dr_abs;
        frc_cf_abs = beta_dr * sqrt_pi * expf(-beta_dr * beta_dr) + erfcf(beta_dr);
        frc_cf_abs = frc_cf_abs * dr_2 * dr_1;
        frc_cf_abs = charge_i * charge_j * frc_cf_abs;
        ene_lin2 = ene_lin2 + charge_i * charge_j * erfcf(beta_dr) * dr_1;
        // LJ energy: A/(12 r^12) - B/(6 r^6) (tables carry the 12/6 factors).
        ene_lin =
          ene_lin +
          (0.083333333 * LJ_type_A[atom_pair_LJ_type] * dr_6 - 0.166666666 * LJ_type_B[atom_pair_LJ_type]) * dr_6;
        frc_abs = frc_abs - frc_cf_abs;
        frc_lin.x = frc_abs * dr.x;
        frc_lin.y = frc_abs * dr.y;
        frc_lin.z = frc_abs * dr.z;
        frc_record.x = frc_record.x + frc_lin.x;
        frc_record.y = frc_record.y + frc_lin.y;
        frc_record.z = frc_record.z + frc_lin.z;
        // Newton's third law: neighbor receives the opposite force.
        atomicAdd(&frc[atom_j].x, -frc_lin.x);
        atomicAdd(&frc[atom_j].y, -frc_lin.y);
        atomicAdd(&frc[atom_j].z, -frc_lin.z);
      }
    }
    atomicAdd(&frc[atom_i].x, frc_record.x);
    atomicAdd(&frc[atom_i].y, frc_record.y);
    atomicAdd(&frc[atom_i].z, frc_record.z);
    atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);
  }
}
// Host-side launcher for the combined LJ + PME direct-space Coulomb force:
// zeroes the force buffer, rebuilds the NEIGHBOR_LIST views from the flat
// arrays, packs coordinates with LJ type and charge, then launches the force
// kernel. `thread_LJ` and TWO_DIVIDED_BY_SQRT_PI come from common_sponge.cuh
// (not visible here — presumably a 2-D block dim; confirm against that header).
void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta,
                                   const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f,
                                   float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl,
                                   const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy,
                                   cudaStream_t stream) {
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  VECTOR *frc = reinterpret_cast<VECTOR *>(frc_f);
  VECTOR *scaler = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(scaler_f));
  // Fixed capacity per atom's neighbor slice in the flat nl buffer.
  int max_neighbor_numbers = 800;
  NEIGHBOR_LIST *nl_a = reinterpret_cast<NEIGHBOR_LIST *>(nl);
  construct_neighbor_list_kernel<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
    atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl_a);
  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ_a = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  // Merge coordinates, LJ type, and charge into the packed per-atom struct.
  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ_a, LJtype, charge);
  LJ_Direct_CF_Force_With_Atom_Energy_CUDA<<<ceilf(static_cast<float>(atom_numbers) / 8), thread_LJ, 0, stream>>>(
    atom_numbers, nl_a, uint_crd_with_LJ_a, scaler, d_LJ_A, d_LJ_B, cutoff, frc, pme_beta, TWO_DIVIDED_BY_SQRT_PI,
    atom_energy);
  return;
}
| void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta, | |||
| const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f, | |||
| float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl, | |||
| const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy, | |||
| cudaStream_t stream); | |||
| @@ -0,0 +1,29 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_PME_DIRECT_FORCE_WITH_ATOM_ENERGY_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_LJ_LJ_PME_DIRECT_FORCE_WITH_ATOM_ENERGY_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta, | |||
| const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f, | |||
| float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl, | |||
| const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy, | |||
| cudaStream_t stream); | |||
| #endif | |||
| @@ -0,0 +1,139 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common_sponge.cuh" | |||
// One thread per 1-4 (dihedral-end) pair: computes the scaled LJ 12-6 force
// plus the scaled Coulomb force between atoms a_14[i] and b_14[i], atomically
// accumulates +/-F on the two atoms, and credits the pair energy and virial
// (-F . dr) to the first atom. Launch with a 1-D grid covering
// dihedral_14_numbers threads.
__global__ void Dihedral14LJCFForceWithAtomEnergyAndVirialKernel(
  const int dihedral_14_numbers, const UINT_VECTOR_LJ_TYPE *uint_crd, const VECTOR *scaler, const int *a_14,
  const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor, const float *LJ_type_A,
  const float *LJ_type_B, VECTOR *frc, float *atom_energy, float *atom_virial) {
  int dihedral_14_i = blockDim.x * blockIdx.x + threadIdx.x;
  if (dihedral_14_i < dihedral_14_numbers) {
    UINT_VECTOR_LJ_TYPE r1, r2;
    VECTOR dr;
    float dr_abs;
    float dr2;
    float dr_1;
    float dr_2;
    float dr_4;
    float dr_8;
    float dr_14;
    float frc_abs = 0.;
    VECTOR temp_frc;
    float ene_lin;
    float ene_lin2;
    int x, y;
    int atom_pair_LJ_type;
    int atom_i = a_14[dihedral_14_i];
    int atom_j = b_14[dihedral_14_i];
    r1 = uint_crd[atom_i];
    r2 = uint_crd[atom_j];
    // Minimum-image displacement and the inverse-power ladder used below.
    dr = Get_Periodic_Displacement(r2, r1, scaler[0]);
    dr2 = dr.x * dr.x + dr.y * dr.y + dr.z * dr.z;
    dr_2 = 1.0 / dr2;
    dr_4 = dr_2 * dr_2;
    dr_8 = dr_4 * dr_4;
    dr_14 = dr_8 * dr_4 * dr_2;
    dr_abs = norm3df(dr.x, dr.y, dr.z);
    dr_1 = 1. / dr_abs;
    // CF: scaled Coulomb force magnitude over r: -qi*qj*scale / r^3.
    float charge_i = r1.charge;
    float charge_j = r2.charge;
    float frc_cf_abs;
    frc_cf_abs = cf_scale_factor[dihedral_14_i] * dr_2 * dr_1;
    frc_cf_abs = -charge_i * charge_j * frc_cf_abs;
    // LJ: branchless |type difference| via sign-bit trick, min/max of the two
    // types, then the triangular pair index max*(max+1)/2 + min.
    y = (r2.LJ_type - r1.LJ_type);
    x = y >> 31;
    y = (y ^ x) - x;
    x = r2.LJ_type + r1.LJ_type;
    r2.LJ_type = (x + y) >> 1;
    x = (x - y) >> 1;
    atom_pair_LJ_type = (r2.LJ_type * (r2.LJ_type + 1) >> 1) + x;
    frc_abs = -LJ_type_A[atom_pair_LJ_type] * dr_14 + LJ_type_B[atom_pair_LJ_type] * dr_8;
    frc_abs *= lj_scale_factor[dihedral_14_i];
    frc_abs += frc_cf_abs;
    temp_frc.x = frc_abs * dr.x;
    temp_frc.y = frc_abs * dr.y;
    temp_frc.z = frc_abs * dr.z;
    // Newton's third law: opposite forces on the pair.
    atomicAdd(&frc[atom_j].x, -temp_frc.x);
    atomicAdd(&frc[atom_j].y, -temp_frc.y);
    atomicAdd(&frc[atom_j].z, -temp_frc.z);
    atomicAdd(&frc[atom_i].x, temp_frc.x);
    atomicAdd(&frc[atom_i].y, temp_frc.y);
    atomicAdd(&frc[atom_i].z, temp_frc.z);
    ene_lin = r1.charge * r2.charge * dr_1;
    ene_lin *= cf_scale_factor[dihedral_14_i];
    ene_lin2 = 0.08333333 * LJ_type_A[atom_pair_LJ_type] * dr_4 * dr_8 -
               0.1666666 * LJ_type_B[atom_pair_LJ_type] * dr_4 * dr_2;  // LJ A/B tables were pre-multiplied by
                                                                       // 12 and 6, so divide back (1/12, 1/6) here
    ene_lin2 *= lj_scale_factor[dihedral_14_i];
    atomicAdd(&atom_energy[atom_i], ene_lin + ene_lin2);
    atomicAdd(&atom_virial[atom_i], -temp_frc * dr);
  }
}
// Host-side launcher: packs coordinates with LJ type and charge, zeroes the
// force, energy, and virial accumulators, then launches one thread per 1-4
// pair on the given stream.
void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers,
                                                const int *uint_crd_f, const int *LJtype, const float *charge,
                                                float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14,
                                                const int *b_14, const float *lj_scale_factor,
                                                const float *cf_scale_factor, const float *LJ_type_A,
                                                const float *LJ_type_B, float *frc_f, float *atom_energy,
                                                float *atom_virial, cudaStream_t stream) {
  size_t thread_per_block = 128;
  // Fix: grid must cover the kernel's loop bound (dihedral_14_numbers, which
  // may exceed atom_numbers), not the atom count.
  size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128);
  UNSIGNED_INT_VECTOR *uint_crd =
    const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f));
  UINT_VECTOR_LJ_TYPE *uint_crd_with_LJ = reinterpret_cast<UINT_VECTOR_LJ_TYPE *>(uint_crd_with_LJ_f);
  Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(
    atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge);
  Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(3 * atom_numbers, frc_f, 0.);
  // Fix: atom_energy was previously zeroed twice while atom_virial was never
  // zeroed, so stale virial values were accumulated into. Grid sizes now match
  // the atom_numbers element counts instead of 3 * atom_numbers.
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_energy, 0.);
  Reset_List<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, atom_virial, 0.);
  VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f));
  VECTOR *frc = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(frc_f));
  Dihedral14LJCFForceWithAtomEnergyAndVirialKernel<<<block_per_grid, thread_per_block, 0, stream>>>(
    dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A,
    LJ_type_B, frc, atom_energy, atom_virial);
  return;
}
| void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers, | |||
| const int *uint_crd_f, const int *LJtype, const float *charge, | |||
| float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *lj_scale_factor, | |||
| const float *cf_scale_factor, const float *LJ_type_A, | |||
| const float *LJ_type_B, float *frc_f, float *atom_energy, | |||
| float *atom_virial, cudaStream_t stream); | |||
| @@ -0,0 +1,29 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H | |||
| #include <curand_kernel.h> | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers, | |||
| const int *uint_crd_f, const int *LJtype, const float *charge, | |||
| float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, | |||
| const int *b_14, const float *lj_scale_factor, | |||
| const float *cf_scale_factor, const float *LJ_type_A, | |||
| const float *LJ_type_B, float *frc_f, float *atom_energy, | |||
| float *atom_virial, cudaStream_t stream); | |||
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H | |||
| @@ -13,8 +13,31 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh" | |||
| #include <stdio.h> | |||
| #include <vector> | |||
// Unary minus for VECTOR: component-wise negation.
__device__ __host__ VECTOR operator-(const VECTOR &vecb) {
  VECTOR vec;
  vec.x = -vecb.x;
  vec.y = -vecb.y;
  vec.z = -vecb.z;
  return vec;
}
// Minimum-image displacement vec_a - vec_b: each component is wrapped by the
// corresponding box length into [-box/2, box/2).
__device__ __host__ VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR vec_b, const VECTOR box_length) {
  VECTOR dr;
  dr.x = vec_a.x - vec_b.x;
  dr.y = vec_a.y - vec_b.y;
  dr.z = vec_a.z - vec_b.z;  // fix: previously the z difference was stored into dr.x, corrupting both x and z
  dr.x = dr.x - floorf(dr.x / box_length.x + 0.5f) * box_length.x;
  dr.y = dr.y - floorf(dr.y / box_length.y + 0.5f) * box_length.y;
  dr.z = dr.z - floorf(dr.z / box_length.z + 0.5f) * box_length.z;
  return dr;
}
| __global__ void Copy_List(const int element_numbers, const int *origin_list, int *list) { | |||
| int i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (i < element_numbers) { | |||
| @@ -32,15 +55,20 @@ __global__ void Crd_To_Uint_Crd(const int atom_numbers, float *scale_factor, con | |||
| UNSIGNED_INT_VECTOR *uint_crd) { | |||
| int atom_i = blockDim.x * blockIdx.x + threadIdx.x; | |||
| if (atom_i < atom_numbers) { | |||
| uint_crd[atom_i].uint_x = crd[atom_i].x * scale_factor[0]; | |||
| uint_crd[atom_i].uint_y = crd[atom_i].y * scale_factor[1]; | |||
| uint_crd[atom_i].uint_z = crd[atom_i].z * scale_factor[2]; | |||
| /*uint_crd[atom_i].uint_x = 2 * uint_crd[atom_i].uint_x; | |||
| uint_crd[atom_i].uint_y = 2 * uint_crd[atom_i].uint_y; | |||
| uint_crd[atom_i].uint_z = 2 * uint_crd[atom_i].uint_z;*/ | |||
| uint_crd[atom_i].uint_x = uint_crd[atom_i].uint_x << 1; | |||
| uint_crd[atom_i].uint_y = uint_crd[atom_i].uint_y << 1; | |||
| uint_crd[atom_i].uint_z = uint_crd[atom_i].uint_z << 1; | |||
| INT_VECTOR tempi; | |||
| VECTOR temp = crd[atom_i]; | |||
| temp.x *= scale_factor[0]; | |||
| temp.y *= scale_factor[1]; | |||
| temp.z *= scale_factor[2]; | |||
| tempi.int_x = temp.x; | |||
| tempi.int_y = temp.y; | |||
| tempi.int_z = temp.z; | |||
| uint_crd[atom_i].uint_x = (tempi.int_x << 2); | |||
| uint_crd[atom_i].uint_y = (tempi.int_y << 2); | |||
| uint_crd[atom_i].uint_z = (tempi.int_z << 2); | |||
| } | |||
| } | |||
| @@ -80,7 +108,6 @@ __global__ void Crd_Periodic_Map(const int atom_numbers, VECTOR *crd, const floa | |||
| } else { | |||
| crd[atom_i].y = crd[atom_i].y + box_length[1]; | |||
| } | |||
| if (crd[atom_i].z >= 0) { | |||
| if (crd[atom_i].z < box_length[2]) { | |||
| } else { | |||
| @@ -198,6 +225,21 @@ __global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const | |||
| } | |||
| } | |||
// One thread per atom: computes the periodic minimum-image displacement of
// each atom since the last neighbor-list build; if any squared displacement
// exceeds half_skin_square, raises the shared refresh flag.
__global__ void Is_need_refresh_neighbor_list_cuda(const int atom_numbers, const VECTOR *crd, const VECTOR *old_crd,
                                                   const VECTOR *box_length, const float half_skin_square,
                                                   int *need_refresh_flag) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < atom_numbers) {
    VECTOR r1 = crd[i];
    VECTOR r2 = old_crd[i];
    r1 = Get_Periodic_Displacement(r1, r2, box_length[0]);
    float r1_2 = r1.x * r1.x + r1.y * r1.y + r1.z * r1.z;
    if (r1_2 > half_skin_square) {
      // atomicExch: many threads may set the flag concurrently; any one wins.
      atomicExch(&need_refresh_flag[0], 1);
    }
  }
}
| __global__ void Delete_Excluded_Atoms_Serial_In_Neighbor_List(const int atom_numbers, NEIGHBOR_LIST *nl, | |||
| const int *excluded_list_start, const int *excluded_list, | |||
| const int *excluded_atom_numbers) { | |||
| @@ -245,27 +287,18 @@ void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_n | |||
| int *excluded_list_start, int *excluded_list, int *excluded_numbers, | |||
| float cutoff_skin_square, int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy, | |||
| cudaStream_t stream) { | |||
| if (refresh_sign[0] == 1) { | |||
| VECTOR trans_vec = {-skin, -skin, -skin}; | |||
| std::vector<int> h_refresh_sign(1); | |||
| cudaMemcpyAsync(h_refresh_sign.data(), refresh_sign, sizeof(int), cudaMemcpyDeviceToHost, stream); | |||
| if (h_refresh_sign[0] == 1) { | |||
| Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / thread), thread, 0, stream>>>( | |||
| grid_numbers, atom_numbers_in_grid_bucket, bucket); | |||
| Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd, | |||
| trans_vec); | |||
| Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd, | |||
| box_length); | |||
| Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>( | |||
| atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial); | |||
| trans_vec.x = -trans_vec.x; | |||
| trans_vec.y = -trans_vec.y; | |||
| trans_vec.z = -trans_vec.z; | |||
| Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, stream>>>(atom_numbers, crd, | |||
| trans_vec); | |||
| Copy_List<<<ceilf(static_cast<float>(3. * atom_numbers) / thread), thread, 0, stream>>>( | |||
| 3 * atom_numbers, reinterpret_cast<float *>(crd), reinterpret_cast<float *>(old_crd)); | |||
| @@ -282,40 +315,10 @@ void Refresh_Neighbor_List(int *refresh_sign, const int thread, const int atom_n | |||
| Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<ceilf(static_cast<float>(atom_numbers) / thread), thread, 0, | |||
| stream>>>(atom_numbers, d_nl, excluded_list_start, excluded_list, | |||
| excluded_numbers); | |||
| refresh_sign[0] = 0; | |||
| h_refresh_sign[0] = 0; | |||
| } | |||
| } | |||
// Builds the neighbor list from scratch: clears the grid buckets, maps the
// coordinates into the periodic box, bins atoms into grid cells, translates
// coordinates by +skin, snapshots crd into old_crd, converts to unsigned-int
// coordinates and finally collects neighbors and removes excluded pairs.
// All kernels are enqueued on `stream`; the setup kernels use a fixed block
// width of 32 while the two neighbor kernels use the caller-supplied `thread`.
void Refresh_Neighbor_List_First_Time(int *refresh_sign, const int thread, const int atom_numbers, VECTOR *crd,
                                      VECTOR *old_crd, UNSIGNED_INT_VECTOR *uint_crd, float *crd_to_uint_crd_cof,
                                      float *uint_dr_to_dr_cof, int *atom_in_grid_serial, const float skin,
                                      float *box_length, const GRID_POINTER *gpointer, GRID_BUCKET *bucket,
                                      int *atom_numbers_in_grid_bucket, NEIGHBOR_LIST *d_nl, int *excluded_list_start,
                                      int *excluded_list, int *excluded_numbers, float cutoff_skin_square,
                                      int grid_numbers, float *grid_length_inverse, int *grid_N, int Nxy,
                                      cudaStream_t stream) {
  // Hoist the repeated ceil-div launch-dimension computations.
  const int grid_blocks_32 = ceilf(static_cast<float>(grid_numbers) / 32);
  const int atom_blocks_32 = ceilf(static_cast<float>(atom_numbers) / 32);
  const int crd_blocks_32 = ceilf(static_cast<float>(3. * atom_numbers) / 32);
  const int atom_blocks = ceilf(static_cast<float>(atom_numbers) / thread);
  const VECTOR skin_shift = {skin, skin, skin};
  Clear_Grid_Bucket<<<grid_blocks_32, 32, 0, stream>>>(grid_numbers, atom_numbers_in_grid_bucket, bucket);
  Crd_Periodic_Map<<<atom_blocks_32, 32, 0, stream>>>(atom_numbers, crd, box_length);
  Find_Atom_In_Grid_Serial<<<atom_blocks_32, 32, 0, stream>>>(atom_numbers, grid_length_inverse, crd, grid_N, Nxy,
                                                              atom_in_grid_serial);
  Vector_Translation<<<atom_blocks_32, 32, 0, stream>>>(atom_numbers, crd, skin_shift);
  // Snapshot the (translated) coordinates so later drift checks compare
  // against this rebuild's positions.
  Copy_List<<<crd_blocks_32, 32, 0, stream>>>(3 * atom_numbers, reinterpret_cast<float *>(crd),
                                              reinterpret_cast<float *>(old_crd));
  Put_Atom_In_Grid_Bucket<<<atom_blocks_32, 32, 0, stream>>>(atom_numbers, atom_in_grid_serial, bucket,
                                                             atom_numbers_in_grid_bucket);
  Crd_To_Uint_Crd<<<atom_blocks_32, 32, 0, stream>>>(atom_numbers, crd_to_uint_crd_cof, crd, uint_crd);
  Find_atom_neighbors<<<atom_blocks, thread, 0, stream>>>(atom_numbers, uint_crd, uint_dr_to_dr_cof,
                                                          atom_in_grid_serial, gpointer, bucket,
                                                          atom_numbers_in_grid_bucket, d_nl, cutoff_skin_square);
  Delete_Excluded_Atoms_Serial_In_Neighbor_List<<<atom_blocks, thread, 0, stream>>>(
    atom_numbers, d_nl, excluded_list_start, excluded_list, excluded_numbers);
}
| __global__ void construct_neighbor_list_kernel(int atom_numbers, int max_neighbor_numbers, int *nl_atom_numbers, | |||
| int *nl_atom_serial, NEIGHBOR_LIST *nl) { | |||
| for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) { | |||
| @@ -330,15 +333,39 @@ void Construct_Neighbor_List(int atom_numbers, int max_neighbor_numbers, int *nl | |||
| atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl); | |||
| } | |||
| __global__ void copy_neighbor_list_atom_number(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers) { | |||
| for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) { | |||
// Flattens the NEIGHBOR_LIST structures into two plain device arrays:
// nl_atom_numbers[i] receives the neighbor count of atom i, and
// nl_atom_serial[i * max_neighbor_numbers + j] receives its j-th neighbor
// serial (zero-padded past the actual count). Grid-stride in both x (atoms)
// and y (neighbor slots), so any launch configuration covers the data.
__global__ void copy_neighbor_list_atom_number(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl,
                                               int *nl_atom_numbers, int *nl_atom_serial) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < atom_numbers; i += gridDim.x * blockDim.x) {
    const int neighbor_count = nl[i].atom_numbers;
    nl_atom_numbers[i] = neighbor_count;
    for (int j = blockIdx.y * blockDim.y + threadIdx.y; j < max_neighbor_numbers; j += gridDim.y * blockDim.y) {
      nl_atom_serial[i * max_neighbor_numbers + j] = (j < neighbor_count) ? nl[i].atom_serial[j] : 0;
    }
  }
}
| void CopyNeighborListAtomNumber(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream) { | |||
| copy_neighbor_list_atom_number<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, nl, | |||
| nl_atom_numbers); | |||
// Fills the first element_numbers entries of an int device array with
// replace_element. One thread per element; out-of-range threads exit early.
__global__ void Reset_List(const int element_numbers, int *list, const int replace_element) {
  const int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= element_numbers) {
    return;
  }
  list[idx] = replace_element;
}
// Float overload: fills the first element_numbers entries of a float device
// array with replace_element. One thread per element; guard-return on overflow.
__global__ void Reset_List(const int element_numbers, float *list, const float replace_element) {
  const int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= element_numbers) {
    return;
  }
  list[idx] = replace_element;
}
// Host wrapper: launches copy_neighbor_list_atom_number on `stream` with 128
// threads per block (1D grid; the kernel's y-dimension grid-stride loop then
// iterates neighbor slots sequentially within each thread).
void CopyNeighborListAtomNumber(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers,
                                int *nl_atom_serial, cudaStream_t stream) {
  const int block_count = ceilf(static_cast<float>(atom_numbers) / 128);
  copy_neighbor_list_atom_number<<<block_count, 128, 0, stream>>>(atom_numbers, max_neighbor_numbers, nl,
                                                                  nl_atom_numbers, nl_atom_serial);
}
| void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float skin, int Nxy, float cutoff_skin_square, | |||
| @@ -348,22 +375,13 @@ void Refresh_Neighbor_List_No_Check(int grid_numbers, int atom_numbers, float sk | |||
| UNSIGNED_INT_VECTOR *uint_crd, float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, | |||
| NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list, | |||
| int *excluded_numbers, cudaStream_t stream) { | |||
| VECTOR trans_vec = {-skin, -skin, -skin}; | |||
| Clear_Grid_Bucket<<<ceilf(static_cast<float>(grid_numbers) / 32), 32, 0, stream>>>( | |||
| grid_numbers, atom_numbers_in_grid_bucket, bucket); | |||
| Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec); | |||
| Crd_Periodic_Map<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, box_length); | |||
| Find_Atom_In_Grid_Serial<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| atom_numbers, grid_length_inverse, crd, grid_N, Nxy, atom_in_grid_serial); | |||
| trans_vec.x = -trans_vec.x; | |||
| trans_vec.y = -trans_vec.y; | |||
| trans_vec.z = -trans_vec.z; | |||
| Vector_Translation<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>(atom_numbers, crd, trans_vec); | |||
| cudaMemcpyAsync(old_crd, crd, sizeof(VECTOR) * atom_numbers, cudaMemcpyDeviceToDevice, stream); | |||
| Put_Atom_In_Grid_Bucket<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | |||
| @@ -387,51 +405,53 @@ __global__ void Mul_half(float *src, float *dst) { | |||
| } | |||
| } | |||
| void Neighbor_List_Update(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval, | |||
| int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square, | |||
| int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse, | |||
| int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd, | |||
| float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, | |||
| float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, | |||
| int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square, | |||
| int *is_need_refresh_neighbor_list, cudaStream_t stream) { | |||
| if (not_first_time) { | |||
| if (refresh_interval > 0) { | |||
| std::vector<int> refresh_count_list(1); | |||
| cudaMemcpyAsync(refresh_count_list.data(), d_refresh_count, sizeof(int), cudaMemcpyDeviceToHost, stream); | |||
| cudaStreamSynchronize(stream); | |||
| int refresh_count = refresh_count_list[0]; | |||
| if (refresh_count % refresh_interval == 0) { | |||
| Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof); | |||
| Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length, | |||
| atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket, | |||
| reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd), | |||
| half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), | |||
| uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, | |||
| excluded_numbers, stream); | |||
| } | |||
| refresh_count += 1; | |||
| cudaMemcpyAsync(d_refresh_count, &refresh_count, sizeof(int), cudaMemcpyHostToDevice, stream); | |||
| } else { | |||
| Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>( | |||
| atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd), half_skin_square, | |||
| is_need_refresh_neighbor_list); | |||
| Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof); | |||
| Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd), | |||
| reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), | |||
| half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, | |||
| gpointer, bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list, | |||
| excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, Nxy, | |||
| stream); | |||
// Writes dst[i] = src[i] / 4 for the first three components (launched as
// <<<1, 3>>>: one thread per component of the coordinate-scaling vector).
__global__ void Mul_quarter(float *src, float *dst) {
  int index = threadIdx.x;
  if (index < 3) {
    // 0.25f keeps the multiply in single precision; the bare double literal
    // 0.25 forced a float->double->float round trip on every thread. The
    // result is bit-identical since 0.25 is exactly representable.
    dst[index] = src[index] * 0.25f;
  }
}
// Host-side step counter consumed by Neighbor_List_Update_New below.
// NOTE(review): file-scope mutable state — shared by every caller in the
// process and not thread-safe; confirm only one neighbor-list updater
// instance uses this translation unit at a time.
int refresh_count = 0;
// Top-level neighbor-list maintenance entry point. Three modes:
//   forced_update      -> unconditionally rebuild (no drift check);
//   refresh_interval>0 -> rebuild every refresh_interval-th call, counted by
//                         the file-scope `refresh_count` (d_refresh_count and
//                         not_first_time are accepted but unused here);
//   otherwise          -> drift-checked refresh path.
// Mul_quarter / Mul_half rescale crd_to_uint_crd_cof into
// half_crd_to_uint_crd_cof before each rebuild. All work is enqueued on
// `stream`.
void Neighbor_List_Update_New(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval,
                              int not_first_time, float skin, int Nxy, float cutoff_square,
                              float cutoff_with_skin_square, int *grid_N, float *box_length,
                              int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial,
                              GRID_BUCKET *bucket, float *crd, float *old_crd, float *crd_to_uint_crd_cof,
                              float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, float *uint_dr_to_dr_cof,
                              GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list,
                              int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list,
                              int forced_update, int forced_check, cudaStream_t stream) {
  if (forced_update) {
    // Forced rebuild: rescale the conversion coefficients, then rebuild
    // unconditionally.
    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List_No_Check(
      grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length, atom_numbers_in_grid_bucket,
      grid_length_inverse, atom_in_grid_serial, bucket, reinterpret_cast<VECTOR *>(crd),
      reinterpret_cast<VECTOR *>(old_crd), half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
      uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list, excluded_numbers, stream);
  } else if (refresh_interval > 0 && !forced_check) {
    // Interval-driven rebuild, gated on the host-side global counter.
    if (refresh_count % refresh_interval == 0) {
      Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
      Refresh_Neighbor_List_No_Check(grid_numbers, atom_numbers, skin, Nxy, cutoff_square, grid_N, box_length,
                                     atom_numbers_in_grid_bucket, grid_length_inverse, atom_in_grid_serial, bucket,
                                     reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
                                     half_crd_to_uint_crd_cof, reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                                     uint_dr_to_dr_cof, gpointer, d_nl, excluded_list_start, excluded_list,
                                     excluded_numbers, stream);
    }
    refresh_count += 1;
  } else {
    // Drift-checked path.
    // NOTE(review): this branch performs a full first-time build (Mul_half +
    // Refresh_Neighbor_List_First_Time) AND then a drift-checked refresh
    // (Is_need_refresh... + Mul_quarter + Refresh_Neighbor_List). Doing both
    // on every call looks redundant — confirm the double rebuild is intended.
    Mul_half<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    Refresh_Neighbor_List_First_Time(
      is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
      reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd), half_crd_to_uint_crd_cof,
      uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer, bucket, atom_numbers_in_grid_bucket, d_nl,
      excluded_list_start, excluded_list, excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse,
      grid_N, Nxy, stream);
    // Set the device flag if any atom drifted past half the skin distance.
    Is_need_refresh_neighbor_list_cuda<<<ceilf(static_cast<float>(atom_numbers) / 128), 128, 0, stream>>>(
      atom_numbers, reinterpret_cast<VECTOR *>(crd), reinterpret_cast<VECTOR *>(old_crd),
      reinterpret_cast<VECTOR *>(box_length), half_skin_square, is_need_refresh_neighbor_list);
    Mul_quarter<<<1, 3, 0, stream>>>(crd_to_uint_crd_cof, half_crd_to_uint_crd_cof);
    // Refresh_Neighbor_List reads the flag back and rebuilds only if set.
    Refresh_Neighbor_List(is_need_refresh_neighbor_list, 32, atom_numbers, reinterpret_cast<VECTOR *>(crd),
                          reinterpret_cast<VECTOR *>(old_crd), reinterpret_cast<UNSIGNED_INT_VECTOR *>(uint_crd),
                          half_crd_to_uint_crd_cof, uint_dr_to_dr_cof, atom_in_grid_serial, skin, box_length, gpointer,
                          bucket, atom_numbers_in_grid_bucket, d_nl, excluded_list_start, excluded_list,
                          excluded_numbers, cutoff_with_skin_square, grid_numbers, grid_length_inverse, grid_N, Nxy,
                          stream);
  }
}
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_NEW_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_NEW_IMPL_H_ | |||
| struct VECTOR { | |||
| float x; | |||
| @@ -46,15 +46,17 @@ struct GRID_POINTER { | |||
| void Construct_Neighbor_List(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial, | |||
| NEIGHBOR_LIST *nl, cudaStream_t stream); | |||
| void CopyNeighborListAtomNumber(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream); | |||
| void CopyNeighborListAtomNumber(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, | |||
| int *nl_atom_serial, cudaStream_t stream); | |||
| void Neighbor_List_Update(int grid_numbers, int atom_numbers, int* d_refresh_count, int refresh_interval, | |||
| int not_first_time, float skin, int Nxy, float cutoff_square, float cutoff_with_skin_square, | |||
| int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse, | |||
| int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd, | |||
| float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, | |||
| float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, | |||
| int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square, | |||
| int *is_need_refresh_neighbor_list, cudaStream_t stream); | |||
| void Neighbor_List_Update_New(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval, | |||
| int not_first_time, float skin, int Nxy, float cutoff_square, | |||
| float cutoff_with_skin_square, int *grid_N, float *box_length, | |||
| int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial, | |||
| GRID_BUCKET *bucket, float *crd, float *old_crd, float *crd_to_uint_crd_cof, | |||
| float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, float *uint_dr_to_dr_cof, | |||
| GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list, | |||
| int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list, | |||
| int forced_update, int forced_check, cudaStream_t stream); | |||
| #endif | |||
| @@ -0,0 +1,35 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/bond/bond_force_with_atom_energy_and_virial_kernel.h" | |||
namespace mindspore {
namespace kernel {
// Registers BondForceWithAtomEnergyAndVirial with the GPU kernel factory.
// Inputs: uint_crd (uint32), scaler (float32), atom_a/atom_b (int32),
// bond_k/bond_r0 (float32). Outputs: frc, atom_energy, atom_virial (float32).
// Template bindings: T=float, T1=int, T2=unsigned int.
MS_REG_GPU_KERNEL_THREE(BondForceWithAtomEnergyAndVirial,
                        KernelAttr()
                          .AddInputAttr(kNumberTypeUInt32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeInt32)
                          .AddInputAttr(kNumberTypeInt32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddInputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32)
                          .AddOutputAttr(kNumberTypeFloat32),
                        BondForceWithAtomEnergyAndVirialGpuKernel, float, int, unsigned int)
}  // namespace kernel
}  // namespace mindspore
| @@ -0,0 +1,112 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_BOND_BOND_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_BOND_BOND_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_KERNEL_H_ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh" | |||
| #include <cuda_runtime_api.h> | |||
| #include <map> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1, typename T2> | |||
| class BondForceWithAtomEnergyAndVirialGpuKernel : public GpuKernel { | |||
| public: | |||
| BondForceWithAtomEnergyAndVirialGpuKernel() : ele_uint_crd(1) {} | |||
| ~BondForceWithAtomEnergyAndVirialGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| // get bond_numbers | |||
| kernel_node_ = kernel_node; | |||
| bond_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "bond_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| auto shape_uint_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_scaler = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_atom_a = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_atom_b = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_bond_k = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| auto shape_bond_r0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5); | |||
| for (size_t i = 0; i < shape_uint_crd.size(); i++) ele_uint_crd *= shape_uint_crd[i]; | |||
| for (size_t i = 0; i < shape_scaler.size(); i++) ele_scaler *= shape_scaler[i]; | |||
| for (size_t i = 0; i < shape_atom_a.size(); i++) ele_atom_a *= shape_atom_a[i]; | |||
| for (size_t i = 0; i < shape_atom_b.size(); i++) ele_atom_b *= shape_atom_b[i]; | |||
| for (size_t i = 0; i < shape_bond_k.size(); i++) ele_bond_k *= shape_bond_k[i]; | |||
| for (size_t i = 0; i < shape_bond_r0.size(); i++) ele_bond_r0 *= shape_bond_r0[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto uint_crd_f = GetDeviceAddress<const T2>(inputs, 0); | |||
| auto scaler_f = GetDeviceAddress<T>(inputs, 1); | |||
| auto atom_a = GetDeviceAddress<const T1>(inputs, 2); | |||
| auto atom_b = GetDeviceAddress<const T1>(inputs, 3); | |||
| auto bond_k = GetDeviceAddress<T>(inputs, 4); | |||
| auto bond_r0 = GetDeviceAddress<T>(inputs, 5); | |||
| auto frc_f = GetDeviceAddress<T>(outputs, 0); | |||
| auto atom_energy = GetDeviceAddress<T>(outputs, 1); | |||
| auto atom_v = GetDeviceAddress<T>(outputs, 2); | |||
| BondForceWithAtomEnergyAndVirial(bond_numbers, atom_numbers, uint_crd_f, scaler_f, atom_a, atom_b, bond_k, bond_r0, | |||
| frc_f, atom_energy, atom_v, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_uint_crd * sizeof(T2)); | |||
| input_size_list_.push_back(ele_scaler * sizeof(T)); | |||
| input_size_list_.push_back(ele_atom_a * sizeof(T1)); | |||
| input_size_list_.push_back(ele_atom_b * sizeof(T1)); | |||
| input_size_list_.push_back(ele_bond_k * sizeof(T)); | |||
| input_size_list_.push_back(ele_bond_r0 * sizeof(T)); | |||
| output_size_list_.push_back(atom_numbers * 3 * sizeof(T)); | |||
| output_size_list_.push_back(atom_numbers * sizeof(T)); | |||
| output_size_list_.push_back(atom_numbers * sizeof(T)); | |||
| } | |||
| private: | |||
| size_t ele_uint_crd = 1; | |||
| size_t ele_scaler = 1; | |||
| size_t ele_atom_a = 1; | |||
| size_t ele_atom_b = 1; | |||
| size_t ele_bond_k = 1; | |||
| size_t ele_bond_r0 = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int bond_numbers; | |||
| int atom_numbers; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -0,0 +1,31 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/common/get_center_of_mass_kernel.h" | |||
namespace mindspore {
namespace kernel {
// Registers GetCenterOfMass with the GPU kernel factory.
// Inputs: start/end (int32 residue ranges), crd, atom_mass,
// residue_mass_inverse (float32). Output: center_of_mass (float32).
// Template bindings: T=float, T1=int.
MS_REG_GPU_KERNEL_TWO(GetCenterOfMass,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      GetCenterOfMassGpuKernel, float, int)
}  // namespace kernel
}  // namespace mindspore
| @@ -0,0 +1,101 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GET_CENTER_OF_MASS_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GET_CENTER_OF_MASS_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/get_center_of_mass_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class GetCenterOfMassGpuKernel : public GpuKernel { | |||
| public: | |||
| GetCenterOfMassGpuKernel() : ele_start(1) {} | |||
| ~GetCenterOfMassGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| residue_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "residue_numbers")); | |||
| auto shape_start = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||
| auto shape_end = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); | |||
| auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); | |||
| auto shape_atom_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); | |||
| auto shape_residue_mass_inverse = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); | |||
| for (size_t i = 0; i < shape_start.size(); i++) ele_start *= shape_start[i]; | |||
| for (size_t i = 0; i < shape_end.size(); i++) ele_end *= shape_end[i]; | |||
| for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i]; | |||
| for (size_t i = 0; i < shape_atom_mass.size(); i++) ele_atom_mass *= shape_atom_mass[i]; | |||
| for (size_t i = 0; i < shape_residue_mass_inverse.size(); i++) | |||
| ele_residue_mass_inverse *= shape_residue_mass_inverse[i]; | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| auto start = GetDeviceAddress<T1>(inputs, 0); | |||
| auto end = GetDeviceAddress<T1>(inputs, 1); | |||
| auto crd = GetDeviceAddress<T>(inputs, 2); | |||
| auto atom_mass = GetDeviceAddress<T>(inputs, 3); | |||
| auto residue_mass_inverse = GetDeviceAddress<T>(inputs, 4); | |||
| auto center_of_mass = GetDeviceAddress<T>(outputs, 0); | |||
| GetCenterOfMass(residue_numbers, start, end, crd, atom_mass, residue_mass_inverse, center_of_mass, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(ele_start * sizeof(T1)); | |||
| input_size_list_.push_back(ele_end * sizeof(T1)); | |||
| input_size_list_.push_back(ele_crd * sizeof(T)); | |||
| input_size_list_.push_back(ele_atom_mass * sizeof(T)); | |||
| input_size_list_.push_back(ele_residue_mass_inverse * sizeof(T)); | |||
| output_size_list_.push_back(3 * sizeof(T) * residue_numbers); | |||
| } | |||
| private: | |||
| size_t ele_start = 1; | |||
| size_t ele_end = 1; | |||
| size_t ele_crd = 1; | |||
| size_t ele_atom_mass = 1; | |||
| size_t ele_residue_mass_inverse = 1; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| int residue_numbers; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_GET_CENTER_OF_MASS_KERNEL_H_ | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/common/map_center_of_mass_kernel.h" | |||
namespace mindspore {
namespace kernel {
// Registers MapCenterOfMass with the GPU kernel factory.
// Inputs: start/end (int32 residue ranges), then four float32 tensors
// (per the header this kernel also reads a "scaler" attribute).
// Output: float32. Template bindings: T=float, T1=int.
MS_REG_GPU_KERNEL_TWO(MapCenterOfMass,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      MapCenterOfMassGpuKernel, float, int)
}  // namespace kernel
}  // namespace mindspore
| @@ -0,0 +1,105 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MAP_CENTER_OF_MASS_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MAP_CENTER_OF_MASS_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/common/map_center_of_mass_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
template <typename T, typename T1>
class MapCenterOfMassGpuKernel : public GpuKernel {
 public:
  MapCenterOfMassGpuKernel() : ele_start(1) {}
  ~MapCenterOfMassGpuKernel() override = default;

  // Reads kernel attributes and the six input shapes, accumulates each input's
  // element count, and fills the input/output size lists.
  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    residue_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "residue_numbers"));
    scaler = static_cast<float>(GetAttr<float>(kernel_node, "scaler"));
    auto shape_start = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    auto shape_end = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    auto shape_center_of_mass = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
    auto shape_box_length = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
    auto shape_no_wrap_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
    // Bug fix: crd is input 5 (see Launch below); it was previously fetched from
    // index 4, duplicating no_wrap_crd and sizing the crd buffer from the wrong tensor.
    auto shape_crd = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);

    for (size_t i = 0; i < shape_start.size(); i++) ele_start *= shape_start[i];
    for (size_t i = 0; i < shape_end.size(); i++) ele_end *= shape_end[i];
    for (size_t i = 0; i < shape_center_of_mass.size(); i++) ele_center_of_mass *= shape_center_of_mass[i];
    for (size_t i = 0; i < shape_box_length.size(); i++) ele_box_length *= shape_box_length[i];
    for (size_t i = 0; i < shape_no_wrap_crd.size(); i++) ele_no_wrap_crd *= shape_no_wrap_crd[i];
    for (size_t i = 0; i < shape_crd.size(); i++) ele_crd *= shape_crd[i];

    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  // Launches the MapCenterOfMass CUDA implementation on the given stream.
  // crd (input 5) is updated in place by the device code; the declared output
  // is only a placeholder.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto start = GetDeviceAddress<T1>(inputs, 0);
    auto end = GetDeviceAddress<T1>(inputs, 1);
    auto center_of_mass = GetDeviceAddress<T>(inputs, 2);
    auto box_length = GetDeviceAddress<T>(inputs, 3);
    auto no_wrap_crd = GetDeviceAddress<T>(inputs, 4);
    auto crd = GetDeviceAddress<T>(inputs, 5);
    MapCenterOfMass(residue_numbers, start, end, scaler, center_of_mass, box_length, no_wrap_crd, crd,
                    reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(ele_start * sizeof(T1));
    input_size_list_.push_back(ele_end * sizeof(T1));
    input_size_list_.push_back(ele_center_of_mass * sizeof(T));
    input_size_list_.push_back(ele_box_length * sizeof(T));
    input_size_list_.push_back(ele_no_wrap_crd * sizeof(T));
    input_size_list_.push_back(ele_crd * sizeof(T));
    // Placeholder output: a single T (results land in crd in place).
    output_size_list_.push_back(sizeof(T));
  }

 private:
  // Element counts (products of the corresponding input shapes).
  size_t ele_start = 1;
  size_t ele_end = 1;
  size_t ele_center_of_mass = 1;
  size_t ele_box_length = 1;
  size_t ele_no_wrap_crd = 1;
  size_t ele_crd = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int residue_numbers;   // number of residues, from the "residue_numbers" attr
  float scaler;          // wrapping scale factor, from the "scaler" attr
};
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_COMMON_MAP_CENTER_OF_MASS_KERNEL_H_ | |||
| @@ -0,0 +1,39 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
// Register the Dihedral14LJCFForceWithAtomEnergyAndVirial GPU kernel.
// Inputs (in order): uint_crd_f (uint32), LJ_type (int32), charge (float32),
// boxlength_f (float32), a_14 (int32), b_14 (int32), lj_scale_factor (float32),
// cf_scale_factor (float32), LJ_type_A (float32), LJ_type_B (float32).
// Outputs: frc_f, atom_energy, atom_virial (all float32).
// Template binding: T = float, T1 = int.
MS_REG_GPU_KERNEL_TWO(Dihedral14LJCFForceWithAtomEnergyAndVirial,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeUInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      Dihedral14LJCFForceWithAtomEnergyAndVirialGpuKernel, float, int)
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,145 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONG_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_AND_VIRIAL_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
template <typename T, typename T1>
class Dihedral14LJCFForceWithAtomEnergyAndVirialGpuKernel : public GpuKernel {
 public:
  Dihedral14LJCFForceWithAtomEnergyAndVirialGpuKernel() : ele_uint_crd(1) {}
  ~Dihedral14LJCFForceWithAtomEnergyAndVirialGpuKernel() override = default;

  // Reads the two scalar attributes and the ten input shapes, accumulates each
  // input's element count, and fills the input/workspace/output size lists.
  bool Init(const CNodePtr &kernel_node) override {
    kernel_node_ = kernel_node;
    dihedral_14_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "dihedral_14_numbers"));
    atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers"));

    // Product of all dimensions of the tensor feeding input `index`.
    auto element_count = [&kernel_node](size_t index) {
      size_t total = 1;
      for (size_t dim : AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, index)) {
        total *= dim;
      }
      return total;
    };
    ele_uint_crd *= element_count(0);
    ele_LJtype *= element_count(1);
    ele_charge *= element_count(2);
    ele_boxlength_f *= element_count(3);
    ele_a_14 *= element_count(4);
    ele_b_14 *= element_count(5);
    ele_lj_scale_factor *= element_count(6);
    ele_cf_scale_factor *= element_count(7);
    ele_LJ_type_A *= element_count(8);
    ele_LJ_type_B *= element_count(9);

    InitSizeLists();
    return true;
  }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }

  // Launches the CUDA implementation on the given stream. The workspace buffer
  // packs coordinates, LJ type, and charge per atom (UINT_VECTOR_LJ_TYPE).
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0);
    auto LJtype = GetDeviceAddress<const T1>(inputs, 1);
    auto charge = GetDeviceAddress<const T>(inputs, 2);
    auto boxlength_f = GetDeviceAddress<T>(inputs, 3);
    auto a_14 = GetDeviceAddress<const T1>(inputs, 4);
    auto b_14 = GetDeviceAddress<const T1>(inputs, 5);
    auto lj_scale_factor = GetDeviceAddress<T>(inputs, 6);
    auto cf_scale_factor = GetDeviceAddress<T>(inputs, 7);
    auto LJ_type_A = GetDeviceAddress<T>(inputs, 8);
    auto LJ_type_B = GetDeviceAddress<T>(inputs, 9);

    auto frc_f = GetDeviceAddress<T>(outputs, 0);
    auto atom_energy = GetDeviceAddress<T>(outputs, 1);
    auto atom_virial = GetDeviceAddress<T>(outputs, 2);
    auto uint_crd_with_LJ = GetDeviceAddress<T>(workspace, 0);

    Dihedral14LJCFForceWithAtomEnergyAndVirial(dihedral_14_numbers, atom_numbers, uint_crd_f, LJtype, charge,
                                               uint_crd_with_LJ, boxlength_f, a_14, b_14, lj_scale_factor,
                                               cf_scale_factor, LJ_type_A, LJ_type_B, frc_f, atom_energy, atom_virial,
                                               reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }

 protected:
  void InitSizeLists() override {
    input_size_list_.push_back(ele_uint_crd * sizeof(T1));
    input_size_list_.push_back(ele_LJtype * sizeof(T1));
    input_size_list_.push_back(ele_charge * sizeof(T));
    input_size_list_.push_back(ele_boxlength_f * sizeof(T));
    input_size_list_.push_back(ele_a_14 * sizeof(T1));
    input_size_list_.push_back(ele_b_14 * sizeof(T1));
    input_size_list_.push_back(ele_lj_scale_factor * sizeof(T));
    input_size_list_.push_back(ele_cf_scale_factor * sizeof(T));
    input_size_list_.push_back(ele_LJ_type_A * sizeof(T));
    input_size_list_.push_back(ele_LJ_type_B * sizeof(T));

    // One packed record per atom for the coordinate/LJ-type/charge scratch buffer.
    workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE));

    output_size_list_.push_back(3 * atom_numbers * sizeof(T));  // frc_f: xyz per atom
    output_size_list_.push_back(atom_numbers * sizeof(T));      // atom_energy
    output_size_list_.push_back(atom_numbers * sizeof(T));      // atom_virial
  }

 private:
  // Element counts (products of the corresponding input shapes).
  size_t ele_uint_crd = 1;
  size_t ele_LJtype = 1;
  size_t ele_charge = 1;
  size_t ele_boxlength_f = 1;
  size_t ele_a_14 = 1;
  size_t ele_b_14 = 1;
  size_t ele_lj_scale_factor = 1;
  size_t ele_cf_scale_factor = 1;
  size_t ele_LJ_type_A = 1;
  size_t ele_LJ_type_B = 1;

  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
  int dihedral_14_numbers;  // number of 1-4 dihedral interactions, from attr
  int atom_numbers;         // total atom count, from attr

  // Per-atom record layout used by the workspace buffer; must match the
  // device-side definition in the .cuh implementation.
  struct UINT_VECTOR_LJ_TYPE {
    unsigned int uint_x;
    unsigned int uint_y;
    unsigned int uint_z;
    int LJ_type;
    float charge;
  };
};
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -14,11 +14,11 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_TWO(NeighborListUpdate, | |||
| MS_REG_GPU_KERNEL_TWO(NeighborListUpdateNew, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| @@ -40,6 +40,6 @@ MS_REG_GPU_KERNEL_TWO(NeighborListUpdate, | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddInputAttr(kNumberTypeInt32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| NeighborListUpdateGpuKernel, int, float) | |||
| NeighborListUpdateNewGpuKernel, int, float) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_KERNEL_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_NEW_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_NEIGHBOR_LIST_UPDATE_NEW_KERNEL_H_ | |||
| #include <cuda_runtime_api.h> | |||
| #include <vector> | |||
| @@ -24,15 +24,15 @@ | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel.h" | |||
| #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h" | |||
| #include "runtime/device/gpu/cuda_common.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/sponge/neighbor_list/neighbor_list_new_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T, typename T1> | |||
| class NeighborListUpdateGpuKernel : public GpuKernel { | |||
| class NeighborListUpdateNewGpuKernel : public GpuKernel { | |||
| public: | |||
| NeighborListUpdateGpuKernel() : skin(2.0), cutoff(10.0), max_atom_in_grid_numbers(64), max_neighbor_numbers(800) {} | |||
| ~NeighborListUpdateGpuKernel() override = default; | |||
| NeighborListUpdateNewGpuKernel() : skin(2.0), cutoff(9.0), max_atom_in_grid_numbers(64), max_neighbor_numbers(800) {} | |||
| ~NeighborListUpdateNewGpuKernel() override = default; | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| grid_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "grid_numbers")); | |||
| atom_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "atom_numbers")); | |||
| @@ -46,6 +46,8 @@ class NeighborListUpdateGpuKernel : public GpuKernel { | |||
| cutoff_with_skin = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_with_skin")); | |||
| half_cutoff_with_skin = static_cast<float>(GetAttr<float>(kernel_node, "half_cutoff_with_skin")); | |||
| cutoff_with_skin_square = static_cast<float>(GetAttr<float>(kernel_node, "cutoff_with_skin_square")); | |||
| forced_update = static_cast<int>(GetAttr<int64_t>(kernel_node, "forced_update")); | |||
| forced_check = static_cast<int>(GetAttr<int64_t>(kernel_node, "forced_check")); | |||
| h_bucket.resize(grid_numbers); | |||
| h_gpointer.resize(grid_numbers); | |||
| InitSizeLists(); | |||
| @@ -99,13 +101,14 @@ class NeighborListUpdateGpuKernel : public GpuKernel { | |||
| Construct_Neighbor_List(atom_numbers, max_neighbor_numbers, nl_atom_numbers, nl_atom_serial, nl, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| Neighbor_List_Update(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, nxy, | |||
| cutoff_square, cutoff_with_skin_square, grid_n, box_length, atom_numbers_in_grid_bucket, | |||
| grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof, | |||
| half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start, | |||
| excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| CopyNeighborListAtomNumber(atom_numbers, nl, nl_atom_numbers, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| Neighbor_List_Update_New(grid_numbers, atom_numbers, d_refresh_count, refresh_interval, not_first_time, skin, nxy, | |||
| cutoff_square, cutoff_with_skin_square, grid_n, box_length, atom_numbers_in_grid_bucket, | |||
| grid_length_inverse, atom_in_grid_serial, d_bucket, crd, old_crd, crd_to_uint_crd_cof, | |||
| half_crd_to_uint_crd_cof, uint_crd, uint_dr_to_dr_cof, d_gpointer, nl, excluded_list_start, | |||
| excluded_list, excluded_numbers, half_skin_square, need_refresh_flag, forced_update, | |||
| forced_check, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| CopyNeighborListAtomNumber(atom_numbers, max_neighbor_numbers, nl, nl_atom_numbers, nl_atom_serial, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| @@ -160,6 +163,8 @@ class NeighborListUpdateGpuKernel : public GpuKernel { | |||
| float cutoff_with_skin; | |||
| float half_cutoff_with_skin; | |||
| float cutoff_with_skin_square; | |||
| int forced_update; | |||
| int forced_check; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||