From: @mamba_ni Reviewed-by: @ljl0711, @wang_zi_dong Signed-off-by: @wang_zi_dong pull/14605/MERGE
| @@ -54,8 +54,8 @@ __global__ void Dihedral14CFEnergyKernel(const int dihedral_14_numbers, const UI | |||||
| void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, | ||||
| const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, | const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, | ||||
| const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream) { | const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream) { | ||||
| size_t thread_per_block = 128; | |||||
| size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128); | |||||
| size_t thread_per_block = 32; | |||||
| size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 32); | |||||
| UNSIGNED_INT_VECTOR *uint_crd = | UNSIGNED_INT_VECTOR *uint_crd = | ||||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | ||||
| @@ -65,12 +65,9 @@ void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, c | |||||
| atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | ||||
| VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | ||||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(atom_numbers, ene, 0.); | |||||
| Dihedral14CFEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | Dihedral14CFEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | ||||
| dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene); | dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, cf_scale_factor, ene); | ||||
| cudaStreamSynchronize(stream); | |||||
| return; | return; | ||||
| } | } | ||||
| @@ -111,7 +111,7 @@ void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int | |||||
| const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy, | const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy, | ||||
| cudaStream_t stream) { | cudaStream_t stream) { | ||||
| size_t thread_per_block = 128; | size_t thread_per_block = 128; | ||||
| size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128); | |||||
| size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 128); | |||||
| UNSIGNED_INT_VECTOR *uint_crd = | UNSIGNED_INT_VECTOR *uint_crd = | ||||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | ||||
| @@ -129,8 +129,6 @@ void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int | |||||
| dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A, | dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, cf_scale_factor, LJ_type_A, | ||||
| LJ_type_B, frc, atom_energy); | LJ_type_B, frc, atom_energy); | ||||
| cudaStreamSynchronize(stream); | |||||
| return; | return; | ||||
| } | } | ||||
| @@ -75,8 +75,8 @@ void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, c | |||||
| const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, | const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, | ||||
| const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, | const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, | ||||
| float *ene, cudaStream_t stream) { | float *ene, cudaStream_t stream) { | ||||
| size_t thread_per_block = 128; | |||||
| size_t block_per_grid = ceilf(static_cast<float>(atom_numbers) / 128); | |||||
| size_t thread_per_block = 32; | |||||
| size_t block_per_grid = ceilf(static_cast<float>(dihedral_14_numbers) / 32); | |||||
| UNSIGNED_INT_VECTOR *uint_crd = | UNSIGNED_INT_VECTOR *uint_crd = | ||||
| const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | const_cast<UNSIGNED_INT_VECTOR *>(reinterpret_cast<const UNSIGNED_INT_VECTOR *>(uint_crd_f)); | ||||
| @@ -84,14 +84,11 @@ void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, c | |||||
| Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | Copy_Crd_To_New_Crd_Start<<<ceilf(static_cast<float>(atom_numbers) / 32), 32, 0, stream>>>( | ||||
| atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | atom_numbers, uint_crd, uint_crd_with_LJ, LJtype, charge); | ||||
| Reset_List<<<ceilf(static_cast<float>(3. * atom_numbers) / 128), 128, 0, stream>>>(dihedral_14_numbers, ene, 0.); | |||||
| VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | VECTOR *boxlength = const_cast<VECTOR *>(reinterpret_cast<const VECTOR *>(boxlength_f)); | ||||
| Dihedral14LJEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | Dihedral14LJEnergyKernel<<<block_per_grid, thread_per_block, 0, stream>>>( | ||||
| dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene); | dihedral_14_numbers, uint_crd_with_LJ, boxlength, a_14, b_14, lj_scale_factor, LJ_type_A, LJ_type_B, ene); | ||||
| cudaStreamSynchronize(stream); | |||||
| return; | return; | ||||
| } | } | ||||
| @@ -92,7 +92,7 @@ class Dihedral14CFEnergyGpuKernel : public GpuKernel { | |||||
| input_size_list_.push_back(ele_b_14 * sizeof(T1)); | input_size_list_.push_back(ele_b_14 * sizeof(T1)); | ||||
| input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); | input_size_list_.push_back(ele_cf_scale_factor * sizeof(T)); | ||||
| workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); | workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); | ||||
| output_size_list_.push_back(atom_numbers * sizeof(T)); | |||||
| output_size_list_.push_back(dihedral_14_numbers * sizeof(T)); | |||||
| } | } | ||||
| private: | private: | ||||
| @@ -102,7 +102,7 @@ class Dihedral14LJEnergyGpuKernel : public GpuKernel { | |||||
| input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); | input_size_list_.push_back(ele_LJ_type_B * sizeof(T)); | ||||
| workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); | workspace_size_list_.push_back(atom_numbers * sizeof(UINT_VECTOR_LJ_TYPE)); | ||||
| output_size_list_.push_back(atom_numbers * sizeof(T)); | |||||
| output_size_list_.push_back(dihedral_14_numbers * sizeof(T)); | |||||
| } | } | ||||
| private: | private: | ||||