diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/batch_assign_gpu_base.h b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/batch_assign_gpu_base.h new file mode 100644 index 0000000000..9cb7ad1d4a --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/batch_assign_gpu_base.h @@ -0,0 +1,45 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_BATCH_ASSIGN_GPU_BASE_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_BATCH_ASSIGN_GPU_BASE_H_ + +#include +#include +#include +#include "plugin/device/gpu/kernel/gpu_kernel.h" +#include "plugin/device/gpu/kernel/gpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class BatchAssignBaseMod : public NativeGpuKernelMod { + public: + BatchAssignBaseMod() = default; + ~BatchAssignBaseMod() override = default; + + virtual bool Init(const CNodePtr &kernel_node) = 0; + + protected: + void InitSizeLists() {} + // Using shared_mutex to achieve the followings: + // The read-write lock can only have one writer or multiple readers at the same time, + // but it can't have both readers and writers at the same time. + static std::shared_mutex rw_mutex_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_BATCH_ASSIGN_GPU_BASE_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/batch_assign_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/batch_assign_gpu_kernel.cc new file mode 100644 index 0000000000..e5aa60703c --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/batch_assign_gpu_kernel.cc @@ -0,0 +1,85 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include "plugin/device/gpu/kernel/rl/batch_assign_gpu_kernel.h" +#include "kernel/common_utils.h" + +namespace mindspore { +namespace kernel { +constexpr size_t kHalf = 2; +// Init shared_mutex in base. +std::shared_mutex BatchAssignBaseMod::rw_mutex_; + +BatchAssignKernelMod::BatchAssignKernelMod() : elements_num_(0), lock_(false) {} + +bool BatchAssignKernelMod::Init(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + kernel_node_ = kernel_node; + kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); + lock_ = GetAttr(kernel_node, "lock"); + size_t input_num = common::AnfAlgo::GetInputNum(kernel_node); + elements_num_ = input_num / kHalf; + // Compute the size for each input. There has two input lists. + // Each list has the same elements number, shape series, type series. + for (size_t i = 0; i < elements_num_; i++) { + auto type = AnfAlgo::GetInputDeviceDataType(kernel_node, i); + size_t element_size = GetTypeByte(TypeIdToType(type)); + auto shape = AnfAlgo::GetInputDeviceShape(kernel_node, i); + for (auto x : shape) { + element_size *= LongToSize(x); + } + input_size_list_.push_back(element_size); + } + // Set input size for another input list. + for (size_t i = 0; i < elements_num_; i++) { + input_size_list_.push_back(input_size_list_[i]); + } + // Set an output for placeholder. + output_size_list_.push_back(sizeof(float)); + return true; +} + +bool BatchAssignKernelMod::Launch(const std::vector &inputs, const std::vector &, + const std::vector &, void *stream) { + auto cuda_stream = reinterpret_cast(stream); + // Using shared lock for reader so there can be more than one readers in the same time. + // Using lock for writer to ensure there's only one writer at a time. + if (lock_) { + // Execute rw_mutex_.unlock() in lock's deconstruct. + std::unique_lock lock(rw_mutex_); + } else { + // Execute rw_mutex_.unlock_shared() in lock's deconstruct. + std::shared_lock lock(rw_mutex_); + } + // Usually, we will get two inputs list, the first half are the weights to be updated, and the last half + // are the sources. So we just copy the source to overwrite the dst. + for (size_t i = 0; i < elements_num_; i++) { + auto local_addr = GetDeviceAddress(inputs, i); + auto source_addr = GetDeviceAddress(inputs, i + elements_num_); + MS_ERROR_IF_NULL(local_addr); + MS_ERROR_IF_NULL(source_addr); + CHECK_CUDA_RET_WITH_EXCEPT( + kernel_node_, + cudaMemcpyAsync(local_addr, source_addr, input_size_list_[i], cudaMemcpyDeviceToDevice, cuda_stream), + "Overwrite failed"); + } + CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(cuda_stream), + "BatchAssignKernel cudaStreamSynchronized failed"); + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/batch_assign_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/batch_assign_gpu_kernel.h new file mode 100644 index 0000000000..34764cc739 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/batch_assign_gpu_kernel.h @@ -0,0 +1,46 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_BATCH_ASSIGN_GPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_BATCH_ASSIGN_GPU_KERNEL_H_ + +#include +#include +#include "plugin/device/gpu/kernel/rl/batch_assign_gpu_base.h" +#include "plugin/device/gpu/kernel/gpu_kernel.h" +#include "plugin/device/gpu/kernel/gpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class BatchAssignKernelMod : public BatchAssignBaseMod { + public: + BatchAssignKernelMod(); + ~BatchAssignKernelMod() = default; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + bool Init(const CNodePtr &kernel_node) override; + + private: + size_t elements_num_; + bool lock_; +}; + +MS_REG_GPU_KERNEL(BatchAssign, BatchAssignKernelMod) +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_BATCH_ASSIGN_GPU_KERNEL_H_