Merge pull request !26518 from chenweifeng/tag-env-implementtags/v1.6.0
| @@ -0,0 +1,339 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/rl/tag_env_impl.cuh" | |||
| #include <assert.h> | |||
| #include <algorithm> | |||
// Initialise one curand state per (env, agent) pair so every agent owns an
// independent, reproducible random stream derived from setting->seed.
// Grid-stride loop: correct for any launch configuration.
__global__ void InitKernel(const int env_num, const int agent_num, const GameSetting *setting,
                           AgentState *agent_state) {
  // Promote to size_t before multiplying: avoids int overflow for large
  // env/agent counts and keeps the comparison with the size_t induction
  // variable well-typed (the original compared size_t against an int product).
  const size_t total = static_cast<size_t>(env_num) * agent_num;
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < total; i += gridDim.x * blockDim.x) {
    // Same seed, distinct subsequence per agent -> uncorrelated streams.
    curand_init(setting->seed, i, 0, &agent_state->rand_state[i]);
  }
}
// Host launcher for InitKernel: one thread per (env, agent) pair.
void InitEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *state,
             cudaStream_t stream) {
  constexpr int kBlockDim = 256;
  const int total = env_num * agent_num;
  const int grid_dim = (total + kBlockDim - 1) / kBlockDim;  // ceil-div
  InitKernel<<<grid_dim, kBlockDim, 0, stream>>>(env_num, agent_num, setting, state);
}
// Reset every environment to its initial configuration and write the initial
// observation. One thread per (env, agent) pair, grid-stride.
__global__ void ResetKernel(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
                            float *state) {
  // Reset the agent state
  for (size_t gaid = blockIdx.x * blockDim.x + threadIdx.x; gaid < env_num * agent_num;
       gaid += gridDim.x * blockDim.x) {
    const int eid = gaid / agent_num;  // environment index
    const int aid = gaid % agent_num;  // agent index within the environment
    // Static reset: preys (aid >= predator_num) start at the origin,
    // predators start at the map centre.
    bool is_prey = (aid >= setting->predator_num);
    int x = is_prey ? 0 : 0.5 * setting->map_width;
    int y = is_prey ? 0 : 0.5 * setting->map_length;
    // Random reset (disabled alternative kept for reference).
    // auto local_state = agent_state->rand_state[gaid];
    // // curand_uniform result [0, 1), cache x y to registers.
    // int x = static_cast<int>(curand_uniform(&local_state) * setting->map_width);
    // int y = static_cast<int>(curand_uniform(&local_state) * setting->map_length);
    // agent_state->rand_state[gaid] = local_state;
    agent_state->loc_x[gaid] = x;
    agent_state->loc_y[gaid] = y;
    agent_state->still_in_game[gaid] = true;
    // Per-environment fields are written redundantly by every agent thread of
    // the same environment; all writers store the same value, so the race is
    // benign.
    agent_state->time_step[eid] = 0;
    agent_state->prey_left[eid] = setting->prey_num;
    // Observation layout per environment: agent_num rows, each of
    // agent_num * kFeatureNum features plus one trailing time-step slot.
    // This thread writes its own agent's 4 features into EVERY agent's row.
    const int state_size_per_agent = agent_num * kFeatureNum + 1;
    const size_t base = eid * agent_num * state_size_per_agent + aid * kFeatureNum;
    for (int id = 0; id < agent_num; id++) {
      size_t offset = base + id * state_size_per_agent;
      state[offset] = static_cast<float>(x) / setting->map_width;       // normalized x
      state[offset + 1] = static_cast<float>(y) / setting->map_length;  // normalized y
      state[offset + 2] = aid >= setting->predator_num;                 // 1.0 if prey
      state[offset + 3] = aid == id;                                    // 1.0 on own row
    }
    // Trailing slot of this agent's row: normalized time step, zero on reset.
    state[eid * agent_num * state_size_per_agent + aid * state_size_per_agent + agent_num * kFeatureNum] = 0;
  }
}
// Re-initialise only those environments whose `done` flag is set, writing the
// fresh observation in place. Same reset logic and observation layout as
// ResetKernel; one thread per (env, agent) pair, grid-stride.
__global__ void ResetIfDoneKernel(const int env_num, const int agent_num, const GameSetting *setting,
                                  AgentState *agent_state, float *state, const bool *done) {
  for (size_t gaid = blockIdx.x * blockDim.x + threadIdx.x; gaid < env_num * agent_num;
       gaid += gridDim.x * blockDim.x) {
    const int eid = gaid / agent_num;  // environment index
    const int aid = gaid % agent_num;  // agent index within the environment
    if (done[eid]) {
      // Static reset: preys to the origin, predators to the map centre.
      bool is_prey = (aid >= setting->predator_num);
      int x = is_prey ? 0 : 0.5 * setting->map_width;
      int y = is_prey ? 0 : 0.5 * setting->map_length;
      agent_state->loc_x[gaid] = x;
      agent_state->loc_y[gaid] = y;
      agent_state->still_in_game[gaid] = true;
      // Benign race: all agent threads of the environment store identical values.
      agent_state->time_step[eid] = 0;
      agent_state->prey_left[eid] = setting->prey_num;
      // Each thread writes its own agent's features into every agent's row.
      const int state_size_per_agent = agent_num * kFeatureNum + 1;
      const size_t base = eid * agent_num * state_size_per_agent + aid * kFeatureNum;
      for (int id = 0; id < agent_num; id++) {
        size_t offset = base + id * state_size_per_agent;
        state[offset] = static_cast<float>(x) / setting->map_width;       // normalized x
        state[offset + 1] = static_cast<float>(y) / setting->map_length;  // normalized y
        state[offset + 2] = aid >= setting->predator_num;                 // 1.0 if prey
        state[offset + 3] = aid == id;                                    // 1.0 on own row
      }
      // Trailing time-step slot, zeroed on reset.
      state[eid * agent_num * state_size_per_agent + aid * state_size_per_agent + agent_num * kFeatureNum] = 0;
    }
  }
}
// Host launcher for ResetKernel: unconditionally reset all environments.
void ResetEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state, float *state,
              cudaStream_t stream) {
  constexpr int kBlockDim = 256;
  const int grid_dim = (env_num * agent_num + kBlockDim - 1) / kBlockDim;
  ResetKernel<<<grid_dim, kBlockDim, 0, stream>>>(env_num, agent_num, setting, agent_state, state);
}
// One-block-per-environment step kernel: blockIdx.x selects the environment,
// threads iterate over its agents. Dynamic shared memory holds the updated
// x then y coordinates of all agents (2 * agent_num ints) so preys can test
// collisions against predator positions moved in the same step.
// NOTE(review): the __syncthreads() calls below sit inside the per-agent loop.
// Threads with threadIdx.x >= agent_num skip the loop entirely, and iteration
// counts diverge when agent_num is not a multiple of blockDim.x — executing
// the barrier divergently is formally undefined behavior. The kernel appears
// to assume agent_num <= blockDim.x (256); confirm with callers.
__global__ void StepBindBlockKernel(const int env_num, const int agent_num, const GameSetting *setting,
                                    AgentState *agent_state, const int *action, float *state, float *reward,
                                    bool *done) {
  // Per-environment catch counter: zeroed by thread 0, accumulated atomically
  // by prey threads, read by everyone after the second barrier.
  __shared__ int team_reward;
  extern __shared__ int agent_loc[];
  int *loc_x = agent_loc;              // updated x of every agent in this env
  int *loc_y = agent_loc + agent_num;  // updated y of every agent in this env
  int eid = blockIdx.x;                // one block per environment
  for (int aid = threadIdx.x; aid < agent_num; aid += blockDim.x) {
    int gaid = eid * agent_num + aid;  // global agent index
    float agent_reward = 0.0;
    // Parse discrete action.
    // NOTE(review): only the upper bound is asserted; a negative action index
    // would read out of bounds — confirm the caller guarantees action >= 0.
    int action_offset = action[gaid] * 2;
    assert(action_offset <= 8);
    int action_x = setting->index_to_action[action_offset];
    int action_y = setting->index_to_action[action_offset + 1];
    // Update agent location.
    int x = agent_state->loc_x[gaid] + action_x;
    int y = agent_state->loc_y[gaid] + action_y;
    int map_width = setting->map_width;
    int map_length = setting->map_length;
    // Clamp to the map and charge the wall-hit penalty when out of range.
    if (x < 0 || y < 0 || x > map_width || y > map_length) {
      x = min(max(0, x), setting->map_width);
      y = min(max(0, y), setting->map_length);
      agent_reward -= setting->wall_hit_penalty;
    }
    loc_x[aid] = x;
    loc_y[aid] = y;
    agent_state->loc_x[gaid] = x;
    agent_state->loc_y[gaid] = y;
    // Update time step
    if (aid == 0) {
      team_reward = 0;
      agent_state->time_step[eid]++;
    }
    // Barrier: all updated locations and the zeroed team_reward must be
    // visible before collision checks.
    __syncthreads();
    // Calculate team reward.
    bool is_prey = aid >= setting->predator_num;
    if (is_prey && agent_state->still_in_game[gaid]) {
      for (int tid = 0; tid < setting->predator_num; tid++) {
        // Every prey only caught by one predator.
        if (x == loc_x[tid] && y == loc_y[tid]) {
          agent_state->still_in_game[gaid] = false;
          // NOTE(review): non-atomic decrement raced by the prey threads of
          // this environment — simultaneous catches in one step may be lost.
          agent_state->prey_left[eid]--;
          atomicAdd(&team_reward, 1);
          break;
        }
      }
    }
    __syncthreads();
    // Construct observation: this thread writes its own agent's features into
    // every agent's row; the trailing slot holds the normalized time step.
    const int state_size_per_agent = agent_num * kFeatureNum + 1;
    const size_t base = eid * agent_num * state_size_per_agent + aid * kFeatureNum;
    for (int id = 0; id < agent_num; id++) {
      size_t offset = base + id * state_size_per_agent;
      state[offset] = static_cast<float>(x) / map_width;
      state[offset + 1] = static_cast<float>(y) / map_length;
      state[offset + 2] = is_prey;
      state[offset + 3] = (aid == id);
    }
    state[eid * agent_num * state_size_per_agent + aid * state_size_per_agent + agent_num * kFeatureNum] =
      static_cast<float>(agent_state->time_step[eid]) / setting->max_timestep;
    // Team component: predators share catch_reward, preys share caught_penalty;
    // otherwise the per-step cost applies (preys gain it, predators pay it).
    if (team_reward > 0) {
      agent_reward += is_prey ? -setting->caught_penalty * team_reward : setting->catch_reward * team_reward;
    } else {
      agent_reward += is_prey ? setting->step_cost : -setting->step_cost;
    }
    // Eliminated agents receive zero reward (bool multiplies as 0/1).
    reward[gaid] = agent_reward * agent_state->still_in_game[gaid];
    if (aid == 0) {
      done[eid] = (agent_state->time_step[eid] >= setting->max_timestep) || (agent_state->prey_left[eid] == 0);
    }
  }
}
// Launch the one-block-per-environment step kernel, then reset any finished
// environments in place.
void StepBindBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
                   const int *action, float *state, float *reward, bool *done, cudaStream_t stream) {
  // Bugfix: each block serves exactly ONE environment, so the dynamic
  // shared-memory requirement is 2 * agent_num ints (the kernel aliases the
  // buffer as two int arrays of length agent_num). The previous size of
  // env_num * agent_num * sizeof(float) * 2 over-allocated by a factor of
  // env_num and could exceed the per-block shared-memory limit.
  const size_t shm_size = 2 * static_cast<size_t>(agent_num) * sizeof(int);
  StepBindBlockKernel<<<env_num, 256, shm_size, stream>>>(env_num, agent_num, setting, agent_state, action, state,
                                                          reward, done);
  const int block_dim = 256;
  const int grid_dim = (env_num * agent_num + block_dim - 1) / block_dim;
  ResetIfDoneKernel<<<grid_dim, block_dim, 0, stream>>>(env_num, agent_num, setting, agent_state, state, done);
}
// Phase 1 of the cross-block step: decode each agent's discrete action, move
// the agent (clamped to the map), and write the wall-hit penalty into
// `reward`. One thread per (env, agent) pair, grid-stride.
// (`state` is unused here; kept for launch-signature symmetry with the other
// step kernels.)
__global__ void UpdateAgentLoc(const int env_num, const int agent_num, const GameSetting *setting,
                               AgentState *agent_state, const int *action, float *state, float *reward) {
  // size_t product avoids int overflow and signed/unsigned comparison issues.
  const size_t total_agent = static_cast<size_t>(env_num) * agent_num;
  for (size_t gaid = blockIdx.x * blockDim.x + threadIdx.x; gaid < total_agent; gaid += gridDim.x * blockDim.x) {
    const int eid = gaid / agent_num;
    const int aid = gaid % agent_num;
    // Parse discrete action into a (dx, dy) movement.
    int action_offset = action[gaid] * 2;
    // Bugfix: also reject negative action indices; the original asserted only
    // the upper bound, so a negative action read out of bounds undetected.
    assert(action_offset >= 0 && action_offset <= 8);
    int action_x = setting->index_to_action[action_offset];
    int action_y = setting->index_to_action[action_offset + 1];
    // Update agent location.
    int x = agent_state->loc_x[gaid] + action_x;
    int y = agent_state->loc_y[gaid] + action_y;
    int map_width = setting->map_width;
    int map_length = setting->map_length;
    reward[gaid] = 0.0;
    // Clamp to the map and charge the wall-hit penalty when out of range.
    if (x < 0 || y < 0 || x > map_width || y > map_length) {
      x = min(max(0, x), setting->map_width);
      y = min(max(0, y), setting->map_length);
      reward[gaid] -= setting->wall_hit_penalty;
    }
    agent_state->loc_x[gaid] = x;
    agent_state->loc_y[gaid] = y;
    // The first agent of each environment advances the time step.
    if (aid == 0) {
      agent_state->time_step[eid]++;
    }
  }
}
// Phase 2 of the cross-block step: one thread per prey. If the prey shares a
// cell with any predator it is removed from the game and the environment's
// team reward is incremented. Must run after UpdateAgentLoc (stream order).
__global__ void CalcTeamReward(const int env_num, const int agent_num, const GameSetting *setting,
                               AgentState *agent_state, float *team_reward) {
  const int prey_num_per_env = setting->prey_num;
  const int total_prey_num = env_num * prey_num_per_env;
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < total_prey_num; i += gridDim.x * blockDim.x) {
    const int eid = i / prey_num_per_env;
    // Global agent index of this prey (preys are stored after the predators).
    const int rid = eid * agent_num + i % prey_num_per_env + setting->predator_num;
    if (agent_state->still_in_game[rid]) {
      int x = agent_state->loc_x[rid];
      int y = agent_state->loc_y[rid];
      for (int j = 0; j < setting->predator_num; j++) {
        int tid = eid * agent_num + j;
        if (x == agent_state->loc_x[tid] && y == agent_state->loc_y[tid]) {
          agent_state->still_in_game[rid] = false;
          // Bugfix: prey threads of the same environment decrement
          // concurrently, so the update must be atomic (the plain decrement
          // could lose catches).
          atomicSub(&agent_state->prey_left[eid], 1);
          atomicAdd(&team_reward[eid], 1.0f);
          // Bugfix: a prey can be caught at most once per step. Without this
          // break, a prey overlapping several predators was removed/counted
          // multiple times — inconsistent with StepBindBlockKernel, which
          // stops at the first catching predator.
          break;
        }
      }
    }
  }
}
// Phase 3 of the cross-block step: add the team/step reward component on top
// of the wall-hit penalty written by UpdateAgentLoc, rebuild the observation,
// and set the per-environment done flag.
// NOTE(review): unlike StepBindBlockKernel, the reward here is NOT multiplied
// by still_in_game, so eliminated preys keep receiving step rewards — confirm
// whether this difference between the two step paths is intentional.
__global__ void CalcAgentReward(const int env_num, const int agent_num, const GameSetting *setting,
                                AgentState *agent_state, float *state, float *reward, bool *done, float *team_reward) {
  int total_agent_num = env_num * agent_num;
  for (size_t gaid = blockIdx.x * blockDim.x + threadIdx.x; gaid < total_agent_num; gaid += gridDim.x * blockDim.x) {
    int eid = gaid / agent_num;
    int aid = gaid % agent_num;
    bool is_prey = aid >= setting->predator_num;
    // `reward` already holds the wall-hit penalty from UpdateAgentLoc, so the
    // team/step components are accumulated on top of it.
    if (team_reward[eid] > 0) {
      reward[gaid] += is_prey ? -team_reward[eid] * setting->caught_penalty : team_reward[eid] * setting->catch_reward;
    } else {
      reward[gaid] += is_prey ? setting->step_cost : -setting->step_cost;
    }
    // Construct observation: this thread writes its own agent's features into
    // every agent's row; the trailing slot holds the normalized time step.
    int x = agent_state->loc_x[gaid];
    int y = agent_state->loc_y[gaid];
    int map_width = setting->map_width;
    int map_length = setting->map_length;
    const int state_size_per_agent = agent_num * kFeatureNum + 1;
    const size_t base = eid * agent_num * state_size_per_agent + aid * kFeatureNum;
    for (int id = 0; id < agent_num; id++) {
      size_t offset = base + id * state_size_per_agent;
      state[offset] = static_cast<float>(x) / map_width;
      state[offset + 1] = static_cast<float>(y) / map_length;
      state[offset + 2] = is_prey;
      state[offset + 3] = (aid == id);
    }
    state[eid * agent_num * state_size_per_agent + aid * state_size_per_agent + agent_num * kFeatureNum] =
      static_cast<float>(agent_state->time_step[eid]) / setting->max_timestep;
    // The first agent of each environment publishes the done flag.
    if (aid == 0) {
      done[eid] = (agent_state->time_step[eid] >= setting->max_timestep) || (agent_state->prey_left[eid] == 0);
    }
  }
}
// Multi-kernel step for configurations where one block per environment does
// not fit. Stream ordering enforces the phase sequence: move agents, zero and
// accumulate team reward, compute per-agent reward/observation, then reset any
// finished environments. `team_reward` is a device scratch buffer of env_num
// floats.
void StepCrossBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
                    const int *action, float *state, float *reward, bool *done, float *team_reward,
                    cudaStream_t stream) {
  constexpr int kBlockDim = 256;
  const int grid_dim = (env_num * agent_num + kBlockDim - 1) / kBlockDim;
  // Phase 1: update agent locations and charge wall-hit penalties.
  UpdateAgentLoc<<<grid_dim, kBlockDim, 0, stream>>>(env_num, agent_num, setting, agent_state, action, state, reward);
  // Phase 2: team reward accumulates via atomics, so it must start from zero.
  cudaMemsetAsync(team_reward, 0, sizeof(float) * env_num, stream);
  CalcTeamReward<<<grid_dim, kBlockDim, 0, stream>>>(env_num, agent_num, setting, agent_state, team_reward);
  // Phase 3: fold team reward into per-agent rewards and build observations.
  CalcAgentReward<<<grid_dim, kBlockDim, 0, stream>>>(env_num, agent_num, setting, agent_state, state, reward, done,
                                                      team_reward);
  // Phase 4: re-initialise any environment whose episode just ended.
  ResetIfDoneKernel<<<grid_dim, kBlockDim, 0, stream>>>(env_num, agent_num, setting, agent_state, state, done);
}
// Deep-copy one AgentState SOA into another (same [env_num, agent_num]
// layout). Used by the profiling path to step on a scratch copy without
// disturbing the live state.
__global__ void AgentStateCopyKernel(const int env_num, const int agent_num, AgentState *dst, AgentState *src) {
  const size_t total_agent_num = static_cast<size_t>(env_num) * agent_num;
  for (size_t gaid = blockIdx.x * blockDim.x + threadIdx.x; gaid < total_agent_num; gaid += gridDim.x * blockDim.x) {
    // Bugfix: the indices were derived with `/ total_agent_num` and
    // `% total_agent_num`, which made eid always 0 and aid == gaid, so the
    // per-environment fields below were only copied for gaid == 0. Divide and
    // take the remainder by agent_num, as every other kernel in this file does.
    const int eid = gaid / agent_num;
    const int aid = gaid % agent_num;
    dst->loc_x[gaid] = src->loc_x[gaid];
    dst->loc_y[gaid] = src->loc_y[gaid];
    dst->rand_state[gaid] = src->rand_state[gaid];
    dst->still_in_game[gaid] = src->still_in_game[gaid];
    // The first agent of each environment copies the per-environment fields.
    if (aid == 0) {
      dst->time_step[eid] = src->time_step[eid];
      dst->prey_left[eid] = src->prey_left[eid];
    }
  }
}
// Host launcher for AgentStateCopyKernel: one thread per (env, agent) pair.
void AgentStateCopy(const int env_num, const int agent_num, AgentState *dst, AgentState *src, cudaStream_t stream) {
  constexpr int kBlockDim = 256;
  const int grid_dim = (env_num * agent_num + kBlockDim - 1) / kBlockDim;
  AgentStateCopyKernel<<<grid_dim, kBlockDim, 0, stream>>>(env_num, agent_num, dst, src);
}
| @@ -0,0 +1,59 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_TAG_ENV_IMPL_H_ | |||
| #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_TAG_ENV_IMPL_H_ | |||
| #include <curand_kernel.h> | |||
// Number of per-agent features in an observation row: x, y, is-prey flag,
// is-self flag.
constexpr int kFeatureNum = 4;
// Static configuration of a Tag game. Filled on host from op attributes and
// copied once to device memory.
struct GameSetting {
  int seed;         // RNG seed used to initialise the per-agent curand states
  int predator_num; // predators per environment (agents 0 .. predator_num-1)
  int prey_num;     // preys per environment (stored after the predators)
  int max_timestep; // episode length limit
  int map_length;   // map extent along y
  int map_width;    // map extent along x
  float wall_hit_penalty; // reward penalty for trying to move off the map
  float catch_reward;     // predator reward per caught prey
  float caught_penalty;   // prey penalty per caught prey
  float step_cost;        // per-step cost (preys gain it, predators pay it)
  // Discrete action lookup table: consecutive (dx, dy) pairs for the five
  // actions {still, +x, -x, +y, -y}; indexed as action * 2.
  int index_to_action[10] = {0, 0, 1, 0, -1, 0, 0, 1, 0, -1};
};
// Structure of arrays (SOA) for parallel access on device.
// Per-agent members have shape [env_num, agent_num]; per-environment members
// (time_step, prey_left) have shape [env_num].
struct AgentState {
  int *loc_x;               // agent x coordinates, [env_num * agent_num]
  int *loc_y;               // agent y coordinates, [env_num * agent_num]
  curandState *rand_state;  // per-agent RNG states, [env_num * agent_num]
  bool *still_in_game;      // false once a prey is caught, [env_num * agent_num]
  int *time_step;           // current step per environment, [env_num]
  int *prey_left;           // remaining preys per environment, [env_num]
};
// Initialise the per-agent curand states. Call once before Reset/Step.
void InitEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *state,
             cudaStream_t stream);
// Reset every environment and write the initial observation into `state`.
void ResetEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state, float *state,
              cudaStream_t stream);
// Single-kernel step: one CUDA block per environment, then reset finished envs.
void StepBindBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
                   const int *action, float *state, float *reward, bool *done, cudaStream_t stream);
// Multi-kernel step; `team_reward` is a device scratch buffer of env_num floats.
void StepCrossBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
                    const int *action, float *state, float *reward, bool *done, float *team_reward,
                    cudaStream_t stream);
// Deep-copy an AgentState SOA on device (used by the kernel-profiling path).
void AgentStateCopy(const int env_num, const int agent_num, AgentState *dst, AgentState *src, cudaStream_t stream);
| #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_TAG_ENV_IMPL_H_ | |||
| @@ -46,6 +46,7 @@ class Environment { | |||
| virtual size_t StateSizeInBytes() = 0; | |||
| virtual size_t RewardSizeInBytes() = 0; | |||
| virtual size_t DoneSizeInBytes() = 0; | |||
| virtual size_t WorkspaceSizeInBytes() { return 0; } | |||
| }; | |||
| constexpr int64_t kInvalidHandle = -1; | |||
| @@ -0,0 +1,226 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/kernel_compiler/gpu/rl/tag_environment.h" | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <tuple> | |||
| #include <map> | |||
| #include <utility> | |||
| #include "runtime/device/gpu/cuda_driver.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
namespace {
// Attribute names of the Tag environment op. These strings are part of the
// op's frontend-facing interface and must match the attribute names set on
// the node exactly (note: "catched_penalty" is kept as-is for compatibility).
constexpr auto kSeedAttr = "seed";
constexpr auto kPredatorNumAttr = "predator_num";
constexpr auto kPreyNumrAttr = "prey_num";
constexpr auto kMaxTimestepAttr = "max_timestep";
constexpr auto kMapLengthAttr = "map_length";
constexpr auto kMapWidthAttr = "map_width";
constexpr auto kWallHitPenaltyAttr = "wall_hit_penalty";
constexpr auto kCatchRewardAttr = "catch_reward";
constexpr auto kCaughtPenaltyAttr = "catched_penalty";
constexpr auto kStepCostAttr = "step_cost";
constexpr auto kEnvNumAttr = "env_num";
}  // namespace
| TagEnvironment::~TagEnvironment() { | |||
| auto &allocator = device::gpu::GPUMemoryAllocator::GetInstance(); | |||
| allocator.FreeTensorMem(agent_state_device_); | |||
| allocator.FreeTensorMem(game_setting_device_); | |||
| FinalizeAgentState(agent_state_host_); | |||
| } | |||
| bool TagEnvironment::InitGameSetting(const CNodePtr &cnode, GameSetting *setting_host) { | |||
| MS_LOG(EXCEPTION) << "The `game_setting` should not be nullprt"; | |||
| setting_host->seed = AnfAlgo::GetNodeAttr<int64_t>(cnode, kSeedAttr); | |||
| setting_host->predator_num = AnfAlgo::GetNodeAttr<int64_t>(cnode, kPredatorNumAttr); | |||
| setting_host->prey_num = AnfAlgo::GetNodeAttr<int64_t>(cnode, kPreyNumrAttr); | |||
| setting_host->max_timestep = AnfAlgo::GetNodeAttr<int64_t>(cnode, kMaxTimestepAttr); | |||
| setting_host->map_length = AnfAlgo::GetNodeAttr<int64_t>(cnode, kMapLengthAttr); | |||
| setting_host->map_width = AnfAlgo::GetNodeAttr<int64_t>(cnode, kMapWidthAttr); | |||
| setting_host->wall_hit_penalty = AnfAlgo::GetNodeAttr<float>(cnode, kWallHitPenaltyAttr); | |||
| setting_host->catch_reward = AnfAlgo::GetNodeAttr<float>(cnode, kCatchRewardAttr); | |||
| setting_host->caught_penalty = AnfAlgo::GetNodeAttr<float>(cnode, kCaughtPenaltyAttr); | |||
| setting_host->step_cost = AnfAlgo::GetNodeAttr<float>(cnode, kStepCostAttr); | |||
| env_num_ = AnfAlgo::GetNodeAttr<int64_t>(cnode, kEnvNumAttr); | |||
| agent_num_ = setting_host->predator_num + setting_host->prey_num; | |||
| return true; | |||
| } | |||
| bool TagEnvironment::InitAgentState(int predator_num, int prey_num, AgentState *agent_state) { | |||
| MS_LOG(EXCEPTION) << "The `state` should not be nullptr"; | |||
| int total_agents_num = env_num_ * agent_num_; | |||
| auto &allocator = device::gpu::GPUMemoryAllocator::GetInstance(); | |||
| agent_state->loc_x = static_cast<int *>(allocator.AllocTensorMem(sizeof(int) * total_agents_num)); | |||
| agent_state->loc_y = static_cast<int *>(allocator.AllocTensorMem(sizeof(int) * total_agents_num)); | |||
| agent_state->still_in_game = static_cast<bool *>(allocator.AllocTensorMem(sizeof(bool) * total_agents_num)); | |||
| agent_state->rand_state = | |||
| static_cast<curandState *>(allocator.AllocTensorMem(sizeof(curandState) * total_agents_num)); | |||
| agent_state->time_step = static_cast<int *>(allocator.AllocTensorMem(sizeof(int) * env_num_)); | |||
| agent_state->prey_left = static_cast<int *>(allocator.AllocTensorMem(sizeof(int) * env_num_)); | |||
| return true; | |||
| } | |||
| bool TagEnvironment::FinalizeAgentState(const AgentState &agent_setting) { | |||
| MS_LOG(EXCEPTION) << "The `state` should not be nullptr"; | |||
| auto &allocator = device::gpu::GPUMemoryAllocator::GetInstance(); | |||
| allocator.FreeTensorMem(agent_setting.prey_left); | |||
| allocator.FreeTensorMem(agent_setting.time_step); | |||
| allocator.FreeTensorMem(agent_setting.still_in_game); | |||
| allocator.FreeTensorMem(agent_setting.rand_state); | |||
| allocator.FreeTensorMem(agent_setting.loc_x); | |||
| allocator.FreeTensorMem(agent_setting.loc_y); | |||
| return true; | |||
| } | |||
// Parse the op attributes, allocate the device-side game setting and
// agent-state descriptor, copy both to device, and seed the per-agent RNGs.
bool TagEnvironment::Init(const CNodePtr &cnode, void *stream_ptr) {
  InitGameSetting(cnode, &game_setting_host_);
  InitAgentState(game_setting_host_.predator_num, game_setting_host_.prey_num, &agent_state_host_);
  // Move the game setting to device. The async copy reads from a member, so
  // the source outlives the enqueued stream work and no host sync is needed.
  auto &allocator = device::gpu::GPUMemoryAllocator::GetInstance();
  game_setting_device_ = static_cast<GameSetting *>(allocator.AllocTensorMem(sizeof(GameSetting)));
  CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
    cudaMemcpyAsync(game_setting_device_, &game_setting_host_, sizeof(GameSetting), cudaMemcpyHostToDevice,
                    reinterpret_cast<cudaStream_t>(stream_ptr)),
    "cudaMemcpy failed.");
  // Move the agent state to device. Only the descriptor (the struct of
  // pointers) is copied; the buffers it points at already live on device.
  agent_state_device_ = static_cast<AgentState *>(allocator.AllocTensorMem(sizeof(AgentState)));
  CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
    cudaMemcpyAsync(agent_state_device_, &agent_state_host_, sizeof(AgentState), cudaMemcpyHostToDevice,
                    reinterpret_cast<cudaStream_t>(stream_ptr)),
    "cudaMemcpy failed.");
  InitEnv(env_num_, agent_num_, game_setting_device_, agent_state_device_, reinterpret_cast<cudaStream_t>(stream_ptr));
  return true;
}
| bool TagEnvironment::Reset(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) { | |||
| auto state = reinterpret_cast<float *>(outputs[0]->addr); | |||
| ResetEnv(env_num_, agent_num_, game_setting_device_, agent_state_device_, state, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| return true; | |||
| } | |||
| bool TagEnvironment::Step(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) { | |||
| auto action = reinterpret_cast<int *>(inputs[0]->addr); | |||
| auto state = reinterpret_cast<float *>(outputs[0]->addr); | |||
| auto reward = reinterpret_cast<float *>(outputs[1]->addr); | |||
| auto done = reinterpret_cast<bool *>(outputs[2]->addr); | |||
| auto team_reward = reinterpret_cast<float *>(workspace[0]->addr); | |||
| StepKernelProfiling(action, state, reward, done, team_reward, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| if (optimal_kernel_ == kBindBlock) { | |||
| StepBindBlock(env_num_, agent_num_, game_setting_device_, agent_state_device_, action, state, reward, done, | |||
| reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| } else { | |||
| StepCrossBlock(env_num_, agent_num_, game_setting_device_, agent_state_device_, action, state, reward, done, | |||
| team_reward_, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| } | |||
| return true; | |||
| } | |||
| size_t TagEnvironment::ActionSizeInBytes() { | |||
| // Action with shape (env_num, agent_num, movement) | |||
| return env_num_ * agent_num_ * sizeof(int); | |||
| } | |||
| size_t TagEnvironment::StateSizeInBytes() { | |||
| // State with shape (env_num, agent_num, agent_num * position) | |||
| return env_num_ * agent_num_ * (agent_num_ * kFeatureNum + 1) * sizeof(float); | |||
| } | |||
| size_t TagEnvironment::RewardSizeInBytes() { | |||
| // Reward with shape (env_num, agent_num, reward) | |||
| return env_num_ * agent_num_ * sizeof(float); | |||
| } | |||
| size_t TagEnvironment::WorkspaceSizeInBytes() { | |||
| // Team reward with shape (env_num,) | |||
| return sizeof(float) * env_num_; | |||
| } | |||
| size_t TagEnvironment::DoneSizeInBytes() { return env_num_ * sizeof(bool); } | |||
| void TagEnvironment::StepKernelProfiling(const int *action, float *state, float *reward, bool *done, float *team_reward, | |||
| cudaStream_t stream) { | |||
| if (!enable_profiling_) { | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "Start Tag environment profiling."; | |||
| // Prepare agent state for profiling. | |||
| AgentState agent_state; | |||
| InitAgentState(game_setting_host_.predator_num, game_setting_host_.prey_num, &agent_state); | |||
| auto &allocator = device::gpu::GPUMemoryAllocator::GetInstance(); | |||
| AgentState *agent_state_device = static_cast<AgentState *>(allocator.AllocTensorMem(sizeof(AgentState))); | |||
| CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE( | |||
| cudaMemcpyAsync(agent_state_device, &agent_state, sizeof(AgentState), cudaMemcpyHostToDevice, stream), | |||
| "cudaMemcpy failed."); | |||
| AgentStateCopy(env_num_, agent_num_, agent_state_device, agent_state_device_, stream); | |||
| // Warmup | |||
| StepBindBlock(env_num_, agent_num_, game_setting_device_, agent_state_device, action, state, reward, done, stream); | |||
| StepCrossBlock(env_num_, agent_num_, game_setting_device_, agent_state_device, action, state, reward, done, | |||
| team_reward_, stream); | |||
| // Collect profiling info | |||
| device::gpu::CudaDeviceStream start = nullptr; | |||
| device::gpu::CudaDeviceStream end = nullptr; | |||
| float bind_cost = 0; | |||
| float cross_cost = 0; | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::CreateEvent(&start), "Failed to create event."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::CreateEvent(&end), "Failed to create event."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::RecordEvent(start, stream), "Failed to record event to stream."); | |||
| StepBindBlock(env_num_, agent_num_, game_setting_device_, agent_state_device, action, state, reward, done, stream); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::RecordEvent(end, stream), "Failed to record event to stream."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::SyncEvent(start), "Failed to sync event."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::SyncEvent(end), "Failed to sync event."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::ElapsedTime(&bind_cost, start, end), "Record time failed."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::RecordEvent(start, stream), "Failed to record event to stream."); | |||
| StepCrossBlock(env_num_, agent_num_, game_setting_device_, agent_state_device, action, state, reward, done, | |||
| team_reward_, stream); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::RecordEvent(end, stream), "Failed to record event to stream."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::SyncEvent(start), "Failed to sync event."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::SyncEvent(end), "Failed to sync event."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::ElapsedTime(&cross_cost, start, end), "Record time failed."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::DestroyEvent(start), "Failed to destroy event."); | |||
| CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::DestroyEvent(end), "Failed to destroy event."); | |||
| // Select optimal kernel | |||
| optimal_kernel_ = bind_cost < cross_cost ? kBindBlock : kCrossBlock; | |||
| // Free tmp agent state | |||
| allocator.FreeTensorMem(agent_state_device); | |||
| FinalizeAgentState(agent_state); | |||
| MS_LOG(INFO) << "Tag environment profiling finish. Bind cost: " << bind_cost << ", cross cost: " << cross_cost; | |||
| enable_profiling_ = false; | |||
| } | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,77 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TAG_ENV_KERNEL_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TAG_ENV_KERNEL_H_ | |||
| #include <vector> | |||
| #include "backend/kernel_compiler/gpu/rl/environment_factory.h" | |||
| #include "backend/kernel_compiler/gpu/cuda_impl/rl/tag_env_impl.cuh" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| // Class for Tag environment. | |||
| // Tag is a multi-agent reinforcement learning environment. | |||
| // It is suppose that the predators learn cooperative stategy (for example surround) to catch the prey. | |||
| // The predators try to catch the prey. All of predators will get same reward when they catch the prey. | |||
| // Tag environment uses discrete action space(still, left, right, up, down), and result observations | |||
| // including agent location information. The tag environment supports multiple instances to speed sample collection. | |||
| // It also supports auto performance profiling and cuda-kernel selection. | |||
| class TagEnvironment : public Environment { | |||
| public: | |||
| TagEnvironment() = default; | |||
| ~TagEnvironment(); | |||
| // Init environment. Parse environment setting, create device memory for environment setting and agent state etc. | |||
| bool Init(const CNodePtr &cnode, void *stream_ptr) override; | |||
| // Reset environment state include agent location and time step. | |||
| bool Reset(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override; | |||
| // Execute time step. | |||
| bool Step(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override; | |||
| size_t ActionSizeInBytes() override; | |||
| size_t StateSizeInBytes() override; | |||
| size_t RewardSizeInBytes() override; | |||
| size_t DoneSizeInBytes() override; | |||
| size_t WorkspaceSizeInBytes() override; | |||
| private: | |||
| // The GameSetting and AgentState are used in C-like compiling environment, use C style resource managerment. | |||
| bool InitGameSetting(const CNodePtr &cnode, GameSetting *setting_host); | |||
| bool InitAgentState(int predator_num, int prey_num, AgentState *agent_state); | |||
| bool FinalizeAgentState(const AgentState &agent_state); | |||
| int env_num_ = 0; | |||
| int agent_num_ = 0; | |||
| GameSetting game_setting_host_; | |||
| GameSetting *game_setting_device_ = nullptr; | |||
| AgentState agent_state_host_; | |||
| AgentState *agent_state_device_; | |||
| float *team_reward_ = nullptr; | |||
| enum StepKernelType { kBindBlock = 0, kCrossBlock }; | |||
| void StepKernelProfiling(const int *action, float *state, float *reward, bool *done, float *team_reward, | |||
| cudaStream_t stream); | |||
| int enable_profiling_ = true; | |||
| StepKernelType optimal_kernel_ = kBindBlock; | |||
| }; | |||
| MS_REG_GPU_ENV(Tag, TagEnvironment) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TAG_ENV_KERNEL_H_ | |||