Browse Source

!26518 tag environment implement

Merge pull request !26518 from chenweifeng/tag-env-implement
tags/v1.6.0
i-robot Gitee 4 years ago
parent
commit
0d11abc7c2
5 changed files with 702 additions and 0 deletions
  1. +339
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/tag_env_impl.cu
  2. +59
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/tag_env_impl.cuh
  3. +1
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/rl/environment_factory.h
  4. +226
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/rl/tag_environment.cc
  5. +77
    -0
      mindspore/ccsrc/backend/kernel_compiler/gpu/rl/tag_environment.h

+ 339
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/tag_env_impl.cu View File

@@ -0,0 +1,339 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/kernel_compiler/gpu/cuda_impl/rl/tag_env_impl.cuh"
#include <assert.h>
#include <algorithm>

// Seed one curand state per (env, agent) pair using a grid-stride loop, so any
// launch configuration covers all env_num * agent_num agents.
__global__ void InitKernel(const int env_num, const int agent_num, const GameSetting *setting,
                           AgentState *agent_state) {
  const size_t total = env_num * agent_num;
  const size_t stride = gridDim.x * blockDim.x;
  size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
  while (idx < total) {
    // Same seed, distinct subsequence per agent -> independent streams.
    curand_init(setting->seed, idx, 0, &agent_state->rand_state[idx]);
    idx += stride;
  }
}

// Host-side launcher: seed the per-agent curand states on the given stream.
void InitEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *state,
             cudaStream_t stream) {
  constexpr int kThreadsPerBlock = 256;
  const int num_blocks = (env_num * agent_num + kThreadsPerBlock - 1) / kThreadsPerBlock;  // ceil-div
  InitKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(env_num, agent_num, setting, state);
}

// Reset every agent of every environment to its starting position and rebuild
// the full observation tensor.
// Observation layout per environment: agent_num rows (one per observer), each
// row holding agent_num * kFeatureNum features plus one trailing time-step slot.
__global__ void ResetKernel(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
                            float *state) {
  // Reset the agent state
  for (size_t gaid = blockIdx.x * blockDim.x + threadIdx.x; gaid < env_num * agent_num;
       gaid += gridDim.x * blockDim.x) {
    const int eid = gaid / agent_num;  // environment index
    const int aid = gaid % agent_num;  // agent index within the environment

    // Static reset: preys start at (0, 0), predators at the map center.
    bool is_prey = (aid >= setting->predator_num);
    int x = is_prey ? 0 : 0.5 * setting->map_width;
    int y = is_prey ? 0 : 0.5 * setting->map_length;

    // Random reset (alternative, currently disabled).
    // auto local_state = agent_state->rand_state[gaid];
    // // curand_uniform result [0, 1), cache x y to registers.
    // int x = static_cast<int>(curand_uniform(&local_state) * setting->map_width);
    // int y = static_cast<int>(curand_uniform(&local_state) * setting->map_length);
    // agent_state->rand_state[gaid] = local_state;

    agent_state->loc_x[gaid] = x;
    agent_state->loc_y[gaid] = y;
    agent_state->still_in_game[gaid] = true;
    // Per-environment scalars; every agent thread of the env writes the same value.
    agent_state->time_step[eid] = 0;
    agent_state->prey_left[eid] = setting->prey_num;

    // Broadcast this agent's features (x, y, is_prey, is_self) into every
    // observer's row of the observation.
    const int state_size_per_agent = agent_num * kFeatureNum + 1;
    const size_t base = eid * agent_num * state_size_per_agent + aid * kFeatureNum;
    for (int id = 0; id < agent_num; id++) {
      size_t offset = base + id * state_size_per_agent;
      state[offset] = static_cast<float>(x) / setting->map_width;      // normalized x
      state[offset + 1] = static_cast<float>(y) / setting->map_length; // normalized y
      state[offset + 2] = aid >= setting->predator_num;                // team flag
      state[offset + 3] = aid == id;                                   // identity flag
    }
    // Trailing slot of this agent's own row: normalized time step (0 after reset).
    state[eid * agent_num * state_size_per_agent + aid * state_size_per_agent + agent_num * kFeatureNum] = 0;
  }
}

// Re-initialize only the environments whose `done` flag is set, mirroring the
// static-reset logic of ResetKernel. Runs after a step so finished episodes
// restart automatically.
__global__ void ResetIfDoneKernel(const int env_num, const int agent_num, const GameSetting *setting,
                                  AgentState *agent_state, float *state, const bool *done) {
  for (size_t gaid = blockIdx.x * blockDim.x + threadIdx.x; gaid < env_num * agent_num;
       gaid += gridDim.x * blockDim.x) {
    const int eid = gaid / agent_num;  // environment index
    const int aid = gaid % agent_num;  // agent index within the environment

    if (done[eid]) {
      // Static reset: preys to (0, 0), predators to the map center.
      bool is_prey = (aid >= setting->predator_num);
      int x = is_prey ? 0 : 0.5 * setting->map_width;
      int y = is_prey ? 0 : 0.5 * setting->map_length;

      agent_state->loc_x[gaid] = x;
      agent_state->loc_y[gaid] = y;
      agent_state->still_in_game[gaid] = true;
      agent_state->time_step[eid] = 0;
      agent_state->prey_left[eid] = setting->prey_num;

      // Rewrite this agent's features into every observer's observation row.
      const int state_size_per_agent = agent_num * kFeatureNum + 1;
      const size_t base = eid * agent_num * state_size_per_agent + aid * kFeatureNum;
      for (int id = 0; id < agent_num; id++) {
        size_t offset = base + id * state_size_per_agent;
        state[offset] = static_cast<float>(x) / setting->map_width;
        state[offset + 1] = static_cast<float>(y) / setting->map_length;
        state[offset + 2] = aid >= setting->predator_num;
        state[offset + 3] = aid == id;
      }
      // Trailing time-step slot of this agent's own row, reset to 0.
      state[eid * agent_num * state_size_per_agent + aid * state_size_per_agent + agent_num * kFeatureNum] = 0;
    }
  }
}

// Host-side launcher: reset every environment and rebuild the observation.
void ResetEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state, float *state,
              cudaStream_t stream) {
  constexpr int kBlockSize = 256;
  const int grid_size = (env_num * agent_num + kBlockSize - 1) / kBlockSize;  // ceil-div
  ResetKernel<<<grid_size, kBlockSize, 0, stream>>>(env_num, agent_num, setting, agent_state, state);
}

// One environment step, "bind block" variant: one CUDA block per environment.
// Agent (x, y) locations are staged in dynamic shared memory (two int arrays of
// length agent_num) so prey threads can test collisions against all predators
// without re-reading global memory.
// Expected launch: <<<env_num, block_dim, 2 * agent_num * sizeof(int), stream>>>.
__global__ void StepBindBlockKernel(const int env_num, const int agent_num, const GameSetting *setting,
                                    AgentState *agent_state, const int *action, float *state, float *reward,
                                    bool *done) {
  __shared__ int team_reward;         // preys caught this step in this environment
  extern __shared__ int agent_loc[];  // [0, agent_num): x; [agent_num, 2*agent_num): y
  int *loc_x = agent_loc;
  int *loc_y = agent_loc + agent_num;

  int eid = blockIdx.x;
  // NOTE(review): __syncthreads() is called inside this loop. If agent_num is
  // smaller than blockDim.x, or not an exact multiple of it, threads reach the
  // barrier an unequal number of times, which is undefined behavior. Likewise,
  // team_reward is reset only on the first pass (aid == 0). Confirm the launch
  // always satisfies agent_num >= blockDim.x with agent_num % blockDim.x == 0,
  // or restructure the loop.
  for (int aid = threadIdx.x; aid < agent_num; aid += blockDim.x) {
    int gaid = eid * agent_num + aid;  // global agent id
    float agent_reward = 0.0;

    // Parse discrete action: index -> (dx, dy) lookup, 5 actions -> offsets 0..8.
    int action_offset = action[gaid] * 2;
    assert(action_offset <= 8);
    int action_x = setting->index_to_action[action_offset];
    int action_y = setting->index_to_action[action_offset + 1];

    // Update agent location.
    int x = agent_state->loc_x[gaid] + action_x;
    int y = agent_state->loc_y[gaid] + action_y;

    int map_width = setting->map_width;
    int map_length = setting->map_length;
    if (x < 0 || y < 0 || x > map_width || y > map_length) {
      // Clamp back onto the map and penalize the wall hit.
      x = min(max(0, x), setting->map_width);
      y = min(max(0, y), setting->map_length);
      agent_reward -= setting->wall_hit_penalty;
    }

    // Publish the new location to shared memory (for collision tests) and to
    // the global agent state.
    loc_x[aid] = x;
    loc_y[aid] = y;
    agent_state->loc_x[gaid] = x;
    agent_state->loc_y[gaid] = y;

    // Update time step (one thread per environment).
    if (aid == 0) {
      team_reward = 0;
      agent_state->time_step[eid]++;
    }
    __syncthreads();  // locations and team_reward visible to all threads

    // Calculate team reward: each still-alive prey checks for a predator on its cell.
    bool is_prey = aid >= setting->predator_num;
    if (is_prey && agent_state->still_in_game[gaid]) {
      for (int tid = 0; tid < setting->predator_num; tid++) {
        // Every prey only caught by one predator.
        if (x == loc_x[tid] && y == loc_y[tid]) {
          agent_state->still_in_game[gaid] = false;
          agent_state->prey_left[eid]--;
          atomicAdd(&team_reward, 1);
          break;
        }
      }
    }
    __syncthreads();  // team_reward final before it is read below

    // Construct observation: broadcast this agent's features to every observer row.
    const int state_size_per_agent = agent_num * kFeatureNum + 1;
    const size_t base = eid * agent_num * state_size_per_agent + aid * kFeatureNum;
    for (int id = 0; id < agent_num; id++) {
      size_t offset = base + id * state_size_per_agent;
      state[offset] = static_cast<float>(x) / map_width;
      state[offset + 1] = static_cast<float>(y) / map_length;
      state[offset + 2] = is_prey;
      state[offset + 3] = (aid == id);
    }
    // Trailing slot of this agent's own row: normalized time step.
    state[eid * agent_num * state_size_per_agent + aid * state_size_per_agent + agent_num * kFeatureNum] =
      static_cast<float>(agent_state->time_step[eid]) / setting->max_timestep;

    // Team reward is shared: predators gain, preys lose, scaled by catches.
    if (team_reward > 0) {
      agent_reward += is_prey ? -setting->caught_penalty * team_reward : setting->catch_reward * team_reward;
    } else {
      agent_reward += is_prey ? setting->step_cost : -setting->step_cost;
    }

    // Agents already out of the game receive zero reward.
    reward[gaid] = agent_reward * agent_state->still_in_game[gaid];

    // Episode ends on timeout or when all preys are caught.
    if (aid == 0) {
      done[eid] = (agent_state->time_step[eid] >= setting->max_timestep) || (agent_state->prey_left[eid] == 0);
    }
  }
}

// One environment step, bind-block variant, then auto-reset finished episodes.
// Launches one block per environment; the kernel stages its own agents'
// locations in dynamic shared memory (two int arrays of length agent_num).
void StepBindBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
                   const int *action, float *state, float *reward, bool *done, cudaStream_t stream) {
  // BUGFIX: dynamic shared memory is allocated PER BLOCK. The previous size,
  // env_num * agent_num * sizeof(float) * 2, over-allocated by a factor of
  // env_num and could exceed the per-block shared-memory limit for large
  // env_num, silently failing the launch. A block only needs room for its own
  // environment's agents: 2 * agent_num ints (loc_x + loc_y).
  size_t shm_size = 2 * static_cast<size_t>(agent_num) * sizeof(int);
  StepBindBlockKernel<<<env_num, 256, shm_size, stream>>>(env_num, agent_num, setting, agent_state, action, state,
                                                          reward, done);

  // Re-initialize any environment whose episode just finished.
  int block_dim = 256;
  int grid_dim = (env_num * agent_num + block_dim - 1) / block_dim;
  ResetIfDoneKernel<<<grid_dim, block_dim, 0, stream>>>(env_num, agent_num, setting, agent_state, state, done);
}

// Cross-block step, phase 1: move every agent according to its discrete action,
// clamp to the map border (applying the wall-hit penalty), initialize the
// per-agent reward, and advance the per-environment time step.
// Grid-stride over all env_num * agent_num agents.
__global__ void UpdateAgentLoc(const int env_num, const int agent_num, const GameSetting *setting,
                               AgentState *agent_state, const int *action, float *state, float *reward) {
  int total_agent = env_num * agent_num;
  for (size_t gaid = blockIdx.x * blockDim.x + threadIdx.x; gaid < total_agent; gaid += gridDim.x * blockDim.x) {
    const int eid = gaid / agent_num;  // environment index
    const int aid = gaid % agent_num;  // agent index within the environment

    // Parse discrete action: index -> (dx, dy) lookup, 5 actions -> offsets 0..8.
    int action_offset = action[gaid] * 2;
    assert(action_offset <= 8);
    int action_x = setting->index_to_action[action_offset];
    int action_y = setting->index_to_action[action_offset + 1];

    // Update agent location.
    int x = agent_state->loc_x[gaid] + action_x;
    int y = agent_state->loc_y[gaid] + action_y;

    int map_width = setting->map_width;
    int map_length = setting->map_length;
    // Float literal (was the double literal 0.0; avoids a double->float
    // conversion in device code).
    reward[gaid] = 0.0f;
    if (x < 0 || y < 0 || x > map_width || y > map_length) {
      // Clamp back onto the map and penalize the wall hit.
      x = min(max(0, x), setting->map_width);
      y = min(max(0, y), setting->map_length);
      reward[gaid] -= setting->wall_hit_penalty;
    }
    agent_state->loc_x[gaid] = x;
    agent_state->loc_y[gaid] = y;

    // One thread per environment advances the step counter.
    if (aid == 0) {
      agent_state->time_step[eid]++;
    }
  }
}

// Cross-block step, phase 2: for every prey still in game, check whether a
// predator shares its cell. A caught prey leaves the game, decrements the
// environment's prey counter, and adds one unit to the environment's team
// reward (accumulated atomically across blocks).
// One thread per prey, grid-stride over env_num * prey_num preys.
__global__ void CalcTeamReward(const int env_num, const int agent_num, const GameSetting *setting,
                               AgentState *agent_state, float *team_reward) {
  const int prey_num_per_env = setting->prey_num;
  const int total_prey_num = env_num * prey_num_per_env;
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < total_prey_num; i += gridDim.x * blockDim.x) {
    const int eid = i / prey_num_per_env;
    // Preys are stored after the predators inside each environment.
    const int rid = eid * agent_num + i % prey_num_per_env + setting->predator_num;

    if (agent_state->still_in_game[rid]) {
      int x = agent_state->loc_x[rid];
      int y = agent_state->loc_y[rid];
      for (int j = 0; j < setting->predator_num; j++) {
        int tid = eid * agent_num + j;
        if (x == agent_state->loc_x[tid] && y == agent_state->loc_y[tid]) {
          agent_state->still_in_game[rid] = false;
          agent_state->prey_left[eid]--;
          atomicAdd(&team_reward[eid], 1);
          // BUGFIX: a prey can only be caught once. Without this break, a prey
          // sharing a cell with several predators was counted once per
          // predator, over-decrementing prey_left and inflating team_reward.
          // This also matches StepBindBlockKernel, which breaks here.
          break;
        }
      }
    }
  }
}

// Cross-block step, phase 3: combine the per-environment team reward with each
// agent's individual reward, rebuild the observation, and set the done flag.
// Must run after UpdateAgentLoc (which initializes reward[gaid]) and
// CalcTeamReward (which fills team_reward).
__global__ void CalcAgentReward(const int env_num, const int agent_num, const GameSetting *setting,
                                AgentState *agent_state, float *state, float *reward, bool *done, float *team_reward) {
  int total_agent_num = env_num * agent_num;
  for (size_t gaid = blockIdx.x * blockDim.x + threadIdx.x; gaid < total_agent_num; gaid += gridDim.x * blockDim.x) {
    int eid = gaid / agent_num;  // environment index
    int aid = gaid % agent_num;  // agent index within the environment
    bool is_prey = aid >= setting->predator_num;
    // Shared team outcome: predators gain, preys lose, scaled by the number of catches.
    // NOTE(review): unlike StepBindBlockKernel, the reward here is NOT masked by
    // still_in_game, so already-caught preys keep accumulating rewards — confirm
    // whether that difference is intended.
    if (team_reward[eid] > 0) {
      reward[gaid] += is_prey ? -team_reward[eid] * setting->caught_penalty : team_reward[eid] * setting->catch_reward;
    } else {
      reward[gaid] += is_prey ? setting->step_cost : -setting->step_cost;
    }

    // Construct observation: broadcast this agent's features to every observer row.
    int x = agent_state->loc_x[gaid];
    int y = agent_state->loc_y[gaid];
    int map_width = setting->map_width;
    int map_length = setting->map_length;

    const int state_size_per_agent = agent_num * kFeatureNum + 1;
    const size_t base = eid * agent_num * state_size_per_agent + aid * kFeatureNum;
    for (int id = 0; id < agent_num; id++) {
      size_t offset = base + id * state_size_per_agent;
      state[offset] = static_cast<float>(x) / map_width;       // normalized x
      state[offset + 1] = static_cast<float>(y) / map_length;  // normalized y
      state[offset + 2] = is_prey;                             // team flag
      state[offset + 3] = (aid == id);                         // identity flag
    }
    // Trailing slot of this agent's own row: normalized time step.
    state[eid * agent_num * state_size_per_agent + aid * state_size_per_agent + agent_num * kFeatureNum] =
      static_cast<float>(agent_state->time_step[eid]) / setting->max_timestep;

    // Episode ends on timeout or when all preys are caught.
    if (aid == 0) {
      done[eid] = (agent_state->time_step[eid] >= setting->max_timestep) || (agent_state->prey_left[eid] == 0);
    }
  }
}

// One environment step, "cross block" variant: the work is split across several
// kernels that communicate through the global-memory buffer team_reward
// (shape (env_num,)), so agent_num is not bounded by the block size.
void StepCrossBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
                    const int *action, float *state, float *reward, bool *done, float *team_reward,
                    cudaStream_t stream) {
  // Update agent location, construct observation, done.
  int block_dim = 256;
  int grid_dim = (env_num * agent_num + block_dim - 1) / block_dim;
  UpdateAgentLoc<<<grid_dim, block_dim, 0, stream>>>(env_num, agent_num, setting, agent_state, action, state, reward);

  // Calculate team reward; zero the per-environment accumulator first.
  // NOTE(review): the cudaMemsetAsync return code is ignored — consider checking it.
  cudaMemsetAsync(team_reward, 0, sizeof(float) * env_num, stream);
  CalcTeamReward<<<grid_dim, block_dim, 0, stream>>>(env_num, agent_num, setting, agent_state, team_reward);

  // Calculate agent reward (also writes the observation and done flags).
  CalcAgentReward<<<grid_dim, block_dim, 0, stream>>>(env_num, agent_num, setting, agent_state, state, reward, done,
                                                      team_reward);

  // Reset the ended environment.
  ResetIfDoneKernel<<<grid_dim, block_dim, 0, stream>>>(env_num, agent_num, setting, agent_state, state, done);
}

// Deep-copy the per-agent arrays (and the per-environment counters) of one
// AgentState SOA into another. Grid-stride over all env_num * agent_num agents.
__global__ void AgentStateCopyKernel(const int env_num, const int agent_num, AgentState *dst, AgentState *src) {
  int total_agent_num = env_num * agent_num;
  for (size_t gaid = blockIdx.x * blockDim.x + threadIdx.x; gaid < total_agent_num; gaid += gridDim.x * blockDim.x) {
    // BUGFIX: eid/aid must be derived with agent_num, not total_agent_num.
    // The old code (gaid / total_agent_num, gaid % total_agent_num) made eid
    // always 0 and aid == gaid, so time_step/prey_left were only copied for
    // environment 0 and every other environment kept stale counters.
    int eid = gaid / agent_num;
    int aid = gaid % agent_num;
    dst->loc_x[gaid] = src->loc_x[gaid];
    dst->loc_y[gaid] = src->loc_y[gaid];
    dst->rand_state[gaid] = src->rand_state[gaid];
    dst->still_in_game[gaid] = src->still_in_game[gaid];

    // One thread per environment copies the per-environment scalars.
    if (aid == 0) {
      dst->time_step[eid] = src->time_step[eid];
      dst->prey_left[eid] = src->prey_left[eid];
    }
  }
}

// Host-side launcher: device-to-device deep copy of an AgentState SOA.
void AgentStateCopy(const int env_num, const int agent_num, AgentState *dst, AgentState *src, cudaStream_t stream) {
  constexpr int kThreadsPerBlock = 256;
  const int num_blocks = (env_num * agent_num + kThreadsPerBlock - 1) / kThreadsPerBlock;  // ceil-div
  AgentStateCopyKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(env_num, agent_num, dst, src);
}

+ 59
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/rl/tag_env_impl.cuh View File

@@ -0,0 +1,59 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_TAG_ENV_IMPL_H_
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_TAG_ENV_IMPL_H_

#include <curand_kernel.h>

// Number of observation features contributed per observed agent:
// (normalized x, normalized y, is_prey flag, is_self flag).
constexpr int kFeatureNum = 4;

// Static configuration of one Tag game, shared by every environment instance.
// Uploaded once to device memory; kernels read it through a const pointer.
struct GameSetting {
  int seed;          // curand seed used when initializing per-agent RNG states
  int predator_num;  // predators per environment (stored first in each env)
  int prey_num;      // preys per environment (stored after the predators)
  int max_timestep;  // episode length limit
  int map_length;    // map extent along y
  int map_width;     // map extent along x
  float wall_hit_penalty;  // subtracted when an agent steps off the map border
  float catch_reward;      // predator reward per catch
  float caught_penalty;    // prey penalty per catch
  float step_cost;         // per-step cost term (sign depends on team)
  // Lookup table: action index -> (dx, dy) pairs: still, +x, -x, +y, -y.
  int index_to_action[10] = {0, 0, 1, 0, -1, 0, 0, 1, 0, -1};
};

// Structure of array (short for SOA) for parallel.
// member shape: [env_num, agent_num]
// (time_step and prey_left are per-environment, shape [env_num]).
// All pointers are device memory, allocated/released by TagEnvironment.
struct AgentState {
  int *loc_x;               // agent x coordinate
  int *loc_y;               // agent y coordinate
  curandState *rand_state;  // per-agent curand RNG state
  bool *still_in_game;      // false once a prey has been caught
  int *time_step;           // per-environment step counter
  int *prey_left;           // per-environment count of uncaught preys
};

// Seed the per-agent curand states. 1-D launch over env_num * agent_num agents.
void InitEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *state,
             cudaStream_t stream);
// Re-initialize every environment and write the initial observation into `state`.
void ResetEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state, float *state,
              cudaStream_t stream);
// One environment step using one CUDA block per environment (shared-memory variant).
void StepBindBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
                   const int *action, float *state, float *reward, bool *done, cudaStream_t stream);
// One environment step spread across blocks; `team_reward` is an (env_num,)
// float scratch buffer used to accumulate catches between kernels.
void StepCrossBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
                    const int *action, float *state, float *reward, bool *done, float *team_reward,
                    cudaStream_t stream);
// Device-to-device deep copy of the AgentState arrays.
void AgentStateCopy(const int env_num, const int agent_num, AgentState *dst, AgentState *src, cudaStream_t stream);
#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_TAG_ENV_IMPL_H_

+ 1
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/rl/environment_factory.h View File

@@ -46,6 +46,7 @@ class Environment {
virtual size_t StateSizeInBytes() = 0;
virtual size_t RewardSizeInBytes() = 0;
virtual size_t DoneSizeInBytes() = 0;
virtual size_t WorkspaceSizeInBytes() { return 0; }
};

constexpr int64_t kInvalidHandle = -1;


+ 226
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/rl/tag_environment.cc View File

@@ -0,0 +1,226 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/rl/tag_environment.h"

#include <memory>
#include <string>
#include <vector>
#include <tuple>
#include <map>
#include <utility>
#include "runtime/device/gpu/cuda_driver.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr auto kSeedAttr = "seed";
constexpr auto kPredatorNumAttr = "predator_num";
constexpr auto kPreyNumrAttr = "prey_num";
constexpr auto kMaxTimestepAttr = "max_timestep";
constexpr auto kMapLengthAttr = "map_length";
constexpr auto kMapWidthAttr = "map_width";
constexpr auto kWallHitPenaltyAttr = "wall_hit_penalty";
constexpr auto kCatchRewardAttr = "catch_reward";
constexpr auto kCaughtPenaltyAttr = "catched_penalty";
constexpr auto kStepCostAttr = "step_cost";
constexpr auto kEnvNumAttr = "env_num";
} // namespace

// Release the host-tracked agent-state buffers and the two device-side
// descriptor blocks allocated by Init().
TagEnvironment::~TagEnvironment() {
  FinalizeAgentState(agent_state_host_);
  auto &mem_pool = device::gpu::GPUMemoryAllocator::GetInstance();
  mem_pool.FreeTensorMem(game_setting_device_);
  mem_pool.FreeTensorMem(agent_state_device_);
}

bool TagEnvironment::InitGameSetting(const CNodePtr &cnode, GameSetting *setting_host) {
MS_LOG(EXCEPTION) << "The `game_setting` should not be nullprt";

setting_host->seed = AnfAlgo::GetNodeAttr<int64_t>(cnode, kSeedAttr);
setting_host->predator_num = AnfAlgo::GetNodeAttr<int64_t>(cnode, kPredatorNumAttr);
setting_host->prey_num = AnfAlgo::GetNodeAttr<int64_t>(cnode, kPreyNumrAttr);
setting_host->max_timestep = AnfAlgo::GetNodeAttr<int64_t>(cnode, kMaxTimestepAttr);
setting_host->map_length = AnfAlgo::GetNodeAttr<int64_t>(cnode, kMapLengthAttr);
setting_host->map_width = AnfAlgo::GetNodeAttr<int64_t>(cnode, kMapWidthAttr);
setting_host->wall_hit_penalty = AnfAlgo::GetNodeAttr<float>(cnode, kWallHitPenaltyAttr);
setting_host->catch_reward = AnfAlgo::GetNodeAttr<float>(cnode, kCatchRewardAttr);
setting_host->caught_penalty = AnfAlgo::GetNodeAttr<float>(cnode, kCaughtPenaltyAttr);
setting_host->step_cost = AnfAlgo::GetNodeAttr<float>(cnode, kStepCostAttr);

env_num_ = AnfAlgo::GetNodeAttr<int64_t>(cnode, kEnvNumAttr);
agent_num_ = setting_host->predator_num + setting_host->prey_num;
return true;
}

bool TagEnvironment::InitAgentState(int predator_num, int prey_num, AgentState *agent_state) {
MS_LOG(EXCEPTION) << "The `state` should not be nullptr";

int total_agents_num = env_num_ * agent_num_;
auto &allocator = device::gpu::GPUMemoryAllocator::GetInstance();
agent_state->loc_x = static_cast<int *>(allocator.AllocTensorMem(sizeof(int) * total_agents_num));
agent_state->loc_y = static_cast<int *>(allocator.AllocTensorMem(sizeof(int) * total_agents_num));
agent_state->still_in_game = static_cast<bool *>(allocator.AllocTensorMem(sizeof(bool) * total_agents_num));
agent_state->rand_state =
static_cast<curandState *>(allocator.AllocTensorMem(sizeof(curandState) * total_agents_num));
agent_state->time_step = static_cast<int *>(allocator.AllocTensorMem(sizeof(int) * env_num_));
agent_state->prey_left = static_cast<int *>(allocator.AllocTensorMem(sizeof(int) * env_num_));
return true;
}

// Release every device buffer owned by an AgentState (counterpart of
// InitAgentState). Returns true on success.
bool TagEnvironment::FinalizeAgentState(const AgentState &agent_setting) {
  // BUGFIX: removed the unconditional MS_LOG(EXCEPTION) that used to sit here —
  // it threw on every call (including from the destructor), and the parameter
  // is a reference that can never be null anyway.
  auto &allocator = device::gpu::GPUMemoryAllocator::GetInstance();
  allocator.FreeTensorMem(agent_setting.prey_left);
  allocator.FreeTensorMem(agent_setting.time_step);
  allocator.FreeTensorMem(agent_setting.still_in_game);
  allocator.FreeTensorMem(agent_setting.rand_state);
  allocator.FreeTensorMem(agent_setting.loc_x);
  allocator.FreeTensorMem(agent_setting.loc_y);
  return true;
}

// Initialize the environment: parse attributes from the kernel node, allocate
// device copies of the game setting and the agent-state descriptor, and seed
// the per-agent curand states on the given stream.
bool TagEnvironment::Init(const CNodePtr &cnode, void *stream_ptr) {
  InitGameSetting(cnode, &game_setting_host_);
  InitAgentState(game_setting_host_.predator_num, game_setting_host_.prey_num, &agent_state_host_);

  // Move the game setting to device.
  // NOTE(review): the async copies read from class members (which outlive the
  // copy), but the host memory is not pinned — confirm this behaves as intended
  // with cudaMemcpyAsync.
  auto &allocator = device::gpu::GPUMemoryAllocator::GetInstance();
  game_setting_device_ = static_cast<GameSetting *>(allocator.AllocTensorMem(sizeof(GameSetting)));
  CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
    cudaMemcpyAsync(game_setting_device_, &game_setting_host_, sizeof(GameSetting), cudaMemcpyHostToDevice,
                    reinterpret_cast<cudaStream_t>(stream_ptr)),
    "cudaMemcpy failed.");

  // Move the agent state (the SOA of device pointers) to device.
  agent_state_device_ = static_cast<AgentState *>(allocator.AllocTensorMem(sizeof(AgentState)));
  CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
    cudaMemcpyAsync(agent_state_device_, &agent_state_host_, sizeof(AgentState), cudaMemcpyHostToDevice,
                    reinterpret_cast<cudaStream_t>(stream_ptr)),
    "cudaMemcpy failed.");

  // Seed the per-agent curand states.
  InitEnv(env_num_, agent_num_, game_setting_device_, agent_state_device_, reinterpret_cast<cudaStream_t>(stream_ptr));
  return true;
}

// Reset all environment instances and write the initial observation to outputs[0].
bool TagEnvironment::Reset(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                           const std::vector<AddressPtr> &outputs, void *stream_ptr) {
  auto *observation = reinterpret_cast<float *>(outputs[0]->addr);
  auto cuda_stream = reinterpret_cast<cudaStream_t>(stream_ptr);
  ResetEnv(env_num_, agent_num_, game_setting_device_, agent_state_device_, observation, cuda_stream);
  return true;
}

// Execute one time step with whichever step kernel profiling selected.
// inputs[0]: action; outputs: state, reward, done; workspace[0]: team reward
// scratch buffer of shape (env_num,).
bool TagEnvironment::Step(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                          const std::vector<AddressPtr> &outputs, void *stream_ptr) {
  auto action = reinterpret_cast<int *>(inputs[0]->addr);
  auto state = reinterpret_cast<float *>(outputs[0]->addr);
  auto reward = reinterpret_cast<float *>(outputs[1]->addr);
  auto done = reinterpret_cast<bool *>(outputs[2]->addr);
  auto team_reward = reinterpret_cast<float *>(workspace[0]->addr);
  auto stream = reinterpret_cast<cudaStream_t>(stream_ptr);
  // One-shot benchmark that picks the faster kernel; no-op afterwards.
  StepKernelProfiling(action, state, reward, done, team_reward, stream);
  if (optimal_kernel_ == kBindBlock) {
    StepBindBlock(env_num_, agent_num_, game_setting_device_, agent_state_device_, action, state, reward, done, stream);
  } else {
    // BUGFIX: pass the workspace buffer `team_reward`, not the member
    // `team_reward_`, which is never allocated (nullptr) and would fault inside
    // StepCrossBlock's cudaMemsetAsync.
    StepCrossBlock(env_num_, agent_num_, game_setting_device_, agent_state_device_, action, state, reward, done,
                   team_reward, stream);
  }
  return true;
}

size_t TagEnvironment::ActionSizeInBytes() {
  // One discrete movement per agent: shape (env_num, agent_num).
  const size_t total_agents = static_cast<size_t>(env_num_) * agent_num_;
  return total_agents * sizeof(int);
}

size_t TagEnvironment::StateSizeInBytes() {
  // Per observer: agent_num_ * kFeatureNum features plus one time-step slot.
  // Full shape: (env_num, agent_num, agent_num * kFeatureNum + 1).
  const size_t row_len = agent_num_ * kFeatureNum + 1;
  return static_cast<size_t>(env_num_) * agent_num_ * row_len * sizeof(float);
}

size_t TagEnvironment::RewardSizeInBytes() {
  // One scalar reward per agent: shape (env_num, agent_num).
  return sizeof(float) * static_cast<size_t>(env_num_) * agent_num_;
}

size_t TagEnvironment::WorkspaceSizeInBytes() {
  // Team-reward scratch buffer: one float per environment, shape (env_num,).
  return static_cast<size_t>(env_num_) * sizeof(float);
}

// One done flag per environment: shape (env_num,).
size_t TagEnvironment::DoneSizeInBytes() {
  return static_cast<size_t>(env_num_) * sizeof(bool);
}

// One-shot benchmark: time both step kernels on a scratch copy of the agent
// state and cache the faster one in optimal_kernel_. Subsequent calls return
// immediately. `team_reward` is the (env_num,) workspace buffer used by the
// cross-block kernel.
void TagEnvironment::StepKernelProfiling(const int *action, float *state, float *reward, bool *done, float *team_reward,
                                         cudaStream_t stream) {
  if (!enable_profiling_) {
    return;
  }

  MS_LOG(INFO) << "Start Tag environment profiling.";

  // Prepare a scratch agent state so profiling does not disturb the real episode.
  AgentState agent_state;
  InitAgentState(game_setting_host_.predator_num, game_setting_host_.prey_num, &agent_state);
  auto &allocator = device::gpu::GPUMemoryAllocator::GetInstance();
  AgentState *agent_state_device = static_cast<AgentState *>(allocator.AllocTensorMem(sizeof(AgentState)));
  CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(
    cudaMemcpyAsync(agent_state_device, &agent_state, sizeof(AgentState), cudaMemcpyHostToDevice, stream),
    "cudaMemcpy failed.");
  AgentStateCopy(env_num_, agent_num_, agent_state_device, agent_state_device_, stream);

  // Warmup.
  // BUGFIX: use the workspace buffer `team_reward` passed in by the caller; the
  // member `team_reward_` is never allocated (nullptr) and would fault inside
  // StepCrossBlock.
  StepBindBlock(env_num_, agent_num_, game_setting_device_, agent_state_device, action, state, reward, done, stream);
  StepCrossBlock(env_num_, agent_num_, game_setting_device_, agent_state_device, action, state, reward, done,
                 team_reward, stream);

  // Collect profiling info with CUDA events.
  device::gpu::CudaDeviceStream start = nullptr;
  device::gpu::CudaDeviceStream end = nullptr;
  float bind_cost = 0;
  float cross_cost = 0;
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::CreateEvent(&start), "Failed to create event.");
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::CreateEvent(&end), "Failed to create event.");

  // Time the bind-block variant.
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::RecordEvent(start, stream), "Failed to record event to stream.");
  StepBindBlock(env_num_, agent_num_, game_setting_device_, agent_state_device, action, state, reward, done, stream);
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::RecordEvent(end, stream), "Failed to record event to stream.");
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::SyncEvent(start), "Failed to sync event.");
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::SyncEvent(end), "Failed to sync event.");
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::ElapsedTime(&bind_cost, start, end), "Record time failed.");

  // Time the cross-block variant (same BUGFIX: workspace buffer, not member).
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::RecordEvent(start, stream), "Failed to record event to stream.");
  StepCrossBlock(env_num_, agent_num_, game_setting_device_, agent_state_device, action, state, reward, done,
                 team_reward, stream);
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::RecordEvent(end, stream), "Failed to record event to stream.");
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::SyncEvent(start), "Failed to sync event.");
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::SyncEvent(end), "Failed to sync event.");
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::ElapsedTime(&cross_cost, start, end), "Record time failed.");

  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::DestroyEvent(start), "Failed to destroy event.");
  CHECK_OP_RET_WITH_EXCEPT(device::gpu::CudaDriver::DestroyEvent(end), "Failed to destroy event.");

  // Select optimal kernel.
  optimal_kernel_ = bind_cost < cross_cost ? kBindBlock : kCrossBlock;

  // Free tmp agent state.
  allocator.FreeTensorMem(agent_state_device);
  FinalizeAgentState(agent_state);

  MS_LOG(INFO) << "Tag environment profiling finish. Bind cost: " << bind_cost << ", cross cost: " << cross_cost;
  enable_profiling_ = false;
}
} // namespace kernel
} // namespace mindspore

+ 77
- 0
mindspore/ccsrc/backend/kernel_compiler/gpu/rl/tag_environment.h View File

@@ -0,0 +1,77 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TAG_ENV_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TAG_ENV_KERNEL_H_

#include <vector>
#include "backend/kernel_compiler/gpu/rl/environment_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/rl/tag_env_impl.cuh"

namespace mindspore {
namespace kernel {
// Class for Tag environment.
// Tag is a multi-agent reinforcement learning environment.
// It is supposed that the predators learn a cooperative strategy (for example surround) to catch the prey.
// The predators try to catch the prey. All of the predators get the same reward when they catch the prey.
// Tag environment uses a discrete action space (still, left, right, up, down), and the resulting observations
// contain agent location information. The tag environment supports multiple instances to speed up sample
// collection. It also supports automatic performance profiling and cuda-kernel selection.
class TagEnvironment : public Environment {
 public:
  TagEnvironment() = default;
  ~TagEnvironment();

  // Init environment. Parse environment setting, create device memory for environment setting and agent state etc.
  bool Init(const CNodePtr &cnode, void *stream_ptr) override;
  // Reset environment state, including agent location and time step.
  bool Reset(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
             const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
  // Execute one time step.
  bool Step(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
            const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
  size_t ActionSizeInBytes() override;
  size_t StateSizeInBytes() override;
  size_t RewardSizeInBytes() override;
  size_t DoneSizeInBytes() override;
  size_t WorkspaceSizeInBytes() override;

 private:
  // The GameSetting and AgentState are used in a C-like compiling environment, so use C style resource management.
  bool InitGameSetting(const CNodePtr &cnode, GameSetting *setting_host);
  bool InitAgentState(int predator_num, int prey_num, AgentState *agent_state);
  bool FinalizeAgentState(const AgentState &agent_state);

  int env_num_ = 0;
  int agent_num_ = 0;
  GameSetting game_setting_host_;
  GameSetting *game_setting_device_ = nullptr;
  AgentState agent_state_host_;
  // BUGFIX: initialize to nullptr (was left uninitialized, so the destructor
  // could free a garbage pointer if Init() never ran).
  AgentState *agent_state_device_ = nullptr;
  float *team_reward_ = nullptr;

  enum StepKernelType { kBindBlock = 0, kCrossBlock };
  // Benchmark both step kernels once and cache the faster one in optimal_kernel_.
  void StepKernelProfiling(const int *action, float *state, float *reward, bool *done, float *team_reward,
                           cudaStream_t stream);
  // BUGFIX: this is a boolean flag; declare it bool (was int).
  bool enable_profiling_ = true;
  StepKernelType optimal_kernel_ = kBindBlock;
};

MS_REG_GPU_ENV(Tag, TagEnvironment)
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TAG_ENV_KERNEL_H_

Loading…
Cancel
Save