Browse Source

support manual block mapping config shift for higher performance

pull/67/head
dabaiji 5 years ago
parent
commit
8e54df1026
4 changed files with 30 additions and 12 deletions
  1. +21
    -6
      src/poly/schedule_pass_gpu/mapping_outer_band.cc
  2. +1
    -1
      src/poly/schedule_pass_gpu/mapping_outer_band.h
  3. +6
    -4
      src/poly/schedule_tree_util.cc
  4. +2
    -1
      src/poly/schedule_tree_util.h

+ 21
- 6
src/poly/schedule_pass_gpu/mapping_outer_band.cc View File

@@ -204,8 +204,9 @@ isl::schedule MappingOuterBand::DoThreadMapping(const isl::schedule &sch) {

if (node.has_parent() && node.parent().isa<isl::schedule_node_mark>()) {
const std::string &marker = node.parent().as<isl::schedule_node_mark>().get_id().get_name();
if (marker == "mind_trick_swizzle_marker")
if (marker == "mind_trick_swizzle_marker") {
return node;
}
}

size_t num_mapped_desc = NumMappedDescendant(thread_record, node);
@@ -691,7 +692,8 @@ std::pair<std::string, std::string> MappingOuterBand::GetC1C0BlockConfig(size_t
}

isl::schedule_node MappingOuterBand::MapBlockHelper(const isl::schedule_node &orig_node, MappingCfg *block_cfg,
size_t n_block_map, bool check_extent) {
size_t n_block_map, bool check_extent,
std::unordered_map<size_t, size_t> map_idx_shift) {
auto node = orig_node;
auto band_node = node.as<isl::schedule_node_band>();
if (!band_node || !band_node.permutable()) {
@@ -721,7 +723,7 @@ isl::schedule_node MappingOuterBand::MapBlockHelper(const isl::schedule_node &or
node = node.child(0);

Mapping mapping;
node = CreateAndInsertMapFilter(node, false, upa_list, block_cfg, mapping);
node = CreateAndInsertMapFilter(node, false, upa_list, block_cfg, mapping, map_idx_shift);
scop_info_.upa_node_mapping_.emplace_back(std::make_pair(node.parent(), mapping));

return node;
@@ -748,14 +750,26 @@ isl::schedule MappingOuterBand::DoBlockMapping(const isl::schedule &sch) {
}
// For the scalar case that does not consider coincidence (reset during restart in pass mgr), there is usually only
// one member in the outer band, and we can map the maximal block size to that member.
if (n_block_map == 1 && n_block_map < block_cfg->bound && !scop_info_.user_config_.GetConsiderCoincidence()) {
bool need_shift = n_block_map < block_cfg->bound && !scop_info_.user_config_.GetConsiderCoincidence();
std::unordered_map<size_t, size_t> map_idx_shift;
if (need_shift) {
auto new_idx = 0;
for (size_t i = 0; i < block_cfg->bound; ++i) {
if (block_cfg->GetAt(i).second > block_cfg->GetAt(new_idx).second) {
new_idx = i;
}
}
block_cfg->SwapConfig(0, new_idx);
if (scop_info_.analysis_result_.GetEnabledAutoTiling()) {
// for auto configs, simply swap the values stored at the two config indices, for example:
// [before] bx = 1(map), by = 1024; [after] bx = 1024(map), by = 1
block_cfg->SwapConfig(0, new_idx);
} else {
// for manual configs, we need to keep the user-specified config idx, so we record the shifted idx here and it
// will be used in CreateAndInsertMapFilter, for example:
// [before] bx = 1(map), by = 1024; [after] bx = 1, by = 1024(map);
map_idx_shift.insert({0, new_idx});
map_idx_shift.insert({new_idx, 0});
}
}

if (scop_info_.user_config_.GetEnableAtomicAdd() && NeedAtomicAdd(band_node, n_block_map)) {
@@ -778,7 +792,8 @@ isl::schedule MappingOuterBand::DoBlockMapping(const isl::schedule &sch) {

// Step 3. Map outer-most band for c1 tile as usual (and do not check extent when c0 tile is applied manually).
auto map_c0_block = c0_block_cfg != nullptr;
node = MapBlockHelper(node, c1_block_cfg, n_block_map, !map_c0_block);
bool check_extent = !map_c0_block && map_idx_shift.empty();
node = MapBlockHelper(node, c1_block_cfg, n_block_map, check_extent, map_idx_shift);
auto final_schedule = node.get_schedule();

// Step 4. Map middle-level band (i.e. c0 tile band).


+ 1
- 1
src/poly/schedule_pass_gpu/mapping_outer_band.h View File

@@ -43,7 +43,7 @@ class MappingOuterBand : public SchedulePass {
bool NeedAtomicAdd(const isl::schedule_node_band &band, size_t n_block_map);
void MarkAtomicAddTensor(const isl::schedule_node_band &band);
isl::schedule_node MapBlockHelper(const isl::schedule_node &node, MappingCfg *block_cfg, size_t n_block_map,
bool check_extent);
bool check_extent, std::unordered_map<size_t, size_t> map_idx_shift = {});

isl::schedule DoThreadMapping(const isl::schedule &sch);
size_t MapThreadHelper(isl::schedule_node &thread_root);


+ 6
- 4
src/poly/schedule_tree_util.cc View File

@@ -306,8 +306,8 @@ std::pair<isl::schedule_node, isl::schedule_node> MapInnerDimToThreads(const isl
}

isl::schedule_node CreateAndInsertMapFilter(const isl::schedule_node &node, const bool is_promotion,
isl::union_pw_aff_list upa_list, MappingCfg *mapping_cfg,
Mapping &mapping) {
isl::union_pw_aff_list upa_list, MappingCfg *mapping_cfg, Mapping &mapping,
std::unordered_map<size_t, size_t> map_idx_shift) {
// create mapping filter
CHECK(mapping_cfg != nullptr) << "threadconfig is null";

@@ -317,7 +317,8 @@ isl::schedule_node CreateAndInsertMapFilter(const isl::schedule_node &node, cons
}
size_t num_map = upa_list.size();
for (size_t i = 0; i < num_map; ++i) {
std::pair<std::string, int> cfg = mapping_cfg->GetAt(i);
auto map_id = map_idx_shift.find(i) != map_idx_shift.end() ? map_idx_shift[i] : i;
std::pair<std::string, int> cfg = mapping_cfg->GetAt(map_id);
if (cfg.first.find(WARP_COMPUTE) != std::string::npos && cfg.second == MAPPING_INVALID_WARP && is_promotion) {
continue;
}
@@ -331,7 +332,8 @@ isl::schedule_node CreateAndInsertMapFilter(const isl::schedule_node &node, cons
for (size_t i = num_map; i < mapping_cfg->bound; ++i) {
CHECK(!domain.is_null());
auto universe = domain.universe();
std::pair<std::string, int> cfg = mapping_cfg->GetAt(i);
auto map_id = map_idx_shift.find(i) != map_idx_shift.end() ? map_idx_shift[i] : i;
std::pair<std::string, int> cfg = mapping_cfg->GetAt(map_id);
auto id = isl::id(node.ctx(), cfg.first);
mapping[id] = isl::union_pw_aff(universe, isl::val::zero(domain.ctx()));
}


+ 2
- 1
src/poly/schedule_tree_util.h View File

@@ -99,7 +99,8 @@ std::pair<isl::schedule_node, isl::schedule_node> MapInnerDimToThreads(const isl
const bool is_promotion, MappingCfg *mapping_cfg,
Mapping &mapping, bool need_coalesce);
isl::schedule_node CreateAndInsertMapFilter(const isl::schedule_node &node, const bool is_promotion,
isl::union_pw_aff_list upa_list, MappingCfg *mapping_cfg, Mapping &mapping);
isl::union_pw_aff_list upa_list, MappingCfg *mapping_cfg, Mapping &mapping,
std::unordered_map<size_t, size_t> map_idx_shift = {});
isl::schedule_node CheckMapSizeAndApplyTile(const isl::schedule_node &thread_root,
const isl::union_pw_aff_list &aff_list, MappingCfg *mapping_cfg,
bool need_coalesce);


Loading…
Cancel
Save