GitOrigin-RevId: 0eba678f2b
tags/v1.7.0
@@ -13,7 +13,7 @@
 #if defined(_WIN32)
 #include <io.h>
-#define F_OK 0
+#define F_OK 0
 #define access(a, b) _access(a, b)
 #elif __linux__ || __unix__ || __APPLE__
 #include <unistd.h>
@@ -32,8 +32,9 @@ public:
     template <typename T>
     void read(T& val) {
-        static_assert(std::is_trivially_copyable<T>::value,
-                      "only support trivially copyable type");
+        static_assert(
+                std::is_trivially_copyable<T>::value,
+                "only support trivially copyable type");
         mgb_assert(m_offset + sizeof(T) <= m_size);
         memcpy(&val, m_ptr, sizeof(T));
         m_offset += sizeof(T);
@@ -42,8 +43,9 @@ public:
     template <typename T>
     void read(T* buf, size_t size) {
-        static_assert(std::is_trivially_copyable<T>::value && sizeof(T) == 1,
-                      "only support read bytes");
+        static_assert(
+                std::is_trivially_copyable<T>::value && sizeof(T) == 1,
+                "only support read bytes");
         mgb_assert(m_offset + size <= m_size);
         memcpy(buf, m_ptr, size);
         m_offset += size;
@@ -67,20 +69,21 @@ public:
     template <typename T>
     void read(T& val) {
-        static_assert(std::is_trivially_copyable<T>::value,
-                      "only support trivially copyable type");
+        static_assert(
+                std::is_trivially_copyable<T>::value,
+                "only support trivially copyable type");
         auto ret = fread(&val, sizeof(T), 1, m_fp);
         mgb_assert(ret == 1);
     }
     template <typename T>
     void read(T* buf, size_t size) {
-        static_assert(std::is_trivially_copyable<T>::value && sizeof(T) == 1,
-                      "only support read bytes");
+        static_assert(
+                std::is_trivially_copyable<T>::value && sizeof(T) == 1,
+                "only support read bytes");
         auto ret = fread(buf, size, 1, m_fp);
         mgb_assert(ret == 1);
     }
 };

 //////////////////////// InFilePersistentCache::OutputFile ///////////////
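These two `read` overloads are the whole deserialization surface of the cache file: fixed-size fields go through `read(T&)`, raw byte buffers through `read(T*, size_t)`, and the `static_assert`s reject non-trivially-copyable types at compile time. A minimal standalone sketch of the same pattern for the in-memory reader follows; the class name and offset bookkeeping are illustrative, not MegBrain's exact code:

```cpp
#include <cassert>
#include <cstddef>
#include <cstring>
#include <type_traits>

// Sketch of the type-safe binary reader pattern used above: memcpy-based
// reads gated by static_assert so only trivially copyable types pass.
class MemReader {
    const unsigned char* m_ptr;
    size_t m_size, m_offset = 0;

public:
    MemReader(const unsigned char* ptr, size_t size) : m_ptr{ptr}, m_size{size} {}

    template <typename T>
    void read(T& val) {
        static_assert(
                std::is_trivially_copyable<T>::value,
                "only support trivially copyable type");
        assert(m_offset + sizeof(T) <= m_size);  // bounds check, like mgb_assert
        memcpy(&val, m_ptr + m_offset, sizeof(T));
        m_offset += sizeof(T);
    }

    template <typename T>
    void read(T* buf, size_t size) {
        static_assert(
                std::is_trivially_copyable<T>::value && sizeof(T) == 1,
                "only support read bytes");
        assert(m_offset + size <= m_size);
        memcpy(buf, m_ptr + m_offset, size);
        m_offset += size;
    }
};
```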
@@ -114,8 +117,8 @@ public:
 //////////////////////// InFilePersistentCache::BlobStorage ///////////////
 template <typename Input>
-InFilePersistentCache::BlobStorage&
-InFilePersistentCache::BlobStorage::init_from_input(Input& inp) {
+InFilePersistentCache::BlobStorage& InFilePersistentCache::BlobStorage::init_from_input(
+        Input& inp) {
     uint32_t data_size;
     inp.read(data_size);
     size = data_size;
@@ -125,15 +128,14 @@ InFilePersistentCache::BlobStorage::init_from_input(Input& inp) {
     return *this;
 }

-void InFilePersistentCache::BlobStorage::write_to_file(
-        OutputFile& out_file) const {
+void InFilePersistentCache::BlobStorage::write_to_file(OutputFile& out_file) const {
     uint32_t u_size = size;
     out_file.write(u_size);
     out_file.write(data_refhold.get(), u_size);
 }

-InFilePersistentCache::BlobStorage&
-InFilePersistentCache::BlobStorage::init_data_ref(const Blob& b) {
+InFilePersistentCache::BlobStorage& InFilePersistentCache::BlobStorage::init_data_ref(
+        const Blob& b) {
     data_refhold = std::make_unique<uint8_t[]>(b.size + 1);
     memcpy(data_refhold.get(), b.ptr, b.size);
     data_refhold.get()[b.size] = 0;  // for C-string safety
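`write_to_file` and `init_from_input` together fix the on-disk record format: every blob is stored as a 32-bit size prefix followed by its raw bytes, and `init_data_ref` additionally NUL-terminates its private copy. A minimal sketch of that length-prefixed format, with simplified types and no error handling:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Length-prefixed blob record, as write_to_file produces above:
// a uint32_t byte count, then the payload itself.
void write_blob(std::FILE* fp, const std::vector<uint8_t>& blob) {
    uint32_t u_size = static_cast<uint32_t>(blob.size());
    std::fwrite(&u_size, sizeof(u_size), 1, fp);  // size prefix
    std::fwrite(blob.data(), 1, u_size, fp);      // raw payload
}

std::vector<uint8_t> read_blob(std::FILE* fp) {
    uint32_t u_size = 0;
    std::fread(&u_size, sizeof(u_size), 1, fp);
    std::vector<uint8_t> blob(u_size);
    std::fread(blob.data(), 1, u_size, fp);
    return blob;
}
```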
@@ -227,8 +229,8 @@ Maybe<InFilePersistentCache::Blob> InFilePersistentCache::get(
     return iter1->second;
 }

-void InFilePersistentCache::put(const std::string& category, const Blob& key,
-                                const Blob& value) {
+void InFilePersistentCache::put(
+        const std::string& category, const Blob& key, const Blob& value) {
     BlobStorage key_storage;
     key_storage.init_data_ref(key).init_hash();
@@ -49,13 +49,15 @@ class InFilePersistentCache final : public PersistentCache {
             size_t operator()(const BlobStorage& b) const { return b.hash; }
         };
     };
-    std::unordered_map<std::string, std::unordered_map<BlobStorage, BlobStorage,
-                                                       BlobStorage::Hash>>
+    std::unordered_map<
+            std::string,
+            std::unordered_map<BlobStorage, BlobStorage, BlobStorage::Hash>>
             m_cache;
     MGB_MUTEX m_mtx;

     template <typename Input>
     void read_cache(Input& inp);

 public:
     InFilePersistentCache() = default;
     InFilePersistentCache(const char* path);
@@ -68,8 +70,7 @@ public:
     void dump_cache(const char* path);

     Maybe<Blob> get(const std::string& category, const Blob& key) override;
-    void put(const std::string& category, const Blob& key,
-             const Blob& value) override;
+    void put(const std::string& category, const Blob& key, const Blob& value) override;
     bool support_dump_cache() override { return true; }
 };

 }  // namespace mgb
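The `m_cache` member above realizes a two-level lookup: a category string (built from the platform, e.g. `plat=cuda;dev=...`) maps to a table of key blob to value blob, and the nested `BlobStorage::Hash` functor simply returns the precomputed `b.hash` so `BlobStorage` can serve as an `unordered_map` key. A simplified sketch of that shape, with `std::string` standing in for `BlobStorage` and a plain `std::mutex` for `MGB_MUTEX` (both substitutions are assumptions):

```cpp
#include <mutex>
#include <string>
#include <unordered_map>

// Two-level cache: category -> (key blob -> value blob), guarded by a
// mutex, mirroring the m_cache/m_mtx members declared above.
class CacheSketch {
    std::unordered_map<std::string, std::unordered_map<std::string, std::string>>
            m_cache;
    std::mutex m_mtx;

public:
    const std::string* get(const std::string& category, const std::string& key) {
        std::lock_guard<std::mutex> lock{m_mtx};
        auto cat = m_cache.find(category);
        if (cat == m_cache.end())
            return nullptr;
        auto item = cat->second.find(key);
        return item == cat->second.end() ? nullptr : &item->second;
    }

    void put(const std::string& category, const std::string& key, std::string value) {
        std::lock_guard<std::mutex> lock{m_mtx};
        m_cache[category][key] = std::move(value);
    }
};
```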
@@ -40,7 +40,7 @@ public:
             const std::string& category, const Blob& key, const Blob& value) = 0;
     virtual bool support_dump_cache() { return false; }

     //! set an implementation; return the original implementation
     static std::shared_ptr<PersistentCache> set_impl(
             std::shared_ptr<PersistentCache> impl);
@@ -18,6 +18,7 @@
 #include "megbrain/opr/nn_int.h"
 #include "megbrain/opr/tensor_manip.h"
+#include "megbrain/utils/hash_ct.h"

 #include "midout.h"
 MIDOUT_DECL(megbrain_opr_safe_dump)
 #define MIDOUT_B(...) MIDOUT_BEGIN(megbrain_opr_safe_dump, __VA_ARGS__) {
@@ -38,24 +39,34 @@ template <>
 void write_param(std::string& /* data */, const DType& /* dtype */) {}

 template <class Opr>
-struct OprDumpImpl {
-    static std::string dump(const cg::OperatorNodeBase* opr_) {
-        MIDOUT_B(Opr)
-        auto&& opr = opr_->cast_final_safe<Opr>();
-        std::string data;
-        write_param(data, opr.param());
-        return data;
-        MIDOUT_E
-    }
-};
+struct OprDumpImpl;

-#define INST(_Opr)                                                      \
+#define cb(_Opr)                                                        \
+    template <>                                                         \
+    struct OprDumpImpl<_Opr> {                                          \
+        static std::string dump(const cg::OperatorNodeBase* opr_) {     \
+            MIDOUT_B(_Opr)                                              \
+            auto&& opr = opr_->cast_final_safe<_Opr>();                 \
+            std::string data;                                           \
+            auto opr_hash = MGB_HASH_STR(#_Opr);                        \
+            write_param(data, opr_hash);                                \
+            write_param(data, opr.param());                             \
+            return data;                                                \
+            MIDOUT_E                                                    \
+        }                                                               \
+    };
+FOREACH_SUPPORTED_OPR_WITHOUT_EXECUTION_POLICY(cb)
+#undef cb
+
+#define cb(_Opr)                                                        \
     template <>                                                         \
     struct OprDumpImpl<_Opr> {                                          \
         static std::string dump(const cg::OperatorNodeBase* opr_) {     \
             MIDOUT_B(_Opr)                                              \
             auto&& opr = opr_->cast_final_safe<_Opr>();                 \
             std::string data;                                           \
+            auto opr_hash = MGB_HASH_STR(#_Opr);                        \
+            write_param(data, opr_hash);                                \
             write_param(data, opr.param());                             \
             using ExecutionPolicy = megdnn::param::ExecutionPolicy;     \
             ExecutionPolicy policy{                                     \
@@ -66,11 +77,8 @@ struct OprDumpImpl {
             MIDOUT_E                                                    \
         }                                                               \
     };
-INST(Convolution);
-INST(ConvBiasForward);
-INST(ConvolutionBackwardData);
-INST(PoolingForward);
-#undef INST
+FOREACH_SUPPORTED_OPR_WITH_EXECUTION_POLICY(cb)
+#undef cb

 }  // namespace

 namespace mgb {
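Two things change here: the single generic `OprDumpImpl` template becomes an X-macro that stamps out one specialization per supported operator, and each dump now starts with `MGB_HASH_STR(#_Opr)`, a compile-time hash of the operator's type name, so serialized params of different operator types can no longer collide byte for byte. A self-contained sketch of the same X-macro plus type-tag idea; the hash function, operator structs, and list below are illustrative, with `fnv1a` standing in for MegBrain's `MGB_HASH_STR`:

```cpp
#include <cstdint>
#include <string>

// Compile-time FNV-1a, standing in for MGB_HASH_STR.
constexpr uint64_t fnv1a(const char* s, uint64_t h = 0xcbf29ce484222325ull) {
    return *s ? fnv1a(s + 1, (h ^ static_cast<uint8_t>(*s)) * 0x100000001b3ull) : h;
}

template <typename T>
void write_param(std::string& data, const T& param) {
    data.append(reinterpret_cast<const char*>(&param), sizeof(param));
}

// Illustrative operators; the real list lives in FOREACH_SUPPORTED_OPR_*.
struct Convolution { struct { int pad, stride; } param; };
struct Pooling     { struct { int window; } param; };

// One dump function per operator, stamped out by the X-macro; the
// type-name hash is serialized first, so two operator types with
// byte-identical params still produce distinct dumps.
#define cb(_Opr)                                  \
    std::string dump_##_Opr(const _Opr& opr) {    \
        std::string data;                         \
        auto opr_hash = fnv1a(#_Opr);             \
        write_param(data, opr_hash);              \
        write_param(data, opr.param);             \
        return data;                              \
    }
#define FOREACH_OPR(cb) cb(Convolution) cb(Pooling)
FOREACH_OPR(cb)
#undef cb
```

The expansion yields `dump_Convolution` and `dump_Pooling` ready to use, and adding an operator to the list is a one-line change; that is the property the commit exploits when it splits the list into with/without execution-policy halves.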
@@ -83,8 +91,9 @@ std::string opr_safe_dump(const cg::OperatorNodeBase* opr) {
         return OprDumpImpl<_Opr>::dump(opr);                      \
     } else
     FOREACH_SUPPORTED_OPR(cb) {
-        mgb_throw(InternalError, "unsupported operator(got:%s)",
-                  opr->dyn_typeinfo()->name);
+        mgb_throw(
+                InternalError, "unsupported operator(got:%s)",
+                opr->dyn_typeinfo()->name);
     }
 #undef cb
 }
@@ -16,10 +16,16 @@
 namespace mgb {
 namespace gopt {
 namespace intl {
-#define FOREACH_SUPPORTED_OPR(cb)                                          \
-    cb(Convolution) cb(ConvBiasForward) cb(ConvolutionBackwardData)        \
-            cb(PoolingForward) cb(WarpPerspective) cb(Resize) cb(Elemwise) \
-                    cb(ElemwiseMultiType) cb(Concat) cb(PowC) cb(TypeCvt)
+#define FOREACH_SUPPORTED_OPR_WITHOUT_EXECUTION_POLICY(cb)                       \
+    cb(WarpPerspective) cb(Resize) cb(Elemwise) cb(ElemwiseMultiType) cb(Concat) \
+            cb(PowC) cb(TypeCvt)
+
+#define FOREACH_SUPPORTED_OPR_WITH_EXECUTION_POLICY(cb) \
+    cb(Convolution) cb(ConvBiasForward) cb(ConvolutionBackwardData) cb(PoolingForward)
+
+#define FOREACH_SUPPORTED_OPR(cb)                      \
+    FOREACH_SUPPORTED_OPR_WITHOUT_EXECUTION_POLICY(cb) \
+    FOREACH_SUPPORTED_OPR_WITH_EXECUTION_POLICY(cb)

 std::string opr_safe_dump(const cg::OperatorNodeBase* opr);
@@ -11,8 +11,8 @@
  */

 #include "./opr_safe_dump.h"
-#include "megbrain/gopt/profiler.h"
 #include "megbrain/comp_node_env.h"
+#include "megbrain/gopt/profiler.h"

 using namespace mgb;
 using namespace gopt;
@@ -21,9 +21,6 @@ using ReformatKey = ReformatManager::ReformatKey;
 // =================== ProfilerCache ======================
 void ProfilerCache::Key::build_blob_from_opr() {
     auto&& opr = m_key_impl.opr_key.opr;
-    // process opr type
-    auto type = opr->dyn_typeinfo()->name;
-    size_t type_size = strlen(type);

     // process opr param
     auto data = intl::opr_safe_dump(opr);
@@ -32,11 +29,7 @@ void ProfilerCache::Key::build_blob_from_opr() {
     size_t nr_inputs = opr->input().size();
     size_t nr_outputs = opr->usable_output().size();
     size_t nr_layouts = nr_inputs + nr_outputs;
-    m_blob_storage.reserve(sizeof(TensorLayout) * 3 * nr_layouts + type_size +
-                           param_size);
-
-    // serialize opr type
-    m_blob_storage.append(type, type_size);
+    m_blob_storage.reserve(sizeof(TensorLayout) * 3 * nr_layouts + param_size);

     // serialize param
     const char* data_ptr = reinterpret_cast<const char*>(data.data());
@@ -70,12 +63,12 @@ void ProfilerCache::Key::build_blob_from_opr() {
     }

     // serialize opr_format
-    m_blob_storage.append(std::to_string(
-            static_cast<uint32_t>(m_key_impl.opr_key.opr_format)));
+    m_blob_storage.append(
+            std::to_string(static_cast<uint32_t>(m_key_impl.opr_key.opr_format)));

     // serialize extra_attribute
-    m_blob_storage.append(std::to_string(
-            static_cast<uint32_t>(m_key_impl.opr_key.extra_attribute)));
+    m_blob_storage.append(
+            std::to_string(static_cast<uint32_t>(m_key_impl.opr_key.extra_attribute)));
 }

 void ProfilerCache::Key::build_category(CompNode cn) {
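With the operator type now hashed into the param dump by `opr_safe_dump`, serializing the type name into the key blob separately became redundant, which is exactly what the lines removed from `build_blob_from_opr` did. A sketch of the resulting key layout; the field and function names here are illustrative:

```cpp
#include <cstdint>
#include <string>

// How the key blob is assembled after this change: the operator-type
// information travels inside param_dump (via the type hash written by
// opr_safe_dump), so no separate type-name field is needed.
std::string build_key_blob(
        const std::string& param_dump, const std::string& layouts_dump,
        uint32_t opr_format, uint32_t extra_attribute) {
    std::string blob;
    blob.reserve(param_dump.size() + layouts_dump.size() + 16);
    blob.append(param_dump);                       // param (embeds type hash)
    blob.append(layouts_dump);                     // input/output layouts
    blob.append(std::to_string(opr_format));       // serialize opr_format
    blob.append(std::to_string(extra_attribute));  // serialize extra_attribute
    return blob;
}
```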
@@ -85,8 +78,8 @@ void ProfilerCache::Key::build_category(CompNode cn) {
 #if MGB_CUDA
         case CompNode::DeviceType::CUDA: {
             auto&& prop = env.cuda_env().device_prop;
-            m_category += ssprintf("plat=cuda;dev=%s;cap=%d.%d", prop.name,
-                                   prop.major, prop.minor);
+            m_category += ssprintf(
+                    "plat=cuda;dev=%s;cap=%d.%d", prop.name, prop.major, prop.minor);
             break;
         }
 #endif
@@ -94,9 +87,10 @@ void ProfilerCache::Key::build_category(CompNode cn) {
             m_category += "plat=cpu";
             break;
         default:
-            mgb_throw(MegBrainError,
-                      "unsupported comp node for global layout transform "
-                      "profiler cache category");
+            mgb_throw(
+                    MegBrainError,
+                    "unsupported comp node for global layout transform "
+                    "profiler cache category");
     }
 }
@@ -151,9 +145,10 @@ ProfilerCache& ProfilerCache::set_impl(std::unique_ptr<PersistentCache> impl) {
 }

 void ProfilerCache::dump_cache(const char* path) {
-    mgb_assert(m_impl->support_dump_cache(),
-               "current impl of ProfilerCache does not support dump cache to "
-               "file.");
+    mgb_assert(
+            m_impl->support_dump_cache(),
+            "current impl of ProfilerCache does not support dump cache to "
+            "file.");
     auto cache = static_cast<InFilePersistentCache*>(m_impl.get());
     cache->dump_cache(path);
 }
@@ -165,8 +160,9 @@ Maybe<ProfilerCache::Result> ProfilerCache::get(const Key& key) {
     // data type of cost is float
     auto buf = static_cast<const uint8_t*>(raw_buf->ptr);
     auto size = raw_buf->size;
-    mgb_assert(buf && size == sizeof(float),
-               "ProfileCache invalid value: ptr=%p, size=%zu", buf, size);
+    mgb_assert(
+            buf && size == sizeof(float),
+            "ProfileCache invalid value: ptr=%p, size=%zu", buf, size);
     auto read_f32 = [&]() {
         auto ret = *reinterpret_cast<const float*>(buf);
         return ret;
    };
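A cache hit hands back an opaque blob, and the assertion above pins down its invariant: the value must be exactly one `float`, the profiled cost. A sketch of that decode step; using `memcpy` rather than the `reinterpret_cast` read in the original is a deliberate substitution to sidestep alignment concerns:

```cpp
#include <cassert>
#include <cstddef>
#include <cstring>

// Decode a ProfilerCache value, per the invariant asserted above: the
// blob must hold exactly one float, the profiled cost.
float decode_cost(const void* ptr, std::size_t size) {
    assert(ptr && size == sizeof(float));
    float cost;
    std::memcpy(&cost, ptr, sizeof(float));
    return cost;
}
```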
@@ -154,33 +154,30 @@ void MarkInputContiguous::init_output_static_infer_desc() {
 }  // namespace

 /* ================== ProfilerImpl =================*/
-ProfilerImpl::ProfilerImpl(int runs, float opr_threshold,
-                           float var_node_threshold)
+ProfilerImpl::ProfilerImpl(int runs, float opr_threshold, float var_node_threshold)
         : m_opr_threshold{opr_threshold},
           m_var_node_threshold{var_node_threshold},
           m_runs{runs} {
-    m_opr_filter = [this](const OperatorNodeBase* opr,
-                          OperatorNodeBase* new_opr) {
+    m_opr_filter = [this](const OperatorNodeBase* opr, OperatorNodeBase* new_opr) {
         /// \note: for the considerations of performance, we skip nchw(naive)
         /// kernels for conv bias on CUDA platform. to remove this later
         if (auto conv = try_cast_as_op<opr::ConvBiasForward>(new_opr)) {
             if (conv->output(0)->comp_node().device_type() ==
                         CompNode::DeviceType::CUDA &&
-                conv->input(0)->dtype().category() ==
-                        DTypeCategory::QUANTIZED &&
+                conv->input(0)->dtype().category() == DTypeCategory::QUANTIZED &&
                 conv->param().format == OprFormat::NCHW) {
                 return false;
             }
         }
-        float comp1 = m_opr_footprint.get_computation(
-                const_cast<OperatorNodeBase*>(opr));
+        float comp1 =
+                m_opr_footprint.get_computation(const_cast<OperatorNodeBase*>(opr));
         float comp2 = m_opr_footprint.get_computation(new_opr);
         if (comp2 > m_opr_threshold * comp1)
             return false;
         return true;
     };
-    m_var_node_filter = [this](const VarNode* var, TensorShape from,
-                               TensorShape to, ReformatKey key) {
+    m_var_node_filter = [this](const VarNode* var, TensorShape from, TensorShape to,
                                ReformatKey key) {
         /// \note: due to the alignment requirement of low-bit tensor, we skip
         /// some layout transform for low-bit tensors. The skipped layout
         /// transforms do not have corresponding dnn kernel and cannot be
@@ -202,8 +199,7 @@ ProfilerImpl::ProfilerImpl(int runs, float opr_threshold,
         TensorLayout orig_ly = {var->shape(), var->dtype()},
                      from_ly = {from, var->dtype()}, to_ly = {to, var->dtype()};
         float orig_memory = orig_ly.span().dist_byte() * 2.f;
-        float reformat_memory =
-                from_ly.span().dist_byte() + to_ly.span().dist_byte();
+        float reformat_memory = from_ly.span().dist_byte() + to_ly.span().dist_byte();
         if (reformat_memory > orig_memory * m_var_node_threshold)
             return false;
         return true;
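Both filters apply the same pruning rule with the thresholds stored on the class: a candidate survives only if its cost is at most `threshold` times the baseline (2x by default, per the header below). A distilled sketch of that rule, with illustrative names and abstract costs:

```cpp
#include <functional>

// The pruning rule both m_opr_filter and m_var_node_filter encode above:
// keep a candidate only if its cost is at most `threshold` x baseline.
using CostFilter = std::function<bool(float /*baseline*/, float /*candidate*/)>;

CostFilter make_cost_filter(float threshold) {
    return [threshold](float baseline, float candidate) {
        return candidate <= threshold * baseline;
    };
}
```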
@@ -537,23 +533,20 @@ std::unique_ptr<ProfilerBase> ProfilerBase::make_profiler() {
     return std::make_unique<ProfilerImpl>();
 }

-std::unique_ptr<ProfilerBase> ProfilerBase::make_cached_profiler(
-        const char* path) {
+std::unique_ptr<ProfilerBase> ProfilerBase::make_cached_profiler(const char* path) {
     return std::make_unique<CachedProfiler>(path);
 }

 /* ================== CachedProfiler =================*/
-CachedProfiler::CachedProfiler(const char* path, int runs, float opr_threshold,
-                               float var_node_threshold)
+CachedProfiler::CachedProfiler(
+        const char* path, int runs, float opr_threshold, float var_node_threshold)
         : ProfilerImpl(runs, opr_threshold, var_node_threshold), m_path{path} {
     if (m_path != nullptr) {  // file cache
-        ProfilerCache::inst().set_impl(
-                std::make_unique<InFilePersistentCache>(m_path));
+        ProfilerCache::inst().set_impl(std::make_unique<InFilePersistentCache>(m_path));
     }
 }

-CachedProfiler::ProfilingResult CachedProfiler::profile(
-        const Problem& problem) const {
+CachedProfiler::ProfilingResult CachedProfiler::profile(const Problem& problem) const {
     auto ret = ProfilerImpl::profile(problem);
     if (m_path != nullptr)
         ProfilerCache::inst().dump_cache(m_path);
@@ -563,35 +556,33 @@ CachedProfiler::ProfilingResult CachedProfiler::profile(
 float CachedProfiler::profile_operator(
         const OperatorNodeBase* opr, TensorFormats base_format,
         TensorFormats tensor_format, ReformatAttribute extra_attribute) const {
-    ProfilerCache::Key key{opr, tensor_formats_to_opr_format(tensor_format),
-                           extra_attribute};
+    ProfilerCache::Key key{
+            opr, tensor_formats_to_opr_format(tensor_format), extra_attribute};
     auto ret = ProfilerCache::inst().get(key);
     if (ret.valid())
         return ret.val();
-    auto rst = ProfilerImpl::profile_operator(opr, base_format, tensor_format,
-                                              extra_attribute);
+    auto rst = ProfilerImpl::profile_operator(
+            opr, base_format, tensor_format, extra_attribute);
     ProfilerCache::inst().put(key, rst);
     return rst;
 }

 float CachedProfiler::profile_operator(
-        const OperatorNodeBase* opr,
-        const OprTensorFormatsConfiguration& base_config,
+        const OperatorNodeBase* opr, const OprTensorFormatsConfiguration& base_config,
         const OprTensorFormatsConfiguration& config,
         ReformatAttribute extra_attribute) const {
     ProfilerCache::Key key{opr, config.opr_format, extra_attribute};
     auto ret = ProfilerCache::inst().get(key);
     if (ret.valid())
         return ret.val();
-    auto rst = ProfilerImpl::profile_operator(opr, base_config, config,
-                                              extra_attribute);
+    auto rst =
+            ProfilerImpl::profile_operator(opr, base_config, config, extra_attribute);
     ProfilerCache::inst().put(key, rst);
     return rst;
 }

-float CachedProfiler::profile_var_node(const VarNode* var,
-                                       TensorFormats base_format,
-                                       const ReformatKey& key) const {
+float CachedProfiler::profile_var_node(
+        const VarNode* var, TensorFormats base_format, const ReformatKey& key) const {
     ProfilerCache::Key pf_key{var, key};
     auto ret = ProfilerCache::inst().get(pf_key);
     if (ret.valid())
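All three `CachedProfiler` overrides share one memoization shape: build a `ProfilerCache::Key`, return the cached cost on a hit, otherwise fall back to the expensive `ProfilerImpl` measurement and store the result. A type-simplified sketch of that pattern, with a plain map standing in for the cache:

```cpp
#include <functional>
#include <map>
#include <string>

// The memoization shape shared by the three overrides above: consult
// the cache first, profile only on a miss, then store the measured cost.
float cached_profile(
        std::map<std::string, float>& cache, const std::string& key,
        const std::function<float()>& do_profile) {
    auto it = cache.find(key);
    if (it != cache.end())
        return it->second;      // hit: reuse the stored measurement
    float cost = do_profile();  // miss: run the expensive measurement
    cache.emplace(key, cost);
    return cost;
}
```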
@@ -78,7 +78,7 @@ public:
             const VarNode*, TensorShape, TensorShape, ReformatManager::ReformatKey)>;
     ProfilerBase() = default;
     virtual ~ProfilerBase() = default;
     virtual ProfilingResult profile(const Problem& problem) const = 0;
@@ -102,13 +102,12 @@ protected:
     VarNodeFilter m_var_node_filter;
 };

 /*! \brief A default profiler impl
  */
 class ProfilerImpl : public ProfilerBase {
 public:
-    ProfilerImpl(int runs = 10, float opr_threshold = 2.f,
-                 float var_node_threshold = 2.f);
+    ProfilerImpl(
+            int runs = 10, float opr_threshold = 2.f, float var_node_threshold = 2.f);
     ~ProfilerImpl() = default;
     ProfilingResult profile(const Problem& problem) const override;
@@ -128,22 +127,22 @@ protected:
     OperatorNodeRecord profile_operator(
             const OperatorNodeBase* opr, TensorFormats base_format,
             const SmallVector<TensorFormats>& available_tensor_formats,
-            ReformatAttribute extra_attribute =
-                    ReformatAttribute::DEFAULT) const;
+            ReformatAttribute extra_attribute = ReformatAttribute::DEFAULT) const;
     /*!
-     * \brief prfile opr format agnostic operators (like elemwise, elemwise multi type, typecvt etc.)
+     * \brief profile opr format agnostic operators (like elemwise, elemwise multi
+     * type, typecvt etc.)
      *
      * \param opr pointer to the operator to be profiled
     * \param base_format the original tensor format of the operator node.
     * \param tensor_format the tensor format to be profiled
-     * \param extra_attribute identify whether to use image object for OpenCL or automatically padding nhwc layout
-     * \return elapsed time of operator in the given tensor format configuration
+     * \param extra_attribute identify whether to use image object for OpenCL or
+     * automatically pad nhwc layout
+     * \return elapsed time of operator in the given tensor format configuration
      */
     virtual float profile_operator(
             const OperatorNodeBase* opr, TensorFormats base_format,
             TensorFormats tensor_format,
-            ReformatAttribute extra_attribute =
-                    ReformatAttribute::DEFAULT) const;
+            ReformatAttribute extra_attribute = ReformatAttribute::DEFAULT) const;
     /*!
      * \brief profile opr format aware operators (like conv, deconv, conv_bias,
      * etc.)
@@ -157,28 +156,29 @@ protected:
             const OperatorNodeBase* opr,
             const OprTensorFormatsConfiguration& base_config,
             const SmallVector<OprTensorFormatsConfiguration>& available_configs,
-            ReformatAttribute extra_attribute =
-                    ReformatAttribute::DEFAULT) const;
+            ReformatAttribute extra_attribute = ReformatAttribute::DEFAULT) const;
     /*!
-     * \brief prfile opr format aware operators (like conv, deconv, conv_bias, resize, warp etc.)
+     * \brief profile opr format aware operators (like conv, deconv, conv_bias,
+     * resize, warp etc.)
      *
      * \param opr pointer to the operator to be profiled
-     * \param base_config the original opr format configuration of the operator node,
+     * \param base_config the original opr format configuration of the operator node
      * \param config the opr format configuration to be profiled
-     * \param extra_attribute identify whether to use image object for OpenCL or automatically padding nhwc layout
-     * \return elapsed time of operator in the given opr format configuration
+     * \param extra_attribute identify whether to use image object for OpenCL or
+     * automatically pad nhwc layout
+     * \return elapsed time of operator in the given opr format configuration
      */
-    virtual float profile_operator(const OperatorNodeBase* opr,
-                                   const OprTensorFormatsConfiguration& base_config,
-                                   const OprTensorFormatsConfiguration& config,
-                                   ReformatAttribute extra_attribute =
-                                           ReformatAttribute::DEFAULT) const;
+    virtual float profile_operator(
+            const OperatorNodeBase* opr,
+            const OprTensorFormatsConfiguration& base_config,
+            const OprTensorFormatsConfiguration& config,
+            ReformatAttribute extra_attribute = ReformatAttribute::DEFAULT) const;
     /*!
      * \brief profile layout transform of the var node
      *
      * \param var pointer to the var node to be profiled
     * \param base_format the original tensor formats in which the var node is
-     * stored
+     * stored
     * \param available_tensor_formats the available tensor formats
     * \param extra_attribute the extra attributes (options) of the problem
     * \return the var node record
@@ -186,27 +186,26 @@ protected:
     VarNodeRecord profile_var_node(
             const VarNode* var, TensorFormats base_format,
             const SmallVector<TensorFormats>& available_tensor_formats,
-            ReformatAttribute extra_attribute =
-                    ReformatAttribute::DEFAULT) const;
+            ReformatAttribute extra_attribute = ReformatAttribute::DEFAULT) const;
     /*!
      * \brief profile layout transform of the var node
      *
      * \param var pointer to the var node to be profiled
      * \param base_format the original tensor formats in which the var node is
      * stored
-     * \param key type of ReformatKey, identify the information/attributes of the layout transoform
-     * \return elapsed time of the layout transform
+     * \param key type of ReformatKey, identifying the information/attributes of the
+     * layout transform
+     * \return elapsed time of the layout transform
      */
-    virtual float profile_var_node(const VarNode* var,
-                                   TensorFormats base_format,
-                                   const ReformatKey& key) const;
+    virtual float profile_var_node(
+            const VarNode* var, TensorFormats base_format,
+            const ReformatKey& key) const;
     OprFootprint m_opr_footprint;
-    float m_opr_threshold;  /// a threshold, when the computation of the newly
-                            /// created operator that is built in some opr
-                            /// format configuration is as greater as
-                            /// m_opr_threshold times of the original operator,
-                            /// the opr format configuration will be skipped
-                            /// (i.e. the cost is infinite)
+    float m_opr_threshold;       /// a threshold: when the computation of the newly
+                                 /// created operator built in some opr format
+                                 /// configuration is greater than m_opr_threshold
+                                 /// times that of the original operator, the opr
+                                 /// format configuration is skipped (i.e. its cost
+                                 /// is treated as infinite)
     float m_var_node_threshold;  /// a threshold, when the memory footprint of
                                  /// the layout transform of the var node is as
                                  /// larger as m_var_node_threshold as the var
@@ -298,23 +297,26 @@ private:
 class CachedProfiler final : public ProfilerImpl {
 public:
-    CachedProfiler(const char* path = nullptr, int runs = 10,
-                   float opr_threshold = 2.f, float var_node_threshold = 2.f);
+    CachedProfiler(
+            const char* path = nullptr, int runs = 10, float opr_threshold = 2.f,
+            float var_node_threshold = 2.f);
     ProfilingResult profile(const Problem& problem) const override;

 private:
-    float profile_operator(const OperatorNodeBase* opr,
-                           TensorFormats base_format,
-                           TensorFormats tensor_format,
-                           ReformatAttribute extra_attribute =
-                                   ReformatAttribute::DEFAULT) const override;
-    float profile_operator(const OperatorNodeBase* opr,
-                           const OprTensorFormatsConfiguration& base_config,
-                           const OprTensorFormatsConfiguration& config,
-                           ReformatAttribute extra_attribute =
-                                   ReformatAttribute::DEFAULT) const override;
-    float profile_var_node(const VarNode* var, TensorFormats base_format,
-                           const ReformatKey& key) const override;
+    float profile_operator(
+            const OperatorNodeBase* opr, TensorFormats base_format,
+            TensorFormats tensor_format,
+            ReformatAttribute extra_attribute =
+                    ReformatAttribute::DEFAULT) const override;
+    float profile_operator(
+            const OperatorNodeBase* opr,
+            const OprTensorFormatsConfiguration& base_config,
+            const OprTensorFormatsConfiguration& config,
+            ReformatAttribute extra_attribute =
+                    ReformatAttribute::DEFAULT) const override;
+    float profile_var_node(
+            const VarNode* var, TensorFormats base_format,
+            const ReformatKey& key) const override;
     const char* m_path;
 };
@@ -7,19 +7,21 @@
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# To keep the profiling results used by the global graph optimization immune to the CI
-# environment, the hard-coded profiling results are stored in a cache; every test run
-# reads the profiling results from the in-memory cache and performs the global graph
-# optimization based on them.
-# This script converts the dumped cache files into a cache header, which the tests
-# read their data from.
-# If you add tests related to the global graph optimization in
-# src/gopt/test/layout_transform_pass.cpp, consider using this script to process the
-# profiling data.
+# To keep the profiling results used by the global graph optimization immune to the CI
+# environment, the hard-coded profiling data are stored in a cache; every test run
+# reads the profiling results from the in-memory cache and performs the global graph
+# optimization based on them, so each run produces consistent results.
+# ProfilerCache supports dumping the profiling data cached in memory to a file.
+# This script packs the dumped cache files into a cache header, from which the tests
+# read their data to build an in-memory ProfilerCache.
+# If you add new tests related to the global graph optimization in
+# src/gopt/test/layout_transform_pass.cpp, consider using this script to refresh the
+# profiling data in the cache header.
 # 1. First, change `#define MGB_WITH_CACHED_TEST 1` in
 #    src/gopt/test/layout_transform_pass.cpp to `#define MGB_WITH_CACHED_TEST 0`.
 # 2. Build megbrain_test and run all tests related to the global graph optimization:
 #    ./megbrain_test --gtest_filter="*LayoutTransform*"
 # 3. Use this script to pack all the cache files together:
 #    python3 embed_cache.py -o cache_data.h $(ls /path/to/cache/*.cache)
-# 4. Change the define from step 1 back, so that the profiling step uses the cached
-#    data; afterwards you can rebuild megbrain_test and verify that the tests pass.
+# 4. Revert the define statement changed in step 1, so that profiling uses the cached
+#    data.
+# 5. Finally, rebuild megbrain_test and make sure the test results are correct.
 import os.path
 import logging
 import hashlib
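The header this script emits gives each `.cache` file a byte-array constant named after it; the test code later retrieves the bytes through `.data()` and `.size()`. A plausible shape of the generated output, purely illustrative since the real script decides the names and container type:

```cpp
// Plausible shape of the generated cache header (illustrative): one
// embedded byte array per .cache file, named after the file and
// exposing .data()/.size() as the tests expect.
#include <array>
#include <cstdint>

static const std::array<uint8_t, 8> TestLayoutTransform_Wide = {
        0x01, 0x00, 0x00, 0x00, 0xde, 0xad, 0xbe, 0xef};
```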
@@ -78,8 +78,9 @@ OprFormat tensor_formats_to_opr_format(TensorFormats tensor_format) {
         case TensorFormats::CHWNc4:
             return OprFormat::CHWN4;
         default:
-            mgb_throw(MegBrainError, "tensor format(%u) is not supported",
-                      static_cast<uint32_t>(tensor_format));
+            mgb_throw(
+                    MegBrainError, "tensor format(%u) is not supported",
+                    static_cast<uint32_t>(tensor_format));
     }
 }
@@ -92,28 +93,28 @@ public:
     }
     ~ProfilerMock() {
         // reset in memory cache
-        ProfilerCache::inst().set_impl(
-                std::make_unique<InMemoryPersistentCache>());
+        ProfilerCache::inst().set_impl(std::make_unique<InMemoryPersistentCache>());
     }

 private:
-    float profile_operator(const OperatorNodeBase* opr,
-                           TensorFormats base_format,
-                           TensorFormats tensor_format,
-                           ReformatAttribute extra_attribute =
-                                   ReformatAttribute::DEFAULT) const override {
-        ProfilerCache::Key key{opr, tensor_formats_to_opr_format(tensor_format),
-                               extra_attribute};
+    float profile_operator(
+            const OperatorNodeBase* opr, TensorFormats base_format,
+            TensorFormats tensor_format,
+            ReformatAttribute extra_attribute =
+                    ReformatAttribute::DEFAULT) const override {
+        ProfilerCache::Key key{
+                opr, tensor_formats_to_opr_format(tensor_format), extra_attribute};
         auto ret = ProfilerCache::inst().get(key);
         if (ret.valid())
             return ret.val();
         mgb_assert(false);
     }
-    float profile_operator(const OperatorNodeBase* opr,
-                           const OprTensorFormatsConfiguration& base_config,
-                           const OprTensorFormatsConfiguration& config,
-                           ReformatAttribute extra_attribute =
-                                   ReformatAttribute::DEFAULT) const override {
+    float profile_operator(
+            const OperatorNodeBase* opr,
+            const OprTensorFormatsConfiguration& base_config,
+            const OprTensorFormatsConfiguration& config,
+            ReformatAttribute extra_attribute =
+                    ReformatAttribute::DEFAULT) const override {
         ProfilerCache::Key key{opr, config.opr_format, extra_attribute};
         std::string tmp;
         tmp.reserve(key.blob().size);
@@ -122,8 +123,9 @@ private:
             return ret.val();
         mgb_assert(false);
     }
-    float profile_var_node(const VarNode* var, TensorFormats base_format,
-                           const ReformatKey& key) const override {
+    float profile_var_node(
+            const VarNode* var, TensorFormats base_format,
+            const ReformatKey& key) const override {
         ProfilerCache::Key pf_key{var, key};
         auto ret = ProfilerCache::inst().get(pf_key);
         if (ret.valid())
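Note what the mock does on a miss: `mgb_assert(false)`. In cached-test mode every profiling query must already be answered by the embedded data; silently re-profiling would reintroduce the CI-hardware dependence the cache exists to remove. A type-simplified sketch of that replay-only contract:

```cpp
#include <cassert>
#include <map>
#include <string>

// Replay-only lookup, as ProfilerMock implements above: a miss means the
// embedded cache data is stale or incomplete, so the test aborts rather
// than re-profiling on whatever hardware CI happens to run on.
float replay_only_profile(
        const std::map<std::string, float>& cache, const std::string& key) {
    auto it = cache.find(key);
    assert(it != cache.end() && "profiling result missing from embedded cache");
    return it->second;
}
```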
@@ -174,18 +176,17 @@ TEST(TestLayoutTransform, Resnet18_QS8) {
             OprFormat::NCHW, TensorFormats::NCHW, Target::UNSPEC,
             ReformatAttribute::AUTO_PADDING_NHWC};
     auto ctx = std::make_unique<LayoutTransformContext>(
-            std::move(opr_list), std::move(available_tensor_formats),
-            attribute);
-    ctx->add_opr_config(opr::ConvBiasForward::typeinfo(),
-                        {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::CHWN4,
-                         OprFormat::NHWC})
-            .add_opr_config(opr::PoolingForward::typeinfo(),
-                            {OprFormat::NCHW4, OprFormat::NCHW32,
-                             OprFormat::NHWC, OprFormat::CHWN4});
+            std::move(opr_list), std::move(available_tensor_formats), attribute);
+    ctx->add_opr_config(
+               opr::ConvBiasForward::typeinfo(),
+               {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::CHWN4, OprFormat::NHWC})
+            .add_opr_config(
+                    opr::PoolingForward::typeinfo(),
+                    {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::NHWC,
+                     OprFormat::CHWN4});
 #if MGB_WITH_CACHED_TEST
     auto profiler = std::make_unique<ProfilerMock>(
-            static_cast<const uint8_t*>(
-                    TestLayoutTransform_Resnet18_QS8.data()),
+            static_cast<const uint8_t*>(TestLayoutTransform_Resnet18_QS8.data()),
             TestLayoutTransform_Resnet18_QS8.size());
 #else
     auto profiler = ProfilerBase::make_cached_profiler(
@@ -278,8 +279,7 @@ TEST(TestLayoutTransform, Resnet18_QS4) {
                             OprFormat::NHWC, OprFormat::CHWN4});
 #if MGB_WITH_CACHED_TEST
     auto profiler = std::make_unique<ProfilerMock>(
-            static_cast<const uint8_t*>(
-                    TestLayoutTransform_Resnet18_QS4.data()),
+            static_cast<const uint8_t*>(TestLayoutTransform_Resnet18_QS4.data()),
             TestLayoutTransform_Resnet18_QS4.size());
 #else
     auto profiler = ProfilerBase::make_cached_profiler(
@@ -401,8 +401,7 @@ TEST(TestLayoutTransform, Detection_QS8) {
                             OprFormat::NHWC, OprFormat::CHWN4});
 #if MGB_WITH_CACHED_TEST
     auto profiler = std::make_unique<ProfilerMock>(
-            static_cast<const uint8_t*>(
-                    TestLayoutTransform_Detection_QS8.data()),
+            static_cast<const uint8_t*>(TestLayoutTransform_Detection_QS8.data()),
             TestLayoutTransform_Detection_QS8.size());
 #else
     auto profiler = ProfilerBase::make_cached_profiler(
@@ -479,8 +478,7 @@ TEST(TestLayoutTransform, Detection_QS4) {
                             OprFormat::NHWC, OprFormat::CHWN4});
 #if MGB_WITH_CACHED_TEST
     auto profiler = std::make_unique<ProfilerMock>(
-            static_cast<const uint8_t*>(
-                    TestLayoutTransform_Detection_QS4.data()),
+            static_cast<const uint8_t*>(TestLayoutTransform_Detection_QS4.data()),
             TestLayoutTransform_Detection_QS4.size());
 #else
     auto profiler = ProfilerBase::make_cached_profiler(
@@ -553,17 +551,16 @@ TEST(TestLayoutTransform, Wide) {
             OprFormat::NCHW, TensorFormats::NCHW, Target::UNSPEC,
             ReformatAttribute::DEFAULT};
     auto ctx = std::make_unique<LayoutTransformContext>(
-            std::move(opr_list), std::move(available_tensor_formats),
-            attribute);
-    ctx->add_opr_config(opr::ConvBiasForward::typeinfo(),
-                        {OprFormat::NCHW, OprFormat::NHWC});
+            std::move(opr_list), std::move(available_tensor_formats), attribute);
+    ctx->add_opr_config(
+            opr::ConvBiasForward::typeinfo(), {OprFormat::NCHW, OprFormat::NHWC});
 #if MGB_WITH_CACHED_TEST
     auto profiler = std::make_unique<ProfilerMock>(
             static_cast<const uint8_t*>(TestLayoutTransform_Wide.data()),
             TestLayoutTransform_Wide.size());
 #else
-    auto profiler = ProfilerBase::make_cached_profiler(
-            "TestLayoutTransform.Wide.cache");
+    auto profiler =
+            ProfilerBase::make_cached_profiler("TestLayoutTransform.Wide.cache");
 #endif
     std::unique_ptr<SolverBase> solver{
             new DynamicProgrammingSolver(std::move(profiler))};
@@ -674,8 +671,7 @@ TEST(TestLayoutTransform, DetectionHead) {
             {OprFormat::NHWC, OprFormat::NCHW4, OprFormat::NCHW64});
 #if MGB_WITH_CACHED_TEST
     auto profiler = std::make_unique<ProfilerMock>(
-            static_cast<const uint8_t*>(
-                    TestLayoutTransform_DetectionHead.data()),
+            static_cast<const uint8_t*>(TestLayoutTransform_DetectionHead.data()),
             TestLayoutTransform_DetectionHead.size());
 #else
     auto profiler = ProfilerBase::make_cached_profiler(