/**
 * \file dnn/src/x86/profile.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "src/x86/profile.h"

namespace megdnn {
namespace x86 {

/**
 * Build the hard-coded profile table and return it by value.
 *
 * The table holds one ProfileElement for every combination of
 *   - first constructor argument in {2, 3, 4, 5, 6, 7},
 *   - second constructor argument in {4, 8, 16, 32, 64, 96, 128},
 *   - third constructor argument in {4, 8, 16, 32, 64, 96, 128},
 * i.e. 6 * 7 * 7 = 294 entries, appended with the first argument varying
 * slowest and the third argument varying fastest. The fourth constructor
 * argument is the tabulated value for that combination.
 *
 * NOTE(review): the meaning of the four ProfileElement fields is defined in
 * src/x86/profile.h, which is not visible from this file. Presumably they are
 * (filter size, input channels, output channels, threshold) -- confirm.
 */
ProfileCache get_profile_cache() {
    static const int kNumArg0 = 6;
    static const int kNumDims = 7;

    // Values taken by the first constructor argument, in emission order.
    static const int kArg0Values[kNumArg0] = {2, 3, 4, 5, 6, 7};
    // Values taken by both the second and the third constructor argument.
    static const int kDimValues[kNumDims] = {4, 8, 16, 32, 64, 96, 128};

    // Large placeholder that fills many cells of the table.
    // NOTE(review): presumably a "no finite value" sentinel -- confirm
    // against the code that consumes the cache.
    static const int kBig = 1000000000;

    // kTable[i][j][k] is the fourth constructor argument for
    // (kArg0Values[i], kDimValues[j], kDimValues[k]).
    // Rows follow the second argument, columns follow the third.
    static const int kTable[kNumArg0][kNumDims][kNumDims] = {
            // first argument == 2
            {
                    {49, 25, 19, 14, 13, 15, 15},
                    {241, 121, 57, 29, 17, 39, 29},
                    {kBig, 273, 177, 137, kBig, kBig, kBig},
                    {kBig, kBig, kBig, kBig, kBig, kBig, kBig},
                    {kBig, kBig, kBig, kBig, kBig, kBig, kBig},
                    {kBig, kBig, kBig, kBig, kBig, kBig, kBig},
                    {kBig, kBig, kBig, kBig, kBig, kBig, kBig},
            },
            // first argument == 3
            {
                    {10, 5, 7, 7, 6, 5, 5},
                    {14, 13, 13, 13, 11, 11, 12},
                    {37, 29, 21, 19, 14, 13, 13},
                    {69, 105, 105, 49, 29, 27, 39},
                    {193, 161, 137, 113, kBig, kBig, kBig},
                    {kBig, 305, kBig, kBig, kBig, kBig, kBig},
                    {kBig, kBig, kBig, kBig, kBig, kBig, kBig},
            },
            // first argument == 4
            {
                    {7, 7, 5, 6, 5, 5, 5},
                    {14, 12, 5, 6, 6, 6, 5},
                    {14, 14, 13, 13, 13, 13, 13},
                    {37, 31, 29, 21, 21, 29, 21},
                    {137, 113, 89, 69, 45, 37, 35},
                    {137, 113, 105, 77, 53, 45, 39},
                    {137, 121, 153, 97, kBig, kBig, kBig},
            },
            // first argument == 5
            {
                    {8, 9, 5, 5, 5, 5, 5},
                    {7, 6, 5, 5, 5, 5, 5},
                    {21, 12, 12, 11, 11, 11, 11},
                    {23, 14, 14, 13, 13, 13, 13},
                    {77, 39, 37, 29, 29, 21, 21},
                    {113, 77, 61, 39, 37, 31, 29},
                    {113, 97, 69, 53, 39, 31, 31},
            },
            // first argument == 6
            {
                    {7, 3, 5, 4, 5, 4, 4},
                    {11, 5, 5, 5, 5, 5, 5},
                    {13, 11, 11, 5, 5, 5, 11},
                    {21, 14, 13, 13, 13, 13, 13},
                    {39, 29, 29, 21, 21, 21, 21},
                    {97, 61, 39, 37, 29, 29, 21},
                    {77, 61, 39, 37, 29, 29, 23},
            },
            // first argument == 7
            {
                    {5, 4, 4, 4, 4, 4, 3},
                    {5, 5, 5, 5, 5, 5, 5},
                    {13, 11, 5, 5, 5, 5, 5},
                    {21, 13, 13, 13, 13, 13, 12},
                    {37, 21, 14, 14, 14, 13, 14},
                    {61, 39, 37, 31, 21, 21, 21},
                    {61, 31, 37, 11, 13, 23, 21},
            },
    };

    ProfileCache vec;
    vec.clear();
    // 6 * 7 * 7 entries are appended below.
    vec.reserve(294);

    for (int i = 0; i < kNumArg0; ++i) {
        for (int j = 0; j < kNumDims; ++j) {
            for (int k = 0; k < kNumDims; ++k) {
                vec.push_back(ProfileElement(
                        kArg0Values[i], kDimValues[j], kDimValues[k],
                        kTable[i][j][k]));
            }
        }
    }
    return vec;
}

}  // namespace x86
}  // namespace megdnn

// vim: syntax=cpp.doxygen