You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

persistent_cache.cpp 9.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. /**
  2. * \file src/core/impl/utils/persistent_cache.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megbrain/utils/persistent_cache.h"
  12. #include "megbrain/comp_node_env.h"
  13. #include <cstdio>
  14. #include <cstring>
  15. #ifdef WIN32
  16. #define snprintf _snprintf
  17. #endif
  18. #if MGB_CUDA
  19. #include <cuda_runtime_api.h>
  20. #endif
  21. using namespace mgb;
  22. // ================= PersistentCache ======================
  23. std::shared_ptr<PersistentCache> PersistentCache::sm_impl =
  24. std::make_shared<InMemoryPersistentCache>();
  25. std::shared_ptr<PersistentCache> PersistentCache::set_impl(
  26. std::shared_ptr<PersistentCache> impl) {
  27. mgb_assert(impl);
  28. sm_impl.swap(impl);
  29. return impl;
  30. }
//! Build a cache-category string that identifies the platform, device name
//! and driver/runtime versions of \p comp_node, so that cached entries are
//! keyed per-environment and never reused across incompatible setups.
//! Throws MegBrainError for device types without a category mapping.
std::string PersistentCache::make_category_from_comp_node(CompNode comp_node) {
    auto&& env = CompNodeEnv::from_comp_node(comp_node);
    switch (env.property().type) {
#if MGB_CUDA
        case CompNode::DeviceType::CUDA: {
            // -1 sentinels are overwritten by the version queries below
            int drv = -1, cuda_rt = -1;
            MGB_CUDA_CHECK(cudaDriverGetVersion(&drv));
            MGB_CUDA_CHECK(cudaRuntimeGetVersion(&cuda_rt));
            auto&& prop = env.cuda_env().device_prop;
            // note: we do not contain library versions such as cudnn here. They
            // are handled by opr impls in MegDNN
            return ssprintf("plat=cuda;dev=%s;cap=%d.%d,drv=%d;runtime=%d",
                    prop.name, prop.major, prop.minor, drv, cuda_rt);
            break;  // unreachable after return; kept for symmetry
        }
#endif
#if MGB_ROCM
        case CompNode::DeviceType::ROCM: {
            int drv = -1, hip_rt = -1;
            MGB_ROCM_CHECK(hipDriverGetVersion(&drv));
            MGB_ROCM_CHECK(hipRuntimeGetVersion(&hip_rt));
            auto&& prop = env.rocm_env().device_prop;
            return ssprintf("plat=rocm;dev=%s;cap=%d.%d,drv=%d;runtime=%d",
                    prop.name, prop.major, prop.minor, drv, hip_rt);
            break;
        }
#endif
        case CompNode::DeviceType::CPU:
            // CPU needs no device/driver discrimination
            return "plat=cpu";
        default:
            mgb_throw(MegBrainError,
                    "unsupported comp node for persistent cache category");
    }
}
  65. // ================= InMemoryPersistentCache ==================
  66. using Blob = PersistentCache::Blob;
  67. InMemoryPersistentCache::BlobStorage&
  68. InMemoryPersistentCache::BlobStorage::init_data_ref(const Blob& b) {
  69. data_refhold = std::make_unique<uint8_t[]>(b.size + 1);
  70. memcpy(data_refhold.get(), b.ptr, b.size);
  71. data_refhold.get()[b.size] = 0; // for C-string safety
  72. ptr = data_refhold.get();
  73. size = b.size;
  74. return *this;
  75. }
  76. InMemoryPersistentCache::BlobStorage&
  77. InMemoryPersistentCache::BlobStorage::init_hash() {
  78. hash = XXHash{}.update(ptr, size).digest();
  79. return *this;
  80. }
  81. bool InMemoryPersistentCache::BlobStorage::operator==(
  82. const BlobStorage& rhs) const {
  83. return size == rhs.size && !memcmp(ptr, rhs.ptr, size);
  84. }
  85. Maybe<Blob> InMemoryPersistentCache::get(const std::string& category,
  86. const Blob& key) {
  87. decltype(m_cache.begin()) iter0;
  88. {
  89. MGB_LOCK_GUARD(m_mtx);
  90. iter0 = m_cache.find(category);
  91. if (iter0 == m_cache.end())
  92. return None;
  93. }
  94. BlobStorage key_storage;
  95. key_storage.Blob::operator=(key);
  96. key_storage.init_hash();
  97. MGB_LOCK_GUARD(m_mtx);
  98. auto iter1 = iter0->second.find(key_storage);
  99. if (iter1 == iter0->second.end())
  100. return None;
  101. return iter1->second;
  102. }
  103. void InMemoryPersistentCache::put(const std::string& category, const Blob& key,
  104. const Blob& value) {
  105. BlobStorage key_storage;
  106. key_storage.init_data_ref(key).init_hash();
  107. MGB_LOCK_GUARD(m_mtx);
  108. auto size0 = m_cache.size();
  109. m_cache[category][std::move(key_storage)].init_data_ref(value);
  110. if (m_cache.size() > size0) {
  111. mgb_log_debug("new cache category: %s", category.c_str());
  112. }
  113. }
  114. // ================= AlgoChooserProfileCache ==================
  115. AlgoChooserProfileCache::AlgoChooserProfileCache(
  116. CompNode cn, const char *opr_type) {
  117. m_category = "profile:";
  118. m_category.append(PersistentCache::make_category_from_comp_node(cn));
  119. m_category.append(":");
  120. m_category.append(opr_type);
  121. }
//! textual layout of one profile entry (attribute, time, workspace);
//! written with snprintf in put() and parsed back with sscanf in get()
#define ENTRY_FMT ":%d;%lg;%zu:"

//! Deserialize a profiling Result previously stored by put().
//! Wire format: [u32 nr_results] then, per result,
//! [u32 algo_name_len][name bytes][u32 entry_len][ENTRY_FMT text incl. '\0'].
//! All u32 fields are raw native-endian bytes.
//! \return None on cache miss; asserts (rather than returning None) on
//!         structurally corrupted data.
Maybe<AlgoChooserProfileCache::Result>
AlgoChooserProfileCache::get(const Key &key) {
    auto raw_buf = PersistentCache::inst().get(m_category, key.build_blob());
    if(!raw_buf.valid())
        return None;
    // sanity bound; 1 MiB is far above any legitimate profile result
    mgb_assert(raw_buf->size <= 1024 * 1024,
            "buf size too large, maybe corrupted data: %p %zu",
            raw_buf->ptr, raw_buf->size);
    auto buf = static_cast<const uint8_t*>(raw_buf->ptr),
            buf_end = buf + raw_buf->size;
    mgb_assert(buf && buf < buf_end,
            "PersistentCache returned invalid value: ptr=%p size=%zu",
            raw_buf->ptr, raw_buf->size);
    // consume one little chunk: read a native-endian u32 and advance buf
    auto read_uint32 = [&]() {
        auto next = buf + sizeof(uint32_t);
        mgb_assert(next <= buf_end);
        auto ret = *reinterpret_cast<const uint32_t*>(buf);
        buf = next;
        return ret;
    };
    auto ret_size = read_uint32();
    // each result needs at least a few bytes, so count must be < remaining
    mgb_assert(static_cast<ptrdiff_t>(ret_size) < buf_end - buf,
            "result size too large (%u), maybe corrupted data",
            ret_size);
    Result ret(ret_size);
    for (auto &&i: ret) {
        // read algo name
        auto size = read_uint32();
        i.algo.resize(size);
        // strict '<': an entry record must still follow the name
        mgb_assert(buf + size < buf_end);
        memcpy(&i.algo[0], buf, size);
        buf += size;
        // entry_len covers the ENTRY_FMT text including its trailing '\0'
        auto entry_len = read_uint32();
        mgb_assert(buf + entry_len <= buf_end);
        auto nr = sscanf(reinterpret_cast<const char*>(buf), ENTRY_FMT,
                &i.attribute, &i.time, &i.workspace);
        mgb_assert(nr == 3);
        buf += entry_len;
    }
    // the whole blob must be consumed exactly
    mgb_assert(buf == buf_end);
    return ret;
}
//! Serialize \p result and store it under this cache's category.
//! Side effect: \p result is sorted by (time, workspace) and entries that are
//! both slower and not workspace-cheaper than their predecessor (with equal
//! attribute) are erased before serialization.
//! Wire format matches get(): [u32 nr] + per-entry
//! [u32 name_len][name][u32 entry_len][ENTRY_FMT text incl. '\0'].
void AlgoChooserProfileCache::put(const Key &key, Result &result) {
    mgb_assert(!result.empty());
    auto result_cmp = [](const ResultEntry &a, const ResultEntry &b) {
        return a.time < b.time ||
                (a.time == b.time && a.workspace < b.workspace);
    };
    small_sort(result.begin(), result.end(), result_cmp);
    // remove algos that run slower but use more workspace
    for (size_t i = 1; i < result.size(); ) {
        auto &&prev = result[i - 1];
        auto &&cur = result[i];
        // prev is no slower (sorted order) and no larger in workspace,
        // so cur is dominated and can be dropped
        if (prev.workspace <= cur.workspace &&
                prev.attribute == cur.attribute) {
            result.erase(result.begin() + i);
        } else {
            ++i;
        }
    }
    std::string val;
    // rough capacity guess: entry payload minus the algo-name string, doubled
    val.reserve((sizeof(ResultEntry) - sizeof(std::string)) * 2 * result.size());
    // append a native-endian u32 as raw bytes
    auto write_uint32 = [&](uint32_t v) {
        val.append(reinterpret_cast<const char*>(&v), sizeof(v));
    };
    write_uint32(result.size());
    // scratch size for one formatted ENTRY_FMT record
    constexpr int SPR_SIZE = 100;
    for (auto &&i: result) {
        // write algo
        write_uint32(i.algo.size());
        auto pos = val.size();
        val.resize(pos + i.algo.size());
        memcpy(&val[pos], i.algo.data(), i.algo.size());
        // write others: length placeholder (0), patched below once known
        write_uint32(0);
        pos = val.size();
        val.resize(pos + SPR_SIZE);
        uint32_t nr = snprintf(&val[pos], SPR_SIZE, ENTRY_FMT, i.attribute,
                i.time, i.workspace);
        //! for memory boundary failed, snprintf ret do not contain \0
        nr += 1;
        mgb_assert(nr < SPR_SIZE);
        // back-patch the placeholder u32 written just before the entry text
        memcpy(&val[pos - sizeof(uint32_t)], &nr, sizeof(nr));
        // shrink to the actual formatted length (incl. '\0')
        val.resize(pos + nr);
    }
    PersistentCache::inst().put(m_category, key.build_blob(),
            {val.data(), val.size()});
}
//! Serialize this key (input layouts + raw opr param) into a blob.
//! The encoding is built once and memoized in m_blob_storage; subsequent
//! calls return a view of the cached string. Per layout the text is:
//! comma-separated shape, then ";<comma-separated strides>" only when
//! non-contiguous, then ";<dtype name>|". Raw param bytes are appended last.
//! NOTE(review): the returned Blob borrows m_blob_storage, so it is only
//! valid while this Key (and its storage) is alive.
PersistentCache::Blob AlgoChooserProfileCache::Key::build_blob() const {
    auto &&ret = m_blob_storage;
    // memoized: non-empty storage means the blob was already built
    if (!m_blob_storage.empty())
        return {ret.data(), ret.size()};
    ret.reserve(sizeof(TensorLayout) * 3 * m_inp_layouts_size + m_param_size);
    for (size_t i = 0; i < m_inp_layouts_size; ++ i) {
        auto &&ly = m_inp_layouts_ptr[i];
        for (size_t j = 0; j < ly.ndim; ++ j) {
            if (j)
                ret.push_back(',');
            ret.append(std::to_string(ly.shape[j]));
        }
        // strides are encoded only when they carry information beyond shape
        if (!ly.is_contiguous()) {
            ret.push_back(';');
            for (size_t j = 0; j < ly.ndim; ++ j) {
                if (j)
                    ret.push_back(',');
                ret.append(std::to_string(ly.stride[j]));
            }
        }
        ret.push_back(';');
        ret.append(ly.dtype.name());
        ret.push_back('|');
        mgb_assert(ly.format.is_default() || (ly.format.is_lowbit_aligned() &&
                ly.dtype.is_low_bit()),
                "currently only default format is supported");
    }
    if (m_param_size) {
        // raw param bytes appended verbatim (not human-readable)
        ret.append(reinterpret_cast<const char*>(m_param), m_param_size);
    }
    return {ret.data(), ret.size()};
}
  243. #undef ENGRY_FMT
  244. #ifdef WIN32
  245. #undef snprintf
  246. #endif
  247. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台