@@ -53,9 +53,11 @@ option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
 option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
 option(MGE_BUILD_SDK "Build load_and_run" ON)
 option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
+option(MGE_WITH_PYTHON_MODULE "Build MegEngine legacy Python Module." OFF)
 option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
 option(MGE_WITH_ROCM "Enable ROCM support" OFF)
 if(NOT ${MGE_BIN_REDUCE} STREQUAL "")
   message("build with BIN REDUCE")
   if(MGE_WITH_MINIMUM_SIZE)
@@ -152,6 +154,14 @@ if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} S
 endif()
 if(MSVC OR WIN32)
+  # for cmake after 3.15.2
+  cmake_policy(SET CMP0091 NEW)
+  if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
+    set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
+  else()
+    set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
+  endif()
   add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
   message("-- into windows build...")
   message("-- CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
@@ -285,7 +295,6 @@ if(MGE_WITH_TEST)
 endif()
 if(MGE_BUILD_IMPERATIVE_RT)
-  add_compile_definitions(MGB_ENABLE_IMPERATIVE_RUNTIME)
   set(CMAKE_CXX_STANDARD 17)
 endif()
@@ -701,7 +710,8 @@ endif()
 set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}")
+set(MGB_ENABLE_IMPERATIVE ${MGE_BUILD_IMPERATIVE_RT})
+set(MGE_VERSION_SCRIPT ${PROJECT_SOURCE_DIR}/src/version.ld CACHE INTERNAL "Path to linker version script")
 # Write out megbrain_build_config.h
 # It defines macros needed by both megbrain and dnn
 configure_file(src/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
@@ -831,3 +841,8 @@ if(MSVC OR WIN32)
     endif()
   endforeach()
 endif()
+
+if(MGE_WITH_JIT_MLIR)
+  add_subdirectory(tools/mlir/mgb-opt)
+  add_subdirectory(tools/mlir/mgb-file-check)
+endif()
@@ -682,6 +682,53 @@ protected:
                       size_t workspace_in_bytes);
 };
 
+/**
+ * \brief base class for AdaptivePooling
+ */
+class AdaptivePoolingBase : public OperatorBase {
+    DEF_OPR_IMPL_CTOR(AdaptivePoolingBase, OperatorBase);
+    DEF_OPR_PARAM(AdaptivePooling);
+
+protected:
+    param::Pooling deduce_pooling_param(const TensorLayout& src,
+                                        const TensorLayout& dst);
+};
+
+class AdaptivePoolingForward : public AdaptivePoolingBase {
+    DEF_OPR_IMPL(AdaptivePoolingForward, AdaptivePoolingBase, 1, 1);
+
+public:
+    /**
+     * \param[in] src input tensor
+     * \param[out] dst output tensor
+     */
+    virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
+                      _megdnn_workspace workspace) = 0;
+    virtual size_t get_workspace_in_bytes(const TensorLayout& src,
+                                          const TensorLayout& dst) = 0;
+};
+
+using AdaptivePooling = AdaptivePoolingForward;
+
+class AdaptivePoolingBackward : public AdaptivePoolingBase {
+    DEF_OPR_IMPL(AdaptivePoolingBackward, AdaptivePoolingBase, 3, 1);
+
+public:
+    /**
+     * \param[in] src the `src' parameter in AdaptivePoolingForward::exec
+     * \param[in] dst the `dst' parameter in AdaptivePoolingForward::exec
+     * \param[in] diff the backpropagated gradient wrt. dst
+     * \param[out] grad the backpropagated gradient wrt. src
+     */
+    virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
+                      _megdnn_tensor_in diff, _megdnn_tensor_out grad,
+                      _megdnn_workspace workspace) = 0;
+    virtual size_t get_workspace_in_bytes(const TensorLayout& src,
+                                          const TensorLayout& dst,
+                                          const TensorLayout& diff,
+                                          const TensorLayout& grad) = 0;
+};
+
 /**
  * \brief base class for Local
  */
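The two exec signatures above carry the whole contract in shapes: forward maps src (N, C, IH, IW) to dst (N, C, OH, OW), with the output spatial size fixed by dst's layout rather than by window/stride fields in the param; backward consumes the forward's src and dst plus diff (shaped like dst) and emits grad (shaped like src). A minimal NumPy sketch of the four buffers involved (shapes are illustrative, not from the diff):

    import numpy as np

    src = np.zeros((4, 8, 7, 7), dtype=np.float32)   # forward input  (N, C, IH, IW)
    dst = np.zeros((4, 8, 3, 3), dtype=np.float32)   # forward output (N, C, OH, OW)
    diff = np.zeros_like(dst)                        # gradient w.r.t. dst, same shape as dst
    grad = np.zeros_like(src)                        # gradient w.r.t. src, same shape as src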
@@ -179,6 +179,11 @@ pdef('Axis').add_fields('int32', 'axis', 0)
  add_enum_alias('Format', 'ConvolutionV0')
  )
 
+(pdef('AdaptivePooling').
+ add_enum_alias('Mode', 'Pooling').
+ add_enum_alias('Format', 'ConvolutionV0')
+ )
+
 (pdef('LRN',
       'see ImageNet Classification with Deep Convolutional Neural Networks for'
       ' meaning of the fields').
@@ -55,8 +55,12 @@ void AtlasComputingContext::memcpy(void* dst, const void* src,
         default:
             megdnn_throw("bad atlas memcpy kind");
     }
+#if MGB_USE_ATLAS_ASYNC_API
     acl_check(aclrtMemcpyAsync(dst, size_in_bytes, src, size_in_bytes,
                                atlas_kind, m_ctx.stream));
+#else
+    acl_check(aclrtMemcpy(dst, size_in_bytes, src, size_in_bytes, atlas_kind));
+#endif
 }
 
 void AtlasComputingContext::memset(void* dst, int value, size_t size_in_bytes) {
@@ -65,7 +69,11 @@ void AtlasComputingContext::memset(void* dst, int value, size_t size_in_bytes) {
 }
 
 void AtlasComputingContext::synchronize() {
+#if MGB_USE_ATLAS_ASYNC_API
     acl_check(aclrtSynchronizeStream(m_ctx.stream));
+#else
+    return;
+#endif
 }
 
 // vim: syntax=cpp.doxygen
@@ -0,0 +1,37 @@
+/**
+ * \file dnn/src/common/adaptive_pooling.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#include "megdnn/opr_param_defs.h"
+#include "megdnn/oprs.h"
+
+#include "src/common/utils.h"
+
+namespace megdnn {
+
+param::Pooling AdaptivePoolingBase::deduce_pooling_param(
+        const TensorLayout& src, const TensorLayout& dst) {
+    megdnn_assert(param().format == param::AdaptivePooling::Format::NCHW);
+    size_t IH = src.shape[2], IW = src.shape[3], OH = dst.shape[2],
+           OW = dst.shape[3];
+
+    param::Pooling ret;
+    ret.mode = param().mode;
+    ret.format = param().format;
+    ret.pad_h = ret.pad_w = 0;
+    ret.stride_h = floor(IH / OH);
+    ret.stride_w = floor(IW / OW);
+    ret.window_h = IH - (OH - 1) * ret.stride_h;
+    ret.window_w = IW - (OW - 1) * ret.stride_w;
+
+    return ret;
+}
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
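deduce_pooling_param reduces adaptive pooling to a single ordinary pooling: zero padding, stride floor(I/O), and a window sized so the last output cell ends exactly at the input border (IH and OH are size_t, so the integer division already floors and the explicit floor() is redundant). A direct Python transcription with a worked example:

    def deduce_pooling_param(ih, iw, oh, ow):
        # mirrors AdaptivePoolingBase::deduce_pooling_param (NCHW, pad = 0)
        stride_h, stride_w = ih // oh, iw // ow
        window_h = ih - (oh - 1) * stride_h
        window_w = iw - (ow - 1) * stride_w
        return (stride_h, stride_w), (window_h, window_w)

    # a 7x7 input pooled to 3x3: stride 2, window 3, so the three windows
    # per axis cover [0:3], [2:5], [4:7] -- overlapping, but border-exact
    assert deduce_pooling_param(7, 7, 3, 3) == ((2, 2), (3, 3))

Note that this fixed (stride, window) pair coincides with the usual per-cell floor/ceil adaptive-pooling windows only when the output size divides the input size evenly.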
@@ -392,8 +392,6 @@ TensorLayout TensorLayout::broadcast(const TensorShape& tshape) const {
         TensorLayout result{dtype, format};
         result.ndim = tshape.ndim;
         for (size_t i = 0; i < tshape.ndim; i++) {
-            megdnn_throw_if(!tshape.shape[i], tensor_reshape_error,
-                            megdnn_mangle("target shape is 0"));
             result.shape[i] = tshape.shape[i];
             result.stride[i] = (tshape.shape[i] == 1);
         }
@@ -409,8 +407,6 @@ TensorLayout TensorLayout::broadcast(const TensorShape& tshape) const {
     for (size_t i = 0; i < tshape.ndim; ++i) {
         int target_idx = tshape.ndim - i - 1;
         int cur_idx = ndim - i - 1;
-        megdnn_throw_if(!tshape.shape[target_idx], tensor_reshape_error,
-                        megdnn_mangle("target shape is 0"));
         size_t cur_shape = (cur_idx >= 0 ? shape[cur_idx] : 1),
                cur_stride = (cur_idx >= 0 ? stride[cur_idx] : 0);
         if (tshape.shape[target_idx] != cur_shape) {
@@ -434,10 +430,16 @@ TensorLayout TensorLayout::broadcast(const TensorShape& tshape) const {
 bool TensorLayout::try_reshape(TensorLayout& result,
                                const TensorShape& tshp) const {
     megdnn_assert(tshp.ndim);
+    bool is_empty_shape = false;
     for (size_t i = 0; i < tshp.ndim; ++i) {
-        megdnn_throw_if(!tshp.shape[i], tensor_reshape_error,
-                        megdnn_mangle(ssprintf("bad target tshp: %s",
-                                               tshp.to_string().c_str())));
+        if (!tshp.shape[i]) {
+            megdnn_throw_if(!format.is_default(), tensor_reshape_error,
+                            megdnn_mangle(ssprintf("bad target tshp: %s",
+                                                   tshp.to_string().c_str())));
+            is_empty_shape = true;
+            break;
+        }
     }
 
     megdnn_throw_if(
@@ -454,6 +456,11 @@ bool TensorLayout::try_reshape(TensorLayout& result,
     result.format = this->format;
     result.TensorShape::operator=(tshp);
 
+    if (is_empty_shape) {
+        result.init_contiguous_stride();
+        return true;
+    }
+
     size_t sdim = 0, prod = 1, cont_sdim = 0;
     for (size_t i = 0; i < tshp.ndim; ++i) {
         megdnn_assert(cont_sdim < cont.ndim);
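Taken together, these hunks stop treating zero-size target shapes as errors: broadcast() now carries them through, and try_reshape() accepts them for default-format layouts, giving the result ordinary contiguous strides via init_contiguous_stride(). The same rule stated in NumPy terms (a behavioral sketch, not the megdnn API):

    import numpy as np

    # a zero-element target shape is legal; the result is empty and its
    # strides are exactly the contiguous ones
    x = np.empty((0, 6), dtype=np.float32)
    y = x.reshape((0, 2, 3))
    assert y.size == 0 and y.flags["C_CONTIGUOUS"]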
@@ -199,6 +199,8 @@ private:
     cb(Remap) \
     cb(RemapBackwardData) \
     cb(RemapBackwardMat) \
+    cb(AdaptivePoolingForward) \
+    cb(AdaptivePoolingBackward) \
 
 /*!
  * \brief specialize HandleImpl::create_operator for a single opr type;
@@ -0,0 +1,53 @@
+/**
+ * \file dnn/src/cuda/adaptive_pooling/opr_impl.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#include "src/cuda/adaptive_pooling/opr_impl.h"
+#include "src/cuda/utils.h"
+
+namespace megdnn {
+namespace cuda {
+
+void AdaptivePoolingForwardImpl::exec(_megdnn_tensor_in src,
+                                      _megdnn_tensor_out dst,
+                                      _megdnn_workspace workspace) {
+    auto opr = handle()->create_operator<PoolingForward>();
+    opr->param() = deduce_pooling_param(src.layout, dst.layout);
+    opr->exec(src, dst, workspace);
+}
+
+size_t AdaptivePoolingForwardImpl::get_workspace_in_bytes(
+        const TensorLayout& src, const TensorLayout& dst) {
+    auto opr = handle()->create_operator<PoolingForward>();
+    opr->param() = deduce_pooling_param(src, dst);
+    return opr->get_workspace_in_bytes(src, dst);
+}
+
+void AdaptivePoolingBackwardImpl::exec(_megdnn_tensor_in src,
+                                       _megdnn_tensor_in dst,
+                                       _megdnn_tensor_in diff,
+                                       _megdnn_tensor_out grad,
+                                       _megdnn_workspace workspace) {
+    auto opr = handle()->create_operator<PoolingBackward>();
+    opr->param() = deduce_pooling_param(src.layout, dst.layout);
+    opr->exec(src, dst, diff, grad, workspace);
+}
+
+size_t AdaptivePoolingBackwardImpl::get_workspace_in_bytes(
+        const TensorLayout& src, const TensorLayout& dst,
+        const TensorLayout& diff, const TensorLayout& grad) {
+    auto opr = handle()->create_operator<PoolingBackward>();
+    opr->param() = deduce_pooling_param(src, dst);
+    return opr->get_workspace_in_bytes(src, dst, diff, grad);
+}
+}  // namespace cuda
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
@@ -0,0 +1,44 @@
+/**
+ * \file dnn/src/cuda/adaptive_pooling/opr_impl.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#pragma once
+#include "megdnn/oprs.h"
+
+#include "src/cuda/cudnn_wrapper.h"
+#include "src/cuda/utils.h"
+
+namespace megdnn {
+namespace cuda {
+
+class AdaptivePoolingForwardImpl final : public AdaptivePoolingForward {
+public:
+    using AdaptivePoolingForward::AdaptivePoolingForward;
+    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
+              _megdnn_workspace workspace) override;
+    size_t get_workspace_in_bytes(const TensorLayout& src,
+                                  const TensorLayout& dst) override;
+};
+
+class AdaptivePoolingBackwardImpl final : public AdaptivePoolingBackward {
+public:
+    using AdaptivePoolingBackward::AdaptivePoolingBackward;
+    void exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
+              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) override;
+    size_t get_workspace_in_bytes(const TensorLayout& src,
+                                  const TensorLayout& dst,
+                                  const TensorLayout& diff,
+                                  const TensorLayout& grad) override;
+};
+}  // namespace cuda
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
@@ -11,6 +11,7 @@
 #include "src/common/handle_impl.h"
 
+#include "src/cuda/adaptive_pooling/opr_impl.h"
 #include "src/cuda/add_update/opr_impl.h"
 #include "src/cuda/argmxx/opr_impl.h"
 #include "src/cuda/argsort/opr_impl.h"
@@ -72,6 +72,7 @@ namespace indexing_multi_axis_vec {
 #define cb0(_dtype) \
         MEGDNN_FOREACH_TENSOR_NDIM(INST, DTypeTrait<_dtype>::ctype)
     MEGDNN_FOREACH_COMPUTING_DTYPE(cb0)
+    cb0(::megdnn::dtype::Bool)
 #undef cb0
 #undef INST
@@ -39,6 +39,11 @@ __device__ void atomicAdd(megdnn::dt_int16 *, megdnn::dt_int16) {
     ((int*)0)[0] = 1;
 }
 
+__device__ void atomicAdd(megdnn::dt_bool *, megdnn::dt_bool) {
+    __trap();
+    ((int*)0)[0] = 1;
+}
+
 #define KERN_APPLY_OPR_OPR \
     ::megdnn::cuda::indexing_multi_axis_vec::OprAtomicIncr
 #include "./kern_apply_opr_impl.cuinl"
@@ -120,6 +120,7 @@ void ExecImpl<Opr>::dispatch_exec() {
         case DTypeTrait<_dtype>::enumv: \
             return dispatch_exec_ctype<DTypeTrait<_dtype>::ctype>();
         MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
+        cb(::megdnn::dtype::Bool)
 #undef cb
         default:
             megdnn_throw("bad dtype");
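Together with the cb0(Bool) kernel instantiation and the dt_bool atomicAdd stub above, this dispatch entry lets multi-axis vector indexing run on boolean tensors; the atomicAdd overload stays a device trap because accumulate-style (incr) indexing has no meaning for bool. A sketch of the user-visible effect in the Python package (behavior inferred from the dtype wiring):

    import numpy as np
    from megengine import tensor

    mask = tensor(np.array([True, False, True]))
    idx = tensor(np.array([0, 2], dtype=np.int32))
    print(mask[idx].numpy())  # -> [ True  True]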
@@ -0,0 +1,52 @@
+/**
+ * \file dnn/src/naive/adaptive_pooling/opr_impl.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#include "src/naive/adaptive_pooling/opr_impl.h"
+
+#include "src/common/opr_delegate.h"
+#include "src/common/utils.h"
+#include "src/naive/handle.h"
+
+namespace megdnn {
+namespace naive {
+
+void AdaptivePoolingForwardImpl::exec(_megdnn_tensor_in src,
+                                      _megdnn_tensor_out dst,
+                                      _megdnn_workspace workspace) {
+    MEGDNN_DISPATCH_CPU_KERN(static_cast<naive::HandleImpl*>(handle()), {
+        auto opr = inplace_cpu_handle()->create_operator<PoolingForward>();
+        opr->param() = deduce_pooling_param(src.layout, dst.layout);
+        opr->exec(src, dst, workspace);
+    });
+}
+
+void AdaptivePoolingBackwardImpl::exec(_megdnn_tensor_in src,
+                                       _megdnn_tensor_in dst,
+                                       _megdnn_tensor_in diff,
+                                       _megdnn_tensor_out grad,
+                                       _megdnn_workspace workspace) {
+    MEGDNN_DISPATCH_CPU_KERN(static_cast<naive::HandleImpl*>(handle()), {
+        auto opr = inplace_cpu_handle()->create_operator<PoolingBackward>();
+        opr->param() = deduce_pooling_param(src.layout, dst.layout);
+        opr->exec(src, dst, diff, grad, workspace);
+    });
+}
+
+size_t AdaptivePoolingBackwardImpl::get_workspace_in_bytes(
+        const TensorLayout& src, const TensorLayout& dst,
+        const TensorLayout& diff, const TensorLayout& grad) {
+    auto opr = inplace_cpu_handle()->create_operator<PoolingBackward>();
+    opr->param() = deduce_pooling_param(src, dst);
+    return opr->get_workspace_in_bytes(src, dst, diff, grad);
+}
+}  // namespace naive
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
@@ -0,0 +1,43 @@
+/**
+ * \file dnn/src/naive/adaptive_pooling/opr_impl.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#pragma once
+#include "megdnn/oprs.h"
+#include "src/common/utils.h"
+
+namespace megdnn {
+namespace naive {
+
+class AdaptivePoolingForwardImpl : public AdaptivePoolingForward {
+public:
+    using AdaptivePoolingForward::AdaptivePoolingForward;
+    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
+              _megdnn_workspace workspace) override;
+    size_t get_workspace_in_bytes(const TensorLayout&,
+                                  const TensorLayout&) override {
+        return 0;
+    }
+};
+
+class AdaptivePoolingBackwardImpl : public AdaptivePoolingBackward {
+public:
+    using AdaptivePoolingBackward::AdaptivePoolingBackward;
+    void exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
+              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) override;
+    size_t get_workspace_in_bytes(const TensorLayout& src,
+                                  const TensorLayout& dst,
+                                  const TensorLayout& diff,
+                                  const TensorLayout& grad) override;
+};
+}  // namespace naive
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
@@ -13,6 +13,7 @@
 #include "src/common/handle_impl.h"
 
+#include "src/naive/adaptive_pooling/opr_impl.h"
 #include "src/naive/add_update/opr_impl.h"
 #include "src/naive/argmxx/opr_impl.h"
 #include "src/naive/argsort/opr_impl.h"
@@ -88,6 +88,7 @@ void dispatch_exec(HandleImpl *handle,
     }
     switch (data.layout.dtype.enumv()) {
         MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
+        cb(::megdnn::dtype::Bool)
         default:
             megdnn_throw(megdnn_mangle("bad dtype"));
     }
@@ -0,0 +1,55 @@
+/**
+ * \file dnn/test/common/adaptive_pooling.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#pragma once
+#include <cstddef>
+#include "megdnn/basic_types.h"
+#include "megdnn/opr_param_defs.h"
+
+namespace megdnn {
+namespace test {
+namespace adaptive_pooling {
+
+struct TestArg {
+    param::AdaptivePooling param;
+    TensorShape ishape;
+    TensorShape oshape;
+    TestArg(param::AdaptivePooling param, TensorShape ishape,
+            TensorShape oshape)
+            : param(param), ishape(ishape), oshape(oshape) {}
+};
+
+inline std::vector<TestArg> get_args() {
+    std::vector<TestArg> args;
+    using Param = param::AdaptivePooling;
+    using Mode = param::AdaptivePooling::Mode;
+
+    for (size_t i = 36; i < 40; ++i) {
+        args.emplace_back(Param{Mode::AVERAGE}, TensorShape{2, 3, i, i + 1},
+                          TensorShape{2, 3, i - 4, i - 2});
+        args.emplace_back(Param{Mode::MAX}, TensorShape{2, 3, i, i + 1},
+                          TensorShape{2, 3, i - 4, i - 2});
+    }
+
+    for (size_t i = 5; i < 10; ++i) {
+        args.emplace_back(Param{Mode::AVERAGE}, TensorShape{2, 3, i, i + 1},
+                          TensorShape{2, 3, i - 3, i - 2});
+        args.emplace_back(Param{Mode::MAX}, TensorShape{2, 3, i, i + 1},
+                          TensorShape{2, 3, i - 3, i - 2});
+    }
+    return args;
+}
+}  // namespace adaptive_pooling
+}  // namespace test
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
@@ -41,6 +41,8 @@ DEF(Images2NeibsForward, 2, true, true);
 DEF(Images2NeibsBackward, 2, true, false);
 DEF(PoolingForward, 2, true, true);
 DEF(PoolingBackward, 4, true, false);
+DEF(AdaptivePoolingForward, 2, true, false);
+DEF(AdaptivePoolingBackward, 4, true, false);
 DEF(LocalForward, 3, true, true);
 DEF(LocalBackwardData, 3, true, false);
 DEF(LocalBackwardFilter, 3, true, false);
@@ -0,0 +1,97 @@
+/**
+ * \file dnn/test/cuda/adaptive_pooling.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+#include "test/cuda/fixture.h"
+
+#include "megdnn/tensor_iter.h"
+#include "test/common/adaptive_pooling.h"
+#include "test/common/checker.h"
+
+#include "src/common/utils.h"
+#include "test/cuda/utils.h"
+
+#include <cudnn.h>
+#include "test/cuda/benchmark.h"
+
+namespace megdnn {
+namespace test {
+
+TEST_F(CUDA, ADAPTIVE_POOLING_FORWARD) {
+    auto args = adaptive_pooling::get_args();
+    using Format = param::AdaptivePooling::Format;
+    DType dtype = dtype::Float32();
+    for (auto&& arg : args) {
+        auto param = arg.param;
+        auto src = arg.ishape;
+        auto dst = arg.oshape;
+        param.format = Format::NCHW;
+        Checker<AdaptivePooling> checker(handle_cuda());
+        checker.set_epsilon(1e-2);
+        checker.set_param(param).set_dtype(0, dtype).set_dtype(1, dtype).exec(
+                TensorShapeArray{src, dst, {}});
+    }
+}
+
+TEST_F(CUDA, ADAPTIVE_POOLING_BACKWARD) {
+    auto args = adaptive_pooling::get_args();
+    for (auto&& arg : args) {
+        Checker<AdaptivePoolingBackward> checker(handle_cuda());
+        TensorLayout ilayout = TensorLayout(arg.ishape, dtype::Float32());
+        TensorLayout olayout = TensorLayout(arg.oshape, dtype::Float32());
+
+        auto constraint = [this,
+                           arg](CheckerHelper::TensorValueArray& tensors_orig) {
+            megdnn_assert(tensors_orig.size() == 4);
+            auto opr = handle_cuda()->create_operator<AdaptivePoolingForward>();
+            opr->param() = arg.param;
+
+            auto tensors_cuda_storage = CheckerHelper::alloc_tensors(
+                    handle_cuda(),
+                    {tensors_orig[0].layout, tensors_orig[1].layout}, 0);
+            auto&& tensors_cuda = *tensors_cuda_storage;
+
+            auto span = tensors_cuda[0].layout.span();
+            auto dst = static_cast<dt_byte*>(tensors_cuda[0].raw_ptr) +
+                       span.low_byte;
+            auto src = static_cast<const dt_byte*>(tensors_orig[0].raw_ptr) +
+                       span.low_byte;
+            megdnn_memcpy_H2D(handle_cuda(), dst, src, span.dist_byte());
+
+            auto workspace_size = opr->get_workspace_in_bytes(
+                    tensors_cuda[0].layout, tensors_cuda[1].layout);
+            auto workspace_cuda = megdnn_malloc(handle_cuda(), workspace_size);
+            Workspace workspace{static_cast<dt_byte*>(workspace_cuda),
+                                workspace_size};
+            opr->exec(tensors_cuda[0], tensors_cuda[1], workspace);
+            megdnn_free(handle_cuda(), workspace_cuda);
+
+            span = tensors_cuda[1].layout.span();
+            dst = static_cast<dt_byte*>(tensors_orig[1].raw_ptr) +
+                  span.low_byte;
+            src = static_cast<const dt_byte*>(tensors_cuda[1].raw_ptr) +
+                  span.low_byte;
+            megdnn_memcpy_D2H(handle_cuda(), dst, src, span.dist_byte());
+        };
+        DType dtype = dtype::Float32();
+        checker.set_tensors_constraint(constraint)
+                .set_dtype(0, dtype)
+                .set_dtype(1, dtype)
+                .set_dtype(2, dtype)
+                .set_dtype(3, dtype)
+                .set_param(arg.param)
+                .exec(TensorShapeArray{ilayout, olayout, olayout, ilayout});
+    }
+}
+}  // namespace test
+}  // namespace megdnn
+
+// vim: syntax=cpp.doxygen
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 
 #include "megdnn/oprs/nn.h"
@@ -37,7 +38,7 @@ std::vector<BenchArgs> get_resnet50_bench_args(size_t batch = 64) {
     args.emplace_back(BenchArgs{batch, 256, 56, 56, 32, 3, 1});
     args.emplace_back(BenchArgs{batch, 256, 56, 56, 32, 3, 2});
     args.emplace_back(BenchArgs{batch, 4, 256, 256, 32, 7, 2});
 
     args.emplace_back(BenchArgs{batch, 256, 56, 56, 64, 1, 1});
     args.emplace_back(BenchArgs{batch, 64, 56, 56, 64, 1, 1});
     args.emplace_back(BenchArgs{batch, 64, 56, 56, 64, 3, 1});
@@ -614,11 +615,8 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_HSWISH) {
     param.stride_h = param.stride_w = 1;
     param.format = param::ConvBias::Format::CHWN4;
     param.nonlineMode = param::ConvBias::NonlineMode::H_SWISH;
-    checker.set_param(param).execs({{4, 12, 12, 32, 4},
-                                    {4, 3, 3, 16, 4},
-                                    {4, 1, 1, 1, 4},
-                                    {},
-                                    {}});
+    checker.set_param(param).execs(
+            {{4, 12, 12, 32, 4}, {4, 3, 3, 16, 4}, {4, 1, 1, 1, 4}, {}, {}});
 }
 
 TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_CHECK_BOUNDS) {
@@ -1076,7 +1074,6 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_UNROLL_WIDTH_TENSORCORE_1x1_ALGO_2) {
 }
 
 #if CUDA_VERSION >= 10020
 /// \note: we only check several cases and block sizes in megdnn_test, the full
 /// testcases are written in cutlass repository
@@ -1234,8 +1231,7 @@ TEST_F(CUDA, BENCHMARK_CUTLASS_CONV_BIAS_INT8_NCHW4) {
             handle_cuda(), get_resnet50_bench_args(64),
             dtype::QuantizedS8{1.2f}, dtype::QuantizedS8{1.3f},
             dtype::QuantizedS32{1.2f * 1.3f}, dtype::QuantizedS8{1.0f},
-            "INT8_NCHW4_DOTPROD_IMPLICIT_GEMM",
-            param::ConvBias::Format::NCHW4);
+            "INT8_NCHW4_DOTPROD_IMPLICIT_GEMM", param::ConvBias::Format::NCHW4);
 }
 #endif
 
 }  // namespace test
@@ -47,8 +47,7 @@ add_custom_target(gen_opr_py DEPENDS ${GEN_OPS_FILE})
 ##################### end of opdef generation #########################
 
-set(VERSION_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/src/version.ld)
-add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT})
+add_custom_target(_version_ld SOURCES ${MGE_VERSION_SCRIPT})
 
 add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11)
 pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS})
@@ -57,8 +56,21 @@ if (APPLE)
 elseif (MSVC OR WIN32)
     # Windows does not support implicitly importing data members from DLL.
     target_link_libraries(${MODULE_NAME} PRIVATE megbrain megdnn)
+    message("-- CMAKE_MSVC_RUNTIME_LIBRARY: ${CMAKE_MSVC_RUNTIME_LIBRARY}")
+    set_target_properties(${MODULE_NAME} PROPERTIES MSVC_RUNTIME_LIBRARY "${CMAKE_MSVC_RUNTIME_LIBRARY}")
 else()
-    target_link_libraries(${MODULE_NAME} PRIVATE megengine_export -Wl,--version-script=${VERSION_SCRIPT})
+    if (MGE_WITH_PYTHON_MODULE)
+        # used to fix a runtime crash when building both the legacy mgb module
+        # (MGE_WITH_PYTHON_MODULE) and imperative (MGE_BUILD_IMPERATIVE_RT)
+        target_link_libraries(${MODULE_NAME} PRIVATE megengine_export -Wl,--version-script=${MGE_VERSION_SCRIPT})
+    else()
+        # used to reduce the whl size by depending on megbrain/dnn directly; otherwise
+        # cmake creates two copies of the cuda fatbin ELF section, one in
+        # megengine_export and one in every target that depends on megengine_export
+        target_link_libraries(${MODULE_NAME} PRIVATE megbrain megdnn -Wl,--version-script=${MGE_VERSION_SCRIPT})
+        if (MGE_WITH_DISTRIBUTED)
+            message("-- Imperative configured to link megray")
+            target_link_libraries(${MODULE_NAME} PRIVATE megray)
+        endif()
+    endif()
 endif()
 
 target_include_directories(${MODULE_NAME} PUBLIC src/include PRIVATE ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR})
@@ -76,7 +76,7 @@ from .logger import enable_debug_log, get_logger, set_log_file, set_log_level
 from .serialization import load, save
 from .tensor import Parameter, Tensor, tensor
 from .version import __version__
-from .core import cgtools
+from .utils import comp_graph_tools as cgtools
 
 _set_fork_exec_path_for_timed_func(
     sys.executable,
@@ -20,7 +20,7 @@ class GradManager:
     the forward operations start and when all resources should be released. A typical usage of
     GradManager is as follows:
 
-    .. codeblock::
+    .. code-block::
 
         gm = GradManager()
         gm.attach(model.parameters())
@@ -32,7 +32,7 @@ class GradManager:
     You can also use `record()` and `release()` method instead of `with` context:
 
-    .. codeblock::
+    .. code-block::
 
         gm = GradManager()
         gm.attach(model.parameters())
@@ -50,7 +50,7 @@ class GradManager:
     processes. Users will finally get the averaged gradients if an "AllReduce"
     callback is registered as follows:
 
-    .. codeblock::
+    .. code-block::
 
         import megengine.distributed as dist
@@ -71,7 +71,7 @@ class GradManager:
         r"""Registers parameters that gradients should be calculated with respect to.
         Callback Functions should have a signature like this:
 
-        .. codeblock::
+        .. code-block::
 
             def cb(param: Tensor, grad: Tensor) -> Tensor:
                 # do something
@@ -100,6 +100,8 @@ class GradManager:
         :param ys: outputs of forward operators, e.g., the loss tensor
         :param dys: derivatives of ys
         """
+        from ..functional import ones_like
+
         global backwarding_grad_manager
         cache = backwarding_grad_manager
         backwarding_grad_manager = self
@@ -113,7 +115,7 @@ class GradManager:
         if not isinstance(ys, (tuple, list)):
             ys = [ys]
         if dys is None:
-            dys = [tensor(1.0).broadcast(y.shape) for y in ys]
+            dys = [ones_like(y) for y in ys]
         if not isinstance(dys, (tuple, list)):
             dys = [dys]
         try:
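The default seed gradient is unchanged in value, just built more directly: ones_like(y) allocates an all-ones tensor with y's shape, dtype, and device, instead of broadcasting a float scalar. A hedged end-to-end sketch (module paths assumed):

    import numpy as np
    import megengine as mge
    from megengine.autodiff import GradManager

    w = mge.Parameter(np.ones((3,), dtype=np.float32))
    gm = GradManager()
    gm.attach([w])
    with gm:
        loss = (w * 2.0).sum()
        gm.backward(loss)  # dys defaults to [ones_like(loss)]
    print(w.grad.numpy())  # -> [2. 2. 2.]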
@@ -11,4 +11,3 @@ import sys
 
 from .tensor import Tensor
 from .tensor.megbrain_graph import Graph
-from .utils import comp_graph_tools as cgtools
@@ -22,11 +22,13 @@ class Device:
         else:
             self._cn = CompNode(device)
 
+        self.logical_name = self._cn.logical_name
+
     def to_c(self):
         return self._cn
 
     def __repr__(self):
-        return "{}({})".format(type(self).__qualname__, self)
+        return "{}({})".format(type(self).__qualname__, repr(self._cn))
 
     def __str__(self):
         return str(self._cn)
@@ -160,7 +160,7 @@ def subtensor_grad_fn(op, inputs, outputs, input_requires_grad):
     def make_grad(grad_op, dy):
         grad = (
             TensorWrapper(0, dtype=dy.dtype, device=dy.device)
-            .broadcast(TensorWrapper(input_shape))
+            ._broadcast(TensorWrapper(input_shape))
            .__wrapped__
         )
         (dx,) = apply(grad_op, grad, dy, *params)
@@ -186,7 +186,7 @@ def indexingMultiAxisVec_grad_fn(op, inputs, outputs, input_requires_grad):
     def make_grad(grad_op, dy):
         grad = (
             TensorWrapper(0, dtype=dy.dtype, device=dy.device)
-            .broadcast(TensorWrapper(input_shape))
+            ._broadcast(TensorWrapper(input_shape))
             .__wrapped__
         )
         (dx,) = apply(grad_op, grad, dy, *params)
@@ -50,8 +50,8 @@ class Function:
         """
         Applies operations to ``inputs`` and returns results. It must be overriden by all subclasses.
 
-        :param input: Input tensors.
-        :return: A tuple of Tensor or a single Tensor.
+        :param input: input tensors.
+        :return: a tuple of Tensor or a single Tensor.
 
         .. note::
@@ -64,12 +64,12 @@ class Function:
         """
         Compute the gradient of the forward function. It must be overriden by all subclasses.
 
-        :param output_grads: gradients of outputs that are returned by :meth:`~.function.Function.forward`
+        :param output_grads: gradients of outputs that are returned by :meth:`~.function.Function.forward`.
 
-        .. note::
+        .. note::
 
-            In case when some tensors of outputs are not related to loss function, the corresponding
-            values in ``output_grads`` would be ``None``.
+            In case when some tensors of outputs are not related to loss function, the corresponding
+            values in ``output_grads`` would be ``None``.
 
         .. note::
@@ -173,7 +173,7 @@ def unpack_getitem(inp, tuple_val, *, allow_newaxis=True):
                 item.append(True)
                 v = get_index(v)
                 assert np.issubdtype(v.dtype, np.integer) or np.issubdtype(
-                    v.dtype, np.bool
+                    v.dtype, np.bool_
                 ), "var type in the subscript must be int or bool"
                 tensors.append(v)
@@ -267,7 +267,7 @@ def setitem(tensor, index, value):
                     value.shape, tmp_result.shape
                 )
             )
-        value = value.broadcast(tmp_result.shape)
+        value = value._broadcast(tmp_result.shape)
     if use_subtensor:
         op = builtin.SetSubtensor(items=items)
     else:
@@ -8,6 +8,7 @@
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 import collections
 import json
+import os
 import threading
 import weakref
 from concurrent.futures import Future, ThreadPoolExecutor
@@ -49,7 +50,16 @@ class Graph(_imperative_rt.ComputingGraph):
 
     def execute(self, *args):
         assert self._future is None
-        self._future = self._executor.submit(self._function.execute, *args)
+
+        def wrapped(*args):
+            try:
+                self._function.execute(*args)
+            except Exception as exc:
+                for i in self._function._all_rendezvous:
+                    i.set_exception(str(exc))
+                raise exc
+
+        self._future = self._executor.submit(wrapped, *args)
 
     def wait(self):
         assert self._future is not None
@@ -275,6 +285,7 @@ def dump_graph(
     keep_param_name: bool = False,
     keep_opr_priority: bool = False,
     strip_info_file=None,
+    append_json=False
 ):
     """serialize the computing graph of `output_vars` and get byte result.
 
@@ -295,6 +306,9 @@
     :param keep_opr_priority: whether to keep priority setting for operators
     :param strip_info_file: a string for path or a file handler. if is not None,
         then the dump information for code strip would be written to ``strip_info_file``
+    :param append_json: only checked when ``strip_info_file`` is not None. If set
+        to True, the code-strip information will be appended to ``strip_info_file``;
+        if set to False, ``strip_info_file`` will be overwritten
     :return: dump result as byte string, and an instance of namedtuple
         :class:`CompGraphDumpResult`, whose fields are:
@@ -342,10 +356,25 @@
 
     if strip_info_file is not None:
         if isinstance(strip_info_file, str):
-            strip_info_file = open(strip_info_file, "w")
-        strip_info = json.loads(_imperative_rt.get_info_for_strip(ov))
-        strip_info["hash"] = dump_info.content_hash
-        json.dump(strip_info, strip_info_file)
+            if not os.path.exists(strip_info_file):
+                os.mknod(strip_info_file)
+            strip_info_file = open(strip_info_file, "r+")
+        new_strip_dict = json.loads(_imperative_rt.get_info_for_strip(ov))
+        ori_strip_dict = new_strip_dict
+        json_content = strip_info_file.read()
+        if append_json and len(json_content) != 0:
+            # if the json file already has contents, read them first and then
+            # merge in the new strip information
+            ori_strip_dict = json.loads(json_content)
+            for k in ori_strip_dict:
+                new_strip_dict_v = new_strip_dict.get(k)
+                if new_strip_dict_v is not None:
+                    for value in new_strip_dict_v:
+                        if not value in ori_strip_dict[k]:
+                            ori_strip_dict[k].append(value)
+        ori_strip_dict["hash"] = dump_info.content_hash
+        strip_info_file.seek(0)
+        strip_info_file.truncate()
+        json.dump(ori_strip_dict, strip_info_file)
 
     return dump_content, dump_info
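The rewritten block opens the strip-info file read-write (creating it if missing), merges the freshly generated strip info into whatever JSON is already there when append_json is set, refreshes the hash, and rewrites the file in place. The merge rule, distilled into a standalone helper (a sketch of the loop above, not an API from the diff):

    import json

    def merge_strip_info(ori, new, content_hash):
        # for every key already present, union in the new list entries
        for k in ori:
            for value in new.get(k) or []:
                if value not in ori[k]:
                    ori[k].append(value)
        ori["hash"] = content_hash
        return ori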
@@ -358,7 +387,7 @@ CompGraphLoadResult = collections.namedtuple(
 def load_graph(fpath):
     """Load a serialized computing graph from file.
 
-    :parma fpath: Path or Handle for the output file
+    :param fpath: Path or Handle of the input file
 
     :return: An instance of namedtuple :class:`CompGraphLoadResult`,
         whose fields are:
@@ -40,6 +40,8 @@
 # All Megvii Modifications are Copyright (C) 2014-2020 Megvii Inc. All rights reserved.
 # --------------------------------------------------------------------------------------
 
+from collections import OrderedDict
+
 from .utils import _toposort, groupby
 from .variadic import isvariadic
@@ -159,5 +161,5 @@ def ordering(signatures):
     for s in signatures:
         if s not in edges:
             edges[s] = []
-    edges = dict((k, [b for a, b in v]) for k, v in edges.items())
+    edges = OrderedDict((k, [b for a, b in v]) for k, v in edges.items())
     return _toposort(edges)
@@ -100,6 +100,8 @@ def _(data: DeviceTensorND):
 @as_raw_tensor.register(np.ndarray)
 def _(array: np.ndarray, dtype=None, device=None):
     device = None if device is None else as_device(device).to_c()
+    if 0 in array.strides:
+        array = array.squeeze().reshape(array.shape)
     return RawTensor(put(array, dtype=dtype, device=device))
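The added guard normalizes NumPy views whose strides contain 0 (e.g. axes created by broadcasting, which alias the same memory) before the array reaches put(): squeeze() drops the zero-stride singleton axes and the reshape back to the original shape re-materializes dense strides. For example:

    import numpy as np

    x = np.broadcast_to(np.arange(3.0), (1, 3))  # singleton axis with stride 0
    assert 0 in x.strides
    y = x.squeeze().reshape(x.shape)             # drop the axis, then restore it
    assert 0 not in y.strides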
@@ -57,7 +57,29 @@ def _transpose(data, axes):
 
 def _broadcast(inp, shape):
+    def valid_broadcast(src, tar):
+        def failed():
+            raise ValueError(
+                "the input shape {} can not be broadcasted to target shape {}".format(
+                    src, tar
+                )
+            )
+
+        if isinstance(src, (TensorBase, TensorWrapperBase)):
+            src = src.numpy()
+        if isinstance(tar, (TensorBase, TensorWrapperBase)):
+            tar = tar.numpy()
+
+        if len(src) > len(tar):
+            failed()
+
+        for i in range(min(len(src), len(tar))):
+            if src[-i - 1] != 1 and src[-i - 1] != tar[-i - 1]:
+                failed()
+
     shape = utils.astensor1d(shape, inp, dtype="int32", device=inp.device)
+    valid_broadcast(inp.shape, shape)
     (result,) = apply(builtin.Broadcast(), inp, shape)
     return result
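valid_broadcast enforces the NumPy trailing-axis rule eagerly, so a bad shape raises a Python ValueError instead of failing later inside the Broadcast op: aligned from the right, every source dim must be 1 or equal the target dim, and the source may not have more dims than the target. The rule in isolation:

    def can_broadcast(src, tar):
        # trailing-axis comparison, as in valid_broadcast above
        if len(src) > len(tar):
            return False
        return all(s == 1 or s == t for s, t in zip(reversed(src), reversed(tar)))

    assert can_broadcast((3, 1), (2, 3, 4))  # 1 expands, 3 matches
    assert not can_broadcast((3,), (4,))     # 3 != 1 and 3 != 4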
@@ -158,6 +180,10 @@ def _reduce(mode):
     def f(self, axis=None, keepdims: bool = False):
         data = self
         (data,) = utils.convert_inputs(data)
+        if mode == "MEAN":
+            data = data.astype("float32")
+        elif self.dtype == np.bool_:
+            data = data.astype("int32")
         if axis is None:
             data = data.reshape(-1)
             assert not keepdims, "can not set axis=None and keepdims=True"
@@ -180,6 +206,9 @@ def _reduce(mode):
         if not keepdims:
             result = _remove_axis(result, axis)
+        if self.dtype == np.bool_:
+            if mode in ["MIN", "MAX"]:
+                result = result.astype("bool")
         return result
 
     return f
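The two dtype tweaks make reductions on bool and MEAN well defined: MEAN always accumulates in float32, bool inputs are upcast to int32 before reducing, and only MIN/MAX cast the result back to bool (SUM of bools is a count, so int32 is the right output there). Expected behavior in a sketch (`mean` follows the same `_reduce` path):

    import numpy as np
    from megengine import tensor

    a = tensor(np.array([True, True, False]))
    print(a.sum().dtype)   # int32: upcast before SUM, kept as a count
    print(a.max().dtype)   # bool:  MIN/MAX results are cast back
    print(a.mean().dtype)  # float32: MEAN accumulates in float32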
@@ -203,7 +232,8 @@ def _todo(*_):
 def _expand_args(args):
     if len(args) == 1:
         if isinstance(
-            args[0], (collections.abc.Sequence, TensorBase, TensorWrapperBase)
+            args[0],
+            (collections.abc.Sequence, TensorBase, TensorWrapperBase, np.ndarray),
         ):
             args = args[0]
     return args
@@ -366,7 +396,8 @@ class ArrayMethodMixin(abc.ABC):
     def reshape(self, *args):
         return _reshape(self, _expand_args(args))
 
-    def broadcast(self, *args):
+    # FIXME: remove this method
+    def _broadcast(self, *args):
         return _broadcast(self, _expand_args(args))
 
     def transpose(self, *args):
@@ -377,7 +408,38 @@ class ArrayMethodMixin(abc.ABC):
     def flatten(self):
         return self.reshape(-1)
 
-    sum = _reduce("SUM")
+    def sum(self, axis=None, keepdims: bool = False):
+        r"""Returns the sum of each row of the input tensor in the given dimension ``axis``.
+        If ``axis`` is a list of axes, reduce over all of them.
+
+        If ``keepdims`` is ``True``, the shape of the output tensor is the same as the input tensor, except in the dimension(s) ``axis`` where it is of size 1. Otherwise, ``axis`` is squeezed (see :meth:`~.functional.tensor.squeeze`). Same for prod/mean/max/min.
+
+        :param axis: the dimension or dimensions to reduce.
+        :param keepdims: whether the output tensor has ndim retained or not.
+        :return: output tensor.
+
+        Examples:
+
+        .. testcode::
+
+            from megengine import tensor
+            a = tensor([False, True, True, False])
+            b = tensor([1.0, 2.0, 3.0, 4.0])
+            print(a.sum().numpy())
+            print(b.sum().numpy())
+
+        Outputs:
+
+        .. testoutput::
+
+            [2]
+            [10.]
+
+        """
+        return _reduce("SUM")(self, axis, keepdims)
+
     prod = _reduce("PRODUCT")
     min = _reduce("MIN")
     max = _reduce("MAX")
| @@ -16,39 +16,74 @@ from ..ops.special import Const | |||||
| from ..tensor.core import OpBase, TensorBase, TensorWrapperBase, apply | from ..tensor.core import OpBase, TensorBase, TensorWrapperBase, apply | ||||
| def dtype_promotion(raw_inputs): | |||||
| def add_dtype(i): | |||||
| if type(i) == int: | |||||
| return np.array(i, dtype=np.int32) | |||||
| if type(i) == float: | |||||
| return np.array(i, dtype=np.float32) | |||||
| if type(i) == bool: | |||||
| return np.array(i, dtype=np.bool_) | |||||
| return None | |||||
| scalar_inputs = [ | |||||
| add_dtype(i) for i in raw_inputs if not hasattr(i, "dtype") and add_dtype(i) | |||||
| ] | |||||
| inputs = [i for i in raw_inputs if hasattr(i, "dtype")] | |||||
| assert len(scalar_inputs + inputs) > 0 | |||||
| dtype = None | |||||
| if len(inputs) > 0: | |||||
| dtype = np.result_type(*inputs) | |||||
| dtype_all = np.result_type(*(inputs + scalar_inputs)) | |||||
| assert ( | |||||
| dtype != np.float64 and dtype != np.int64 | |||||
| ), "unsupport dtype {} by dtype_promotion, please use explict type convert".format( | |||||
| dtype | |||||
| ) | |||||
| if dtype_all == np.bool_: | |||||
| for i in raw_inputs: | |||||
| if not hasattr(i, "dtype") or i.dtype != np.bool_: | |||||
| raise TypeError( | |||||
| "bool dtype can not be operated with an element without bool dtype" | |||||
| ) | |||||
| if dtype_all == np.float64: | |||||
| dtype_all = np.float32 | |||||
| return dtype_all | |||||
| def dtype_promotion(inputs): | |||||
| """ | |||||
| Returns the dtype that would result from performing an arithmetic | |||||
| operation on the provided input tensors and scalars. | |||||
| """ | |||||
| # map numpy.dtype.kind to priority | |||||
| category_priority = { | |||||
| "f": 3, # floating-point | |||||
| "i": 2, # signed integer | |||||
| "u": 2, # unsigned integer | |||||
| "b": 1, # boolean | |||||
| } | |||||
| def scalar2dtype(x): | |||||
| """ | |||||
| For scalar `x`, returns its corresponding type. A floating point scalar | |||||
| has dtype 'float32'. An integral non-boolean scalar has dtype 'int32'. | |||||
| A boolean scalar has dtype 'bool'. | |||||
| """ | |||||
| if isinstance(x, bool): | |||||
| return np.bool_ | |||||
| if isinstance(x, int): | |||||
| return np.int32 | |||||
| if isinstance(x, float): | |||||
| return np.float32 | |||||
| def promote_types(types, cat): | |||||
| """ | |||||
| Returns the data type with sufficient size to hold all types of | |||||
| category `cat` in the list `types`. | |||||
| """ | |||||
| used_types = [ | |||||
| i for i in types if category_priority.get(np.dtype(i).kind, 0) == cat | |||||
| ] | |||||
| assert len(used_types) > 0 | |||||
| res = used_types[0] | |||||
| for i in used_types: | |||||
| res = np.promote_types(res, i) | |||||
| return res | |||||
| def max_priority(types): | |||||
| """ | |||||
| Returns the maximum value of the priority of each type in the list | |||||
| `types`. | |||||
| """ | |||||
| if not types: | |||||
| return 0 | |||||
| else: | |||||
| return max([category_priority.get(np.dtype(i).kind, 0) for i in types]) | |||||
| scalars = [] | |||||
| tensors = [] | |||||
| for data in inputs: | |||||
| if hasattr(data, "dtype"): | |||||
| tensors.append(data.dtype) | |||||
| elif isinstance(data, (float, int, bool)): | |||||
| scalars.append(scalar2dtype(data)) | |||||
| max_pri_scalars = max_priority(scalars) | |||||
| max_pri_tensors = max_priority(tensors) | |||||
| assert max_pri_scalars > 0 or max_pri_tensors > 0 | |||||
| if max_pri_scalars > max_pri_tensors: | |||||
| return promote_types(scalars, max_pri_scalars) | |||||
| else: | |||||
| return promote_types(tensors, max_pri_tensors) | |||||
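For reference, a quick sketch of how these promotion rules play out. Numpy arrays stand in for tensors here, since anything carrying a `dtype` attribute goes into the tensor bucket:

.. code-block:: python

    import numpy as np

    # equal category priority: tensor dtypes win, scalar dtypes are ignored
    assert dtype_promotion([np.zeros(3, np.float16), 1.0]) == np.float16

    # a scalar of a strictly higher category dominates the tensor dtypes
    assert dtype_promotion([np.zeros(3, np.int32), 1.0]) == np.float32

    # pure-scalar inputs promote among themselves
    assert dtype_promotion([1, 2.0]) == np.float32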
| def get_device(inputs): | def get_device(inputs): | ||||
| @@ -1,9 +0,0 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| # | |||||
| # Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, | |||||
| # software distributed under the License is distributed on an | |||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| from .comp_graph_tools import * | |||||
| @@ -26,7 +26,7 @@ def _clear_plasma_store(): | |||||
| # `_PlasmaStoreManager.__del__` will not be called automatically in subprocess, | # `_PlasmaStoreManager.__del__` will not be called automatically in subprocess, | ||||
| # so this function should be called explicitly | # so this function should be called explicitly | ||||
| global MGE_PLASMA_STORE_MANAGER | global MGE_PLASMA_STORE_MANAGER | ||||
| if MGE_PLASMA_STORE_MANAGER is not None: | |||||
| if MGE_PLASMA_STORE_MANAGER is not None and MGE_PLASMA_STORE_MANAGER.refcount == 0: | |||||
| del MGE_PLASMA_STORE_MANAGER | del MGE_PLASMA_STORE_MANAGER | ||||
| MGE_PLASMA_STORE_MANAGER = None | MGE_PLASMA_STORE_MANAGER = None | ||||
| @@ -50,6 +50,7 @@ class _PlasmaStoreManager: | |||||
| stderr=None if debug_flag else subprocess.DEVNULL, | stderr=None if debug_flag else subprocess.DEVNULL, | ||||
| ) | ) | ||||
| self.__initialized = True | self.__initialized = True | ||||
| self.refcount = 1 | |||||
| def __del__(self): | def __del__(self): | ||||
| if self.__initialized and self.plasma_store.returncode is None: | if self.__initialized and self.plasma_store.returncode is None: | ||||
| @@ -83,6 +84,8 @@ class PlasmaShmQueue: | |||||
| "Exception happened in starting plasma_store: {}\n" | "Exception happened in starting plasma_store: {}\n" | ||||
| "Tips: {}".format(str(e), err_info) | "Tips: {}".format(str(e), err_info) | ||||
| ) | ) | ||||
| else: | |||||
| MGE_PLASMA_STORE_MANAGER.refcount += 1 | |||||
| self.socket_name = MGE_PLASMA_STORE_MANAGER.socket_name | self.socket_name = MGE_PLASMA_STORE_MANAGER.socket_name | ||||
| @@ -133,6 +136,8 @@ class PlasmaShmQueue: | |||||
| def close(self): | def close(self): | ||||
| self.queue.close() | self.queue.close() | ||||
| self.disconnect_client() | self.disconnect_client() | ||||
| global MGE_PLASMA_STORE_MANAGER | |||||
| MGE_PLASMA_STORE_MANAGER.refcount -= 1 | |||||
| _clear_plasma_store() | _clear_plasma_store() | ||||
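The intended lifecycle of the reference count, sketched with illustrative calls: the first queue spawns the store manager with a count of 1, later queues reuse it, and the store process is reclaimed only when the last queue closes.

.. code-block:: python

    q1 = PlasmaShmQueue()  # spawns _PlasmaStoreManager, refcount == 1
    q2 = PlasmaShmQueue()  # reuses the running store, refcount == 2
    q1.close()             # refcount == 1, store process stays alive
    q2.close()             # refcount == 0, _clear_plasma_store() reaps it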
| def cancel_join_thread(self): | def cancel_join_thread(self): | ||||
| @@ -34,14 +34,14 @@ default_collate_err_msg_format = ( | |||||
| class Collator: | class Collator: | ||||
| r""" | r""" | ||||
| Used for merge a list of samples to form a mini-batch of Tenor(s). Used when using batched loading from a dataset. | |||||
| modified from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py | |||||
| Used for merging a list of samples to form a mini-batch of Tensor(s). Used when using batched loading from a dataset. | |||||
| Modified from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py | |||||
| """ | """ | ||||
| def apply(self, inputs): | def apply(self, inputs): | ||||
| """ | """ | ||||
| input : sequence_N(tuple(CHW, C, CK)) | |||||
| output : tuple(NCHW, NC, NCK) | |||||
| :param inputs: sequence_N(tuple(CHW, C, CK)). | |||||
| :return: tuple(NCHW, NC, NCK). | |||||
| """ | """ | ||||
| elem = inputs[0] | elem = inputs[0] | ||||
| elem_type = type(elem) | elem_type = type(elem) | ||||
| @@ -43,7 +43,7 @@ class DataLoader: | |||||
| ): | ): | ||||
| r"""Provides a convenient way to iterate on a given dataset. | r"""Provides a convenient way to iterate on a given dataset. | ||||
| `DataLoader` combines a dataset with sampler, transform and collator, | |||||
| `DataLoader` combines a dataset with `sampler`, `transform` and `collator`, | |||||
| making it flexible to continually fetch minibatches from a dataset. | making it flexible to continually fetch minibatches from a dataset. | ||||
| :type dataset: Dataset | :type dataset: Dataset | ||||
| @@ -53,21 +53,21 @@ class DataLoader: | |||||
| If specified, :attr:`shuffle` must be ``False``. | If specified, :attr:`shuffle` must be ``False``. | ||||
| :type transform: Transform | :type transform: Transform | ||||
| :param transform: defines the transforming strategy for a sampled batch. | :param transform: defines the transforming strategy for a sampled batch. | ||||
| (default: ``None``) | |||||
| Default: None | |||||
| :type collator: Collator | :type collator: Collator | ||||
| :param collator: defines the merging strategy for a transformed batch. | :param collator: defines the merging strategy for a transformed batch. | ||||
| (default: ``None``) | |||||
| Default: None | |||||
| :type num_workers: int | :type num_workers: int | ||||
| :param num_workers: the number of sub-processes used to load, transform and collate | :param num_workers: the number of sub-processes used to load, transform and collate | ||||
| the batch. ``0`` means using single-process. (default: ``0``) | |||||
| the batch. ``0`` means using single-process. Default: 0 | |||||
| :type timeout: int | :type timeout: int | ||||
| :param timeout: if positive, the timeout value (in seconds) for collecting a | :param timeout: if positive, the timeout value (in seconds) for collecting a | ||||
| batch from workers. (default: 0) | |||||
| batch from workers. Default: 0 | |||||
| :type divide: bool | :type divide: bool | ||||
| :param divide: defines the parallelization strategy in multi-processing mode. | :param divide: defines the parallelization strategy in multi-processing mode. | ||||
| ``True`` means one batch is divided into :attr:`num_workers` pieces, and | ``True`` means one batch is divided into :attr:`num_workers` pieces, and | ||||
| the workers will process these pieces in parallel. ``False`` means | the workers will process these pieces in parallel. ``False`` means | ||||
| different sub-process will process different batch. (default: ``False``) | |||||
| different sub-processes will process different batches. Default: False | |||||
| """ | """ | ||||
| @@ -12,7 +12,7 @@ from typing import Tuple | |||||
| class Dataset(ABC): | class Dataset(ABC): | ||||
| r""" | r""" | ||||
| An abstract class for all Datasets | |||||
| An abstract class for all Datasets. | |||||
| """ | """ | ||||
| @abstractmethod | @abstractmethod | ||||
| @@ -22,8 +22,8 @@ class Dataset(ABC): | |||||
| class MapDataset(Dataset): | class MapDataset(Dataset): | ||||
| r""" | r""" | ||||
| An abstract class for map data | |||||
| __getitem__ and __len__ method are aditionally needed | |||||
| An abstract class for map data. | |||||
| __getitem__ and __len__ methods are additionally needed. | |||||
| """ | """ | ||||
| @abstractmethod | @abstractmethod | ||||
| @@ -41,8 +41,8 @@ class MapDataset(Dataset): | |||||
| class StreamDataset(Dataset): | class StreamDataset(Dataset): | ||||
| r""" | r""" | ||||
| An abstract class for stream data | |||||
| __iter__ method is aditionally needed | |||||
| An abstract class for stream data. | |||||
| __iter__ method is additionally needed. | |||||
| """ | """ | ||||
| @abstractmethod | @abstractmethod | ||||
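For illustration, a toy map-style dataset implementing the two required methods (names are made up):

.. code-block:: python

    class SquaresDataset(MapDataset):
        """Toy MapDataset: item ``i`` is ``i ** 2``."""

        def __init__(self, n):
            super().__init__()
            self.n = n

        def __getitem__(self, index):
            return index ** 2

        def __len__(self):
            return self.n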
| @@ -21,7 +21,7 @@ logger = get_logger(__name__) | |||||
| class CIFAR10(VisionDataset): | class CIFAR10(VisionDataset): | ||||
| r""" ``Dataset`` for CIFAR10 meta data | |||||
| r""" ``Dataset`` for CIFAR10 meta data. | |||||
| """ | """ | ||||
| url_path = "http://www.cs.utoronto.ca/~kriz/" | url_path = "http://www.cs.utoronto.ca/~kriz/" | ||||
| @@ -118,7 +118,7 @@ class COCO(VisionDataset): | |||||
| self.ids = ids | self.ids = ids | ||||
| self.json_category_id_to_contiguous_id = { | self.json_category_id_to_contiguous_id = { | ||||
| v: i + 1 for i, v in enumerate(self.cats.keys()) | |||||
| v: i + 1 for i, v in enumerate(sorted(self.cats.keys())) | |||||
| } | } | ||||
| self.contiguous_category_id_to_json_id = { | self.contiguous_category_id_to_json_id = { | ||||
| @@ -30,19 +30,18 @@ class ImageFolder(VisionDataset): | |||||
| r""" | r""" | ||||
| ImageFolder is a class for loading image data and labels from an organized folder. | ImageFolder is a class for loading image data and labels from an organized folder. | ||||
| the folder is expected to be organized as followed | |||||
| root/cls/xxx.img_ext | |||||
| The folder is expected to be organized as follows: root/cls/xxx.img_ext | |||||
| labels are indices of sorted classes in the root directory | |||||
| Labels are indices of sorted classes in the root directory. | |||||
| :param root: root directory of an image folder | |||||
| :param root: root directory of an image folder. | |||||
| :param loader: a function used to load image from path, | :param loader: a function used to load image from path, | ||||
| if ``None``, default function that loads | if ``None``, default function that loads | ||||
| images with PILwill be called | |||||
| images with PIL will be called. | |||||
| :param check_valid_func: a function used to check if files in folder are | :param check_valid_func: a function used to check if files in folder are | ||||
| expected image files, if ``None``, default function | expected image files, if ``None``, default function | ||||
| that checks file extensions will be called | |||||
| :param class_name: if ``True``, return class name instead of class index | |||||
| that checks file extensions will be called. | |||||
| :param class_name: if ``True``, return class name instead of class index. | |||||
| """ | """ | ||||
| super().__init__(root, order=("image", "image_category")) | super().__init__(root, order=("image", "image_category")) | ||||
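A hypothetical layout and usage for the convention above, assuming ``__getitem__`` yields the ``(image, category)`` pair implied by ``order``:

.. code-block:: python

    # root/cat/0001.jpg, root/cat/0002.jpg
    # root/dog/0001.jpg
    dataset = ImageFolder("root")
    image, label = dataset[0]  # label is the index of the sorted class name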
| @@ -31,7 +31,7 @@ logger = get_logger(__name__) | |||||
| class ImageNet(ImageFolder): | class ImageNet(ImageFolder): | ||||
| r""" | r""" | ||||
| Load ImageNet from raw files or folder, expected folder looks like | |||||
| Load ImageNet from raw files or folder. Expected folder looks like: | |||||
| .. code-block:: bash | .. code-block:: bash | ||||
| @@ -60,25 +60,25 @@ class ImageNet(ImageFolder): | |||||
| def __init__(self, root: str = None, train: bool = True, **kwargs): | def __init__(self, root: str = None, train: bool = True, **kwargs): | ||||
| r""" | r""" | ||||
| initialization: | |||||
| Initialization: | |||||
| * if ``root`` contains ``self.target_folder`` depent on ``train``: | |||||
| * if ``root`` contains ``self.target_folder`` depending on ``train``: | |||||
| * initialize ImageFolder with target_folder | |||||
| * initialize ImageFolder with target_folder. | |||||
| * else: | * else: | ||||
| * if all raw files are in ``root``: | * if all raw files are in ``root``: | ||||
| * parse ``self.target_folder`` from raw files | |||||
| * initialize ImageFolder with ``self.target_folder`` | |||||
| * parse ``self.target_folder`` from raw files. | |||||
| * initialize ImageFolder with ``self.target_folder``. | |||||
| * else: | * else: | ||||
| * raise error | |||||
| * raise error. | |||||
| :param root: root directory of imagenet data, if root is ``None``, used default_dataset_root | |||||
| :param train: if ``True``, load the train split, otherwise load the validation split | |||||
| :param root: root directory of imagenet data, if root is ``None``, use default_dataset_root. | |||||
| :param train: if ``True``, load the train split, otherwise load the validation split. | |||||
| """ | """ | ||||
| # process the root path | # process the root path | ||||
| @@ -22,12 +22,12 @@ logger = get_logger(__name__) | |||||
| class MNIST(VisionDataset): | class MNIST(VisionDataset): | ||||
| r""" ``Dataset`` for MNIST meta data | |||||
| r""" ``Dataset`` for MNIST meta data. | |||||
| """ | """ | ||||
| url_path = "http://yann.lecun.com/exdb/mnist/" | url_path = "http://yann.lecun.com/exdb/mnist/" | ||||
| """ | """ | ||||
| url prefix for downloading raw file | |||||
| URL prefix for downloading raw files. | |||||
| """ | """ | ||||
| raw_file_name = [ | raw_file_name = [ | ||||
| "train-images-idx3-ubyte.gz", | "train-images-idx3-ubyte.gz", | ||||
| @@ -36,7 +36,7 @@ class MNIST(VisionDataset): | |||||
| "t10k-labels-idx1-ubyte.gz", | "t10k-labels-idx1-ubyte.gz", | ||||
| ] | ] | ||||
| """ | """ | ||||
| raw file names of both training set and test set (10k) | |||||
| Raw file names of both training set and test set (10k). | |||||
| """ | """ | ||||
| raw_file_md5 = [ | raw_file_md5 = [ | ||||
| "f68b3c2dcbeaaa9fbdd348bbdeb94873", | "f68b3c2dcbeaaa9fbdd348bbdeb94873", | ||||
| @@ -45,7 +45,7 @@ class MNIST(VisionDataset): | |||||
| "ec29112dd5afa0611ce80d1b7f02629c", | "ec29112dd5afa0611ce80d1b7f02629c", | ||||
| ] | ] | ||||
| """ | """ | ||||
| md5 for checking raw files | |||||
| MD5 checksums for checking raw files. | |||||
| """ | """ | ||||
| def __init__( | def __init__( | ||||
| @@ -57,10 +57,10 @@ class MNIST(VisionDataset): | |||||
| ): | ): | ||||
| r""" | r""" | ||||
| :param root: path for mnist dataset downloading or loading, if ``None``, | :param root: path for mnist dataset downloading or loading, if ``None``, | ||||
| set ``root`` to the ``_default_root`` | |||||
| :param train: if ``True``, loading trainingset, else loading test set | |||||
| set ``root`` to the ``_default_root``. | |||||
| :param train: if ``True``, load the training set, else load the test set. | |||||
| :param download: if raw files do not exist and ``download`` is set to ``True``, | :param download: if raw files do not exist and ``download`` is set to ``True``, | ||||
| download raw files and process, otherwise raise ValueError, default is True | |||||
| download and process raw files, otherwise raise ValueError. Default: True | |||||
| """ | """ | ||||
| super().__init__(root, order=("image", "image_category")) | super().__init__(root, order=("image", "image_category")) | ||||
| @@ -81,7 +81,7 @@ class Objects365(VisionDataset): | |||||
| self.ids = ids | self.ids = ids | ||||
| self.json_category_id_to_contiguous_id = { | self.json_category_id_to_contiguous_id = { | ||||
| v: i + 1 for i, v in enumerate(self.cats.keys()) | |||||
| v: i + 1 for i, v in enumerate(sorted(self.cats.keys())) | |||||
| } | } | ||||
| self.contiguous_category_id_to_json_id = { | self.contiguous_category_id_to_json_id = { | ||||
| @@ -75,6 +75,8 @@ class PascalVOC(VisionDataset): | |||||
| else: | else: | ||||
| raise NotImplementedError | raise NotImplementedError | ||||
| self.img_infos = dict() | |||||
| def __getitem__(self, index): | def __getitem__(self, index): | ||||
| target = [] | target = [] | ||||
| for k in self.order: | for k in self.order: | ||||
| @@ -107,9 +109,8 @@ class PascalVOC(VisionDataset): | |||||
| mask = mask[:, :, np.newaxis] | mask = mask[:, :, np.newaxis] | ||||
| target.append(mask) | target.append(mask) | ||||
| elif k == "info": | elif k == "info": | ||||
| if image is None: | |||||
| image = cv2.imread(self.images[index], cv2.IMREAD_COLOR) | |||||
| info = [image.shape[0], image.shape[1], self.file_names[index]] | |||||
| info = self.get_img_info(index, image) | |||||
| info = [info["height"], info["width"], info["file_name"]] | |||||
| target.append(info) | target.append(info) | ||||
| else: | else: | ||||
| raise NotImplementedError | raise NotImplementedError | ||||
| @@ -119,6 +120,17 @@ class PascalVOC(VisionDataset): | |||||
| def __len__(self): | def __len__(self): | ||||
| return len(self.images) | return len(self.images) | ||||
| def get_img_info(self, index, image=None): | |||||
| if index not in self.img_infos: | |||||
| if image is None: | |||||
| image = cv2.imread(self.images[index], cv2.IMREAD_COLOR) | |||||
| self.img_infos[index] = dict( | |||||
| height=image.shape[0], | |||||
| width=image.shape[1], | |||||
| file_name=self.file_names[index], | |||||
| ) | |||||
| return self.img_infos[index] | |||||
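Illustrative use of the cached lookup on a hypothetical ``voc`` instance; the image is decoded at most once per index:

.. code-block:: python

    info = voc.get_img_info(0)          # first call reads the image
    h, w, name = info["height"], info["width"], info["file_name"]
    assert voc.get_img_info(0) is info  # later calls hit the cache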
| def _trans_mask(self, mask): | def _trans_mask(self, mask): | ||||
| label = np.ones(mask.shape[:2]) * 255 | label = np.ones(mask.shape[:2]) * 255 | ||||
| for i in range(len(self.class_colors)): | for i in range(len(self.class_colors)): | ||||
| @@ -171,25 +183,3 @@ class PascalVOC(VisionDataset): | |||||
| "train", | "train", | ||||
| "tvmonitor", | "tvmonitor", | ||||
| ) | ) | ||||
| class_colors = [ | |||||
| [0, 0, 128], | |||||
| [0, 128, 0], | |||||
| [0, 128, 128], | |||||
| [128, 0, 0], | |||||
| [128, 0, 128], | |||||
| [128, 128, 0], | |||||
| [128, 128, 128], | |||||
| [0, 0, 64], | |||||
| [0, 0, 192], | |||||
| [0, 128, 64], | |||||
| [0, 128, 192], | |||||
| [128, 0, 64], | |||||
| [128, 0, 192], | |||||
| [128, 128, 64], | |||||
| [128, 128, 192], | |||||
| [0, 64, 0], | |||||
| [0, 64, 128], | |||||
| [0, 192, 0], | |||||
| [0, 192, 128], | |||||
| [128, 64, 0], | |||||
| ] | |||||
| @@ -28,25 +28,25 @@ class Sampler(ABC): | |||||
| seed=None, | seed=None, | ||||
| ): | ): | ||||
| r""" | r""" | ||||
| An abstract class for all sampler | |||||
| An abstract class for all sampler. | |||||
| :type dataset: `dataset` | :type dataset: `dataset` | ||||
| :param dataset: dataset to sample from | |||||
| :param dataset: dataset to sample from. | |||||
| :type batch_size: positive integer | :type batch_size: positive integer | ||||
| :param batch_size: batch size for batch method | |||||
| :param batch_size: batch size for batch method. | |||||
| :type drop_last: bool | :type drop_last: bool | ||||
| :param drop_last: set ``True`` to drop the last incomplete batch, | :param drop_last: set ``True`` to drop the last incomplete batch, | ||||
| if the dataset size is not divisible by the batch size. If ``False`` and | if the dataset size is not divisible by the batch size. If ``False`` and | ||||
| the size of dataset is not divisible by the batch_size, then the last batch will | the size of dataset is not divisible by the batch_size, then the last batch will | ||||
| be smaller. (default: ``False``) | |||||
| be smaller. Default: False | |||||
| :type num_samples: positive integer | :type num_samples: positive integer | ||||
| :param num_samples: number of samples assigned to one rank | |||||
| :param num_samples: number of samples assigned to one rank. | |||||
| :type world_size: positive integer | :type world_size: positive integer | ||||
| :param world_size: number of ranks | |||||
| :param world_size: number of ranks. | |||||
| :type rank: non-negative integer within 0 and world_size | :type rank: non-negative integer within 0 and world_size | ||||
| :param rank: rank id, non-negative interger within 0 and ``world_size`` | |||||
| :param rank: rank id, a non-negative integer within 0 and ``world_size``. | |||||
| :type seed: non-negative integer | :type seed: non-negative integer | ||||
| :param seed: seed for random operators | |||||
| :param seed: seed for random operators. | |||||
| """ | """ | ||||
| if ( | if ( | ||||
| not isinstance(batch_size, int) | not isinstance(batch_size, int) | ||||
| @@ -103,15 +103,15 @@ class Sampler(ABC): | |||||
| def sample(self): | def sample(self): | ||||
| """ | """ | ||||
| return a list contains all sample indices | |||||
| Return a list containing all sample indices. | |||||
| """ | """ | ||||
| raise NotImplementedError | raise NotImplementedError | ||||
| def scatter(self, indices) -> List: | def scatter(self, indices) -> List: | ||||
| r""" | r""" | ||||
| scatter method is used for splitting indices into subset, each subset | |||||
| Scatter method is used for splitting indices into subsets; each subset | |||||
| will be assigned to a rank. Indices are evenly split by default. | will be assigned to a rank. Indices are evenly split by default. | ||||
| If customized indices assignment method is needed, please rewrite this method | |||||
| If customized indices assignment method is needed, please rewrite this method. | |||||
| """ | """ | ||||
| total_size = self.num_samples * self.world_size | total_size = self.num_samples * self.world_size | ||||
| @@ -127,7 +127,7 @@ class Sampler(ABC): | |||||
| def batch(self) -> Iterator[List[Any]]: | def batch(self) -> Iterator[List[Any]]: | ||||
| r""" | r""" | ||||
| batch method provides a batch indices generator | |||||
| Batch method provides a batch indices generator. | |||||
| """ | """ | ||||
| indices = list(self.sample()) | indices = list(self.sample()) | ||||
| @@ -156,7 +156,7 @@ class SequentialSampler(Sampler): | |||||
| rank=None, | rank=None, | ||||
| ): | ): | ||||
| r""" | r""" | ||||
| Sample elements sequentially | |||||
| Sample elements sequentially. | |||||
| """ | """ | ||||
| super().__init__(dataset, batch_size, drop_last, None, world_size, rank) | super().__init__(dataset, batch_size, drop_last, None, world_size, rank) | ||||
| if indices is not None and not isinstance(indices, collections.abc.Sequence): | if indices is not None and not isinstance(indices, collections.abc.Sequence): | ||||
| @@ -168,7 +168,7 @@ class SequentialSampler(Sampler): | |||||
| def sample(self) -> Iterator[Any]: | def sample(self) -> Iterator[Any]: | ||||
| r""" | r""" | ||||
| return a generator | |||||
| Return a generator. | |||||
| """ | """ | ||||
| if self.indices is None: | if self.indices is None: | ||||
| return iter(range(len(self.dataset))) | return iter(range(len(self.dataset))) | ||||
| @@ -188,7 +188,7 @@ class RandomSampler(Sampler): | |||||
| seed=None, | seed=None, | ||||
| ): | ): | ||||
| r""" | r""" | ||||
| Sample elements randomly without replacement | |||||
| Sample elements randomly without replacement. | |||||
| """ | """ | ||||
| super().__init__(dataset, batch_size, drop_last, None, world_size, rank, seed) | super().__init__(dataset, batch_size, drop_last, None, world_size, rank, seed) | ||||
| if indices is not None and not isinstance(indices, collections.abc.Sequence): | if indices is not None and not isinstance(indices, collections.abc.Sequence): | ||||
| @@ -218,10 +218,10 @@ class ReplacementSampler(Sampler): | |||||
| seed=None, | seed=None, | ||||
| ): | ): | ||||
| r""" | r""" | ||||
| Sample elements randomly with replacement | |||||
| Sample elements randomly with replacement. | |||||
| :type weights: List | :type weights: List | ||||
| :param weights: weights for sampling indices, it could be unnormalized weights | |||||
| :param weights: weights for sampling indices; they may be unnormalized. | |||||
| """ | """ | ||||
| super().__init__( | super().__init__( | ||||
| dataset, batch_size, drop_last, num_samples, world_size, rank, seed | dataset, batch_size, drop_last, num_samples, world_size, rank, seed | ||||
| @@ -250,7 +250,7 @@ class ReplacementSampler(Sampler): | |||||
| class Infinite(Sampler): | class Infinite(Sampler): | ||||
| r"""Infinite Sampler warper for basic sampler""" | |||||
| r"""Infinite Sampler warper for basic sampler.""" | |||||
| def sample(self): | def sample(self): | ||||
| raise NotImplementedError("sample method not supported in Infinite") | raise NotImplementedError("sample method not supported in Infinite") | ||||
| @@ -12,7 +12,7 @@ from typing import Sequence, Tuple | |||||
| class Transform(ABC): | class Transform(ABC): | ||||
| """ | """ | ||||
| rewrite apply method in subclass | |||||
| Rewrite apply method in subclass. | |||||
| """ | """ | ||||
| def apply_batch(self, inputs: Sequence[Tuple]): | def apply_batch(self, inputs: Sequence[Tuple]): | ||||
| @@ -15,7 +15,7 @@ import numpy as np | |||||
| def wrap_keepdims(func): | def wrap_keepdims(func): | ||||
| """Wraper to keep the dimension of input images unchanged""" | |||||
| """Wraper to keep the dimension of input images unchanged.""" | |||||
| @functools.wraps(func) | @functools.wraps(func) | ||||
| def wrapper(image, *args, **kwargs): | def wrapper(image, *args, **kwargs): | ||||
| @@ -34,10 +34,10 @@ def wrap_keepdims(func): | |||||
| @wrap_keepdims | @wrap_keepdims | ||||
| def to_gray(image): | def to_gray(image): | ||||
| r""" | r""" | ||||
| Change BGR format image's color space to gray | |||||
| Change BGR format image's color space to gray. | |||||
| :param image: Input BGR format image, with (H, W, C) shape | |||||
| :return: Gray format image, with (H, W, C) shape | |||||
| :param image: input BGR format image, with `(H, W, C)` shape. | |||||
| :return: gray format image, with `(H, W, C)` shape. | |||||
| """ | """ | ||||
| return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) | ||||
| @@ -45,10 +45,10 @@ def to_gray(image): | |||||
| @wrap_keepdims | @wrap_keepdims | ||||
| def to_bgr(image): | def to_bgr(image): | ||||
| r""" | r""" | ||||
| Change gray format image's color space to BGR | |||||
| Change gray format image's color space to BGR. | |||||
| :param image: input Gray format image, with (H, W, C) shape | |||||
| :return: BGR format image, with (H, W, C) shape | |||||
| :param image: input gray format image, with `(H, W, C)` shape. | |||||
| :return: BGR format image, with `(H, W, C)` shape. | |||||
| """ | """ | ||||
| return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) | return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) | ||||
| @@ -56,18 +56,18 @@ def to_bgr(image): | |||||
| @wrap_keepdims | @wrap_keepdims | ||||
| def pad(input, size, value): | def pad(input, size, value): | ||||
| r""" | r""" | ||||
| Pad input data with *value* and given *size* | |||||
| Pad input data with *value* and given *size*. | |||||
| :param input: Input data, with (H, W, C) shape | |||||
| :param size: Padding size of input data, it could be integer or sequence. | |||||
| If it's an integer, the input data will be padded in four directions. | |||||
| If it's a sequence contains two integer, the bottom and right side | |||||
| :param input: input data, with `(H, W, C)` shape. | |||||
| :param size: padding size of input data, it could be an integer or a sequence. | |||||
| If it is an integer, the input data will be padded in four directions. | |||||
| If it is a sequence containing two integers, the bottom and right side | |||||
| of input data will be padded. | of input data will be padded. | ||||
| If it's a sequence contains four integer, the top, bottom, left, right | |||||
| If it is a sequence containing four integers, the top, bottom, left, right | |||||
| side of input data will be padded with given size. | side of input data will be padded with given size. | ||||
| :param value: Padding value of data, could be a sequence of int or float. | |||||
| if it's float value, the dtype of image will be casted to float32 also. | |||||
| :return: Padded image | |||||
| :param value: padding value of data, could be a sequence of int or float. | |||||
| If it is a float value, the dtype of the image will also be cast to float32. | |||||
| :return: padded image. | |||||
| """ | """ | ||||
| if isinstance(size, int): | if isinstance(size, int): | ||||
| size = (size, size, size, size) | size = (size, size, size, size) | ||||
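A quick sketch of the size conventions described above, assuming the documented `(top, bottom, left, right)` ordering for the four-element form:

.. code-block:: python

    import numpy as np

    img = np.zeros((4, 4, 3), dtype=np.uint8)
    pad(img, 2, 0).shape             # (8, 8, 3): padded in all four directions
    pad(img, (1, 1, 2, 2), 0).shape  # (6, 8, 3): top, bottom, left, right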
| @@ -81,14 +81,18 @@ def pad(input, size, value): | |||||
| @wrap_keepdims | @wrap_keepdims | ||||
| def flip(image, flipCode): | def flip(image, flipCode): | ||||
| r""" | r""" | ||||
| Accordding to the flipCode (the type of flip), flip the input image | |||||
| According to the flipCode (the type of flip), flip the input image. | |||||
| :param image: Input image, with (H, W, C) shape | |||||
| :param image: input image, with `(H, W, C)` shape. | |||||
| :param flipCode: code that indicates the type of flip. | :param flipCode: code that indicates the type of flip. | ||||
| 1 : Flip horizontally | |||||
| 0 : Flip vertically | |||||
| -1 : Flip horizontally and vertically | |||||
| :return: BGR format image, with (H, W, C) shape | |||||
| * 1 : Flip horizontally | |||||
| * 0 : Flip vertically | |||||
| * -1: Flip horizontally and vertically | |||||
| :return: BGR format image, with `(H, W, C)` shape. | |||||
| """ | """ | ||||
| return cv2.flip(image, flipCode=flipCode) | return cv2.flip(image, flipCode=flipCode) | ||||
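The three flipCode values in action (illustrative; the kept channel dimension is elided in the comments):

.. code-block:: python

    import numpy as np

    img = np.arange(6, dtype=np.uint8).reshape(2, 3, 1)  # rows [0 1 2], [3 4 5]
    flip(img, 1)    # horizontal: rows become [2 1 0], [5 4 3]
    flip(img, 0)    # vertical:   rows become [3 4 5], [0 1 2]
    flip(img, -1)   # both:       rows become [5 4 3], [2 1 0]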
| @@ -96,12 +100,12 @@ def flip(image, flipCode): | |||||
| @wrap_keepdims | @wrap_keepdims | ||||
| def resize(input, size, interpolation=cv2.INTER_LINEAR): | def resize(input, size, interpolation=cv2.INTER_LINEAR): | ||||
| r""" | r""" | ||||
| resize the input data to given size | |||||
| Resize the input data to given size. | |||||
| :param input: Input data, could be image or masks, with (H, W, C) shape | |||||
| :param size: Target size of input data, with (height, width) shape. | |||||
| :param interpolation: Interpolation method. | |||||
| :return: Resized data, with (H, W, C) shape | |||||
| :param input: input data, could be image or masks, with `(H, W, C)` shape. | |||||
| :param size: target size of input data, with (height, width) shape. | |||||
| :param interpolation: interpolation method. | |||||
| :return: resized data, with `(H, W, C)` shape. | |||||
| """ | """ | ||||
| if len(size) != 2: | if len(size) != 2: | ||||
| raise ValueError("resize needs (h, w), but got {}".format(size)) | raise ValueError("resize needs (h, w), but got {}".format(size)) | ||||
| @@ -44,26 +44,26 @@ __all__ = [ | |||||
| class VisionTransform(Transform): | class VisionTransform(Transform): | ||||
| r""" | r""" | ||||
| Base class of all transforms used in computer vision. | Base class of all transforms used in computer vision. | ||||
| calling logic: apply_batch() -> apply() -> _apply_image() and other _apply_*() | |||||
| Calling logic: apply_batch() -> apply() -> _apply_image() and other _apply_*() | |||||
| method. If you want to implement a self-defined transform method for image, | method. If you want to implement a self-defined transform method for image, | ||||
| rewrite _apply_image method in subclass. | rewrite _apply_image method in subclass. | ||||
| :param order: Input type order. Input is a tuple contains different structures, | |||||
| :param order: input type order. Input is a tuple containing different structures, | |||||
| order is used to specify the order of structures. For example, if your input | order is used to specify the order of structures. For example, if your input | ||||
| is (image, boxes) type, then the order should be ("image", "boxes"). | |||||
| Current available strings & data type are describe below: | |||||
| is (image, boxes) type, then the ``order`` should be ("image", "boxes"). | |||||
| Currently available strings and data types are described below: | |||||
| * "image": input image, with shape of (H, W, C) | |||||
| * "coords": coordinates, with shape of (N, 2) | |||||
| * "boxes": bounding boxes, with shape of (N, 4), "xyxy" format, | |||||
| * "image": input image, with shape of `(H, W, C)`. | |||||
| * "coords": coordinates, with shape of `(N, 2)`. | |||||
| * "boxes": bounding boxes, with shape of `(N, 4)`, "xyxy" format, | |||||
| the 1st "xy" represents top left point of a box, | the 1st "xy" represents top left point of a box, | ||||
| the 2nd "xy" represents right bottom point. | the 2nd "xy" represents right bottom point. | ||||
| * "mask": map used for segmentation, with shape of (H, W, 1) | |||||
| * "keypoints": keypoints with shape of (N, K, 3), N for number of instances, | |||||
| * "mask": map used for segmentation, with shape of `(H, W, 1)`. | |||||
| * "keypoints": keypoints with shape of `(N, K, 3)`, N for number of instances, | |||||
| and K for number of keypoints in one instance. The first two dimensions | and K for number of keypoints in one instance. The first two dimensions | ||||
| of the last axis are the coordinates of keypoints and the 3rd dimension is | of the last axis are the coordinates of keypoints and the 3rd dimension is | ||||
| the label of keypoints. | the label of keypoints. | ||||
| * "polygons": A sequence contains numpy array, its length is number of instances. | |||||
| * "polygons": a sequence containing numpy arrays, its length is the number of instances. | |||||
| Each numpy array represents polygon coordinate of one instance. | Each numpy array represents polygon coordinate of one instance. | ||||
| * "category": categories for some data type. For example, "image_category" | * "category": categories for some data type. For example, "image_category" | ||||
| means category of the input image and "boxes_category" means categories of | means category of the input image and "boxes_category" means categories of | ||||
| @@ -94,11 +94,11 @@ class VisionTransform(Transform): | |||||
| self.order = order | self.order = order | ||||
| def apply_batch(self, inputs: Sequence[Tuple]): | def apply_batch(self, inputs: Sequence[Tuple]): | ||||
| r"""Apply transform on batch input data""" | |||||
| r"""Apply transform on batch input data.""" | |||||
| return tuple(self.apply(input) for input in inputs) | return tuple(self.apply(input) for input in inputs) | ||||
| def apply(self, input: Tuple): | def apply(self, input: Tuple): | ||||
| r"""Apply transform on single input data""" | |||||
| r"""Apply transform on single input data.""" | |||||
| if not isinstance(input, tuple): | if not isinstance(input, tuple): | ||||
| input = (input,) | input = (input,) | ||||
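A minimal self-defined transform following the calling logic described above: only ``_apply_image`` is overridden, and the other ``_apply_*`` hooks fall back to the base-class defaults (a sketch, not the library's canonical example).

.. code-block:: python

    class Invert(VisionTransform):
        def __init__(self, *, order=None):
            super().__init__(order=order)

        def _apply_image(self, image):
            return 255 - image  # assumes a uint8 image

    out = Invert().apply(image)  # non-tuple input is wrapped into a tuple internally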
| @@ -156,10 +156,10 @@ class VisionTransform(Transform): | |||||
| class ToMode(VisionTransform): | class ToMode(VisionTransform): | ||||
| r"""Change input data to a target mode. | r"""Change input data to a target mode. | ||||
| For example, most transforms use HWC mode image, | For example, most transforms use HWC mode image, | ||||
| while the Neural Network might use CHW mode input tensor | |||||
| while the neural network might use CHW mode input tensor. | |||||
| :param mode: Output mode of input. Use "CHW" mode by default. | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param mode: output mode of input. Default: "CHW" | |||||
| :param order: the same with :class:`VisionTransform` | |||||
| """ | """ | ||||
| def __init__(self, mode="CHW", *, order=None): | def __init__(self, mode="CHW", *, order=None): | ||||
| @@ -185,14 +185,14 @@ class Compose(VisionTransform): | |||||
| r""" | r""" | ||||
| Composes several transforms together. | Composes several transforms together. | ||||
| :param transforms: List of :class:`VisionTransform` to compose. | |||||
| :param batch_compose: Whether use shuffle_indices for batch data or not. | |||||
| :param transforms: list of :class:`VisionTransform` to compose. | |||||
| :param batch_compose: whether use shuffle_indices for batch data or not. | |||||
| If True, use original input sequence. | If True, use original input sequence. | ||||
| Otherwise, the shuffle_indices will be used for transforms. | Otherwise, the shuffle_indices will be used for transforms. | ||||
| :param shuffle_indices: Indices used for random shuffle, start at 1. | |||||
| :param shuffle_indices: indices used for random shuffle, start at 1. | |||||
| For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transform | For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transform | ||||
| will be random shuffled, the 2nd and 4th transform will also be shuffled. | will be random shuffled, the 2nd and 4th transform will also be shuffled. | ||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param order: the same with :class:`VisionTransform` | |||||
| Examples: | Examples: | ||||
| @@ -264,8 +264,8 @@ class TorchTransformCompose(VisionTransform): | |||||
| some transforms with tensor in torchvision are not supported, | some transforms with tensor in torchvision are not supported, | ||||
| such as Normalize and ToTensor in torchvision. | such as Normalize and ToTensor in torchvision. | ||||
| :param transforms: The same with ``Compose`` | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param transforms: the same with ``Compose``. | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, transforms, *, order=None): | def __init__(self, transforms, *, order=None): | ||||
| @@ -303,16 +303,16 @@ class TorchTransformCompose(VisionTransform): | |||||
| class Pad(VisionTransform): | class Pad(VisionTransform): | ||||
| r"""Pad the input data. | r"""Pad the input data. | ||||
| :param size: Padding size of input image, it could be integer or sequence. | |||||
| If it's an integer, the input image will be padded in four directions. | |||||
| If it's a sequence contains two integer, the bottom and right side | |||||
| :param size: padding size of input image, it could be integer or sequence. | |||||
| If it is an integer, the input image will be padded in four directions. | |||||
| If it is a sequence containing two integers, the bottom and right side | |||||
| of image will be padded. | of image will be padded. | ||||
| If it's a sequence contains four integer, the top, bottom, left, right | |||||
| If it is a sequence containing four integers, the top, bottom, left, right | |||||
| side of image will be padded with given size. | side of image will be padded with given size. | ||||
| :param value: Padding value of image, could be a sequence of int or float. | |||||
| if it's float value, the dtype of image will be casted to float32 also. | |||||
| :param mask_value: Padding value of segmentation map. | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param value: padding value of image, could be a sequence of int or float. | |||||
| If it is a float value, the dtype of the image will also be cast to float32. | |||||
| :param mask_value: padding value of segmentation map. | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, size=0, value=0, mask_value=0, *, order=None): | def __init__(self, size=0, value=0, mask_value=0, *, order=None): | ||||
| @@ -350,15 +350,15 @@ class Pad(VisionTransform): | |||||
| class Resize(VisionTransform): | class Resize(VisionTransform): | ||||
| r"""Resize the input data. | r"""Resize the input data. | ||||
| :param output_size: Target size of image, with (height, width) shape. | |||||
| :param interpolation: Interpolation method. All methods are listed below: | |||||
| :param output_size: target size of image, with (height, width) shape. | |||||
| :param interpolation: interpolation method. All methods are listed below: | |||||
| * cv2.INTER_NEAREST – a nearest-neighbor interpolation. | * cv2.INTER_NEAREST – a nearest-neighbor interpolation. | ||||
| * cv2.INTER_LINEAR – a bilinear interpolation (used by default). | * cv2.INTER_LINEAR – a bilinear interpolation (used by default). | ||||
| * cv2.INTER_AREA – resampling using pixel area relation. | * cv2.INTER_AREA – resampling using pixel area relation. | ||||
| * cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood. | * cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood. | ||||
| * cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood. | * cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood. | ||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, output_size, interpolation=cv2.INTER_LINEAR, *, order=None): | def __init__(self, output_size, interpolation=cv2.INTER_LINEAR, *, order=None): | ||||
| @@ -476,8 +476,8 @@ class ShortestEdgeResize(VisionTransform): | |||||
| class RandomResize(VisionTransform): | class RandomResize(VisionTransform): | ||||
| r"""Resize the input data randomly. | r"""Resize the input data randomly. | ||||
| :param scale_range: . | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param scale_range: range of scaling. | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, scale_range, interpolation=cv2.INTER_LINEAR, *, order=None): | def __init__(self, scale_range, interpolation=cv2.INTER_LINEAR, *, order=None): | ||||
| @@ -519,13 +519,13 @@ class RandomResize(VisionTransform): | |||||
| class RandomCrop(VisionTransform): | class RandomCrop(VisionTransform): | ||||
| r"""Crop the input data randomly. Before applying the crop transform, | r"""Crop the input data randomly. Before applying the crop transform, | ||||
| pad the image first. And if target size is still bigger than the size of | |||||
| pad the image first. If target size is still bigger than the size of | |||||
| padded image, pad the image size to target size. | padded image, pad the image size to target size. | ||||
| :param output_size: Target size of output image, with (height, width) shape. | |||||
| :param padding_size: The same with `size` in ``Pad`` | |||||
| :param padding_value: The same with `value` in ``Pad`` | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param output_size: target size of output image, with (height, width) shape. | |||||
| :param padding_size: the same with `size` in ``Pad``. | |||||
| :param padding_value: the same with `value` in ``Pad``. | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__( | def __init__( | ||||
| @@ -580,10 +580,10 @@ class RandomResizedCrop(VisionTransform): | |||||
| aspect ratio (default: 3/4 to 1.33) of the original aspect ratio is made. | aspect ratio (default: 3/4 to 1.33) of the original aspect ratio is made. | ||||
| After applying the crop transform, the input data will be resized to the given size. | After applying the crop transform, the input data will be resized to the given size. | ||||
| :param output_size: Target size of output image, with (height, width) shape. | |||||
| :param scale_range: Range of size of the origin size cropped. Default: (0.08, 1.0) | |||||
| :param ratio_range: Range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33) | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param output_size: target size of output image, with (height, width) shape. | |||||
| :param scale_range: range of size of the origin size cropped. Default: (0.08, 1.0) | |||||
| :param ratio_range: range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33) | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__( | def __init__( | ||||
| @@ -666,8 +666,8 @@ class RandomResizedCrop(VisionTransform): | |||||
| class CenterCrop(VisionTransform): | class CenterCrop(VisionTransform): | ||||
| r"""Crops the given the input data at the center. | r"""Crops the given the input data at the center. | ||||
| :param output_size: Target size of output image, with (height, width) shape. | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param output_size: target size of output image, with (height, width) shape. | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, output_size, *, order=None): | def __init__(self, output_size, *, order=None): | ||||
| @@ -710,7 +710,7 @@ class RandomHorizontalFlip(VisionTransform): | |||||
| r"""Horizontally flip the input data randomly with a given probability. | r"""Horizontally flip the input data randomly with a given probability. | ||||
| :param prob: probability of the input data being flipped. Default: 0.5 | :param prob: probability of the input data being flipped. Default: 0.5 | ||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, prob: float = 0.5, *, order=None): | def __init__(self, prob: float = 0.5, *, order=None): | ||||
| @@ -742,7 +742,7 @@ class RandomVerticalFlip(VisionTransform): | |||||
| r"""Vertically flip the input data randomly with a given probability. | r"""Vertically flip the input data randomly with a given probability. | ||||
| :param prob: probability of the input data being flipped. Default: 0.5 | :param prob: probability of the input data being flipped. Default: 0.5 | ||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, prob: float = 0.5, *, order=None): | def __init__(self, prob: float = 0.5, *, order=None): | ||||
| @@ -776,9 +776,9 @@ class Normalize(VisionTransform): | |||||
| this transform will normalize each channel of the input data. | this transform will normalize each channel of the input data. | ||||
| ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` | ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` | ||||
| :param mean: Sequence of means for each channel. | |||||
| :param std: Sequence of standard deviations for each channel. | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param mean: sequence of means for each channel. | |||||
| :param std: sequence of standard deviations for each channel. | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, mean=0.0, std=1.0, *, order=None): | def __init__(self, mean=0.0, std=1.0, *, order=None): | ||||
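A worked instance of the per-channel formula, with illustrative values and assuming ``apply`` accepts a bare image as in :class:`VisionTransform`:

.. code-block:: python

    import numpy as np

    img = np.full((2, 2, 3), 128.0, dtype=np.float32)
    t = Normalize(mean=(104.0, 117.0, 123.0), std=(1.0, 2.0, 4.0))
    out = t.apply(img)
    # out[..., 0] == (128 - 104) / 1 == 24.0
    # out[..., 1] == (128 - 117) / 2 == 5.5
    # out[..., 2] == (128 - 123) / 4 == 1.25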
| @@ -802,7 +802,7 @@ class GaussianNoise(VisionTransform): | |||||
| :param mean: Gaussian mean used to generate noise. | :param mean: Gaussian mean used to generate noise. | ||||
| :param std: Gaussian standard deviation used to generate noise. | :param std: Gaussian standard deviation used to generate noise. | ||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, mean=0.0, std=1.0, *, order=None): | def __init__(self, mean=0.0, std=1.0, *, order=None): | ||||
| @@ -826,9 +826,9 @@ class GaussianNoise(VisionTransform): | |||||
| class BrightnessTransform(VisionTransform): | class BrightnessTransform(VisionTransform): | ||||
| r"""Adjust brightness of the input data. | r"""Adjust brightness of the input data. | ||||
| :param value: How much to adjust the brightness. Can be any | |||||
| non negative number. 0 gives the original image | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param value: how much to adjust the brightness. Can be any | |||||
| non-negative number. 0 gives the original image. | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, value, *, order=None): | def __init__(self, value, *, order=None): | ||||
| @@ -857,9 +857,9 @@ class BrightnessTransform(VisionTransform): | |||||
| class ContrastTransform(VisionTransform): | class ContrastTransform(VisionTransform): | ||||
| r"""Adjust contrast of the input data. | r"""Adjust contrast of the input data. | ||||
| :param value: How much to adjust the contrast. Can be any | |||||
| non negative number. 0 gives the original image | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param value: how much to adjust the contrast. Can be any | |||||
| non-negative number. 0 gives the original image. | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, value, *, order=None): | def __init__(self, value, *, order=None): | ||||
| @@ -888,9 +888,9 @@ class ContrastTransform(VisionTransform): | |||||
| class SaturationTransform(VisionTransform): | class SaturationTransform(VisionTransform): | ||||
| r"""Adjust saturation of the input data. | r"""Adjust saturation of the input data. | ||||
| :param value: How much to adjust the saturation. Can be any | |||||
| non negative number. 0 gives the original image | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param value: how much to adjust the saturation. Can be any | |||||
| non-negative number. 0 gives the original image. | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, value, *, order=None): | def __init__(self, value, *, order=None): | ||||
| @@ -919,9 +919,9 @@ class SaturationTransform(VisionTransform): | |||||
| class HueTransform(VisionTransform): | class HueTransform(VisionTransform): | ||||
| r"""Adjust hue of the input data. | r"""Adjust hue of the input data. | ||||
| :param value: How much to adjust the hue. Can be any number | |||||
| between 0 and 0.5, 0 gives the original image | |||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param value: how much to adjust the hue. Can be any number | |||||
| between 0 and 0.5; 0 gives the original image. | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, value, *, order=None): | def __init__(self, value, *, order=None): | ||||
| @@ -957,19 +957,19 @@ class HueTransform(VisionTransform): | |||||
| class ColorJitter(VisionTransform): | class ColorJitter(VisionTransform): | ||||
| r"""Randomly change the brightness, contrast, saturation and hue of an image. | r"""Randomly change the brightness, contrast, saturation and hue of an image. | ||||
| :param brightness: How much to jitter brightness. | |||||
| :param brightness: how much to jitter brightness. | |||||
| Chosen uniformly from [max(0, 1 - brightness), 1 + brightness] | Chosen uniformly from [max(0, 1 - brightness), 1 + brightness] | ||||
| or the given [min, max]. Should be non negative numbers. | or the given [min, max]. Should be non negative numbers. | ||||
| :param contrast: How much to jitter contrast. | |||||
| :param contrast: how much to jitter contrast. | |||||
| Chosen uniformly from [max(0, 1 - contrast), 1 + contrast] | Chosen uniformly from [max(0, 1 - contrast), 1 + contrast] | ||||
| or the given [min, max]. Should be non negative numbers. | or the given [min, max]. Should be non negative numbers. | ||||
| :param saturation: How much to jitter saturation. | |||||
| :param saturation: how much to jitter saturation. | |||||
| Chosen uniformly from [max(0, 1 - saturation), 1 + saturation] | Chosen uniformly from [max(0, 1 - saturation), 1 + saturation] | ||||
| or the given [min, max]. Should be non negative numbers. | or the given [min, max]. Should be non negative numbers. | ||||
| :param hue: How much to jitter hue. | |||||
| :param hue: how much to jitter hue. | |||||
| Chosen uniformly from [-hue, hue] or the given [min, max]. | Chosen uniformly from [-hue, hue] or the given [min, max]. | ||||
| Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5. | Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5. | ||||
| :param order: The same with :class:`VisionTransform` | |||||
| :param order: the same with :class:`VisionTransform`. | |||||
| """ | """ | ||||
| def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, *, order=None): | def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, *, order=None): | ||||
| @@ -7,6 +7,7 @@ | |||||
| # software distributed under the License is distributed on an | # software distributed under the License is distributed on an | ||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| import os | import os | ||||
| import re | |||||
| from .core._imperative_rt.common import CompNode, DeviceType | from .core._imperative_rt.common import CompNode, DeviceType | ||||
| from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config | from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config | ||||
| @@ -22,10 +23,8 @@ __all__ = [ | |||||
| def _valid_device(inp): | def _valid_device(inp): | ||||
| if isinstance(inp, str) and len(inp) == 4: | |||||
| if inp[0] in {"x", "c", "g"} and inp[1:3] == "pu": | |||||
| if inp[3] == "x" or inp[3].isdigit(): | |||||
| return True | |||||
| if isinstance(inp, str) and re.match("^[cxg]pu(\d+|\d+:\d+|x)$", inp): | |||||
| return True | |||||
| return False | return False | ||||
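The pattern accepts a device type (``cpu``/``gpu``/``xpu``) followed by an index, an index:stream pair, or the wildcard ``x``; for instance:

.. code-block:: python

    assert _valid_device("gpu0")
    assert _valid_device("cpu2:1")    # device 2, the second field presumably the stream
    assert _valid_device("xpux")      # wildcard index
    assert not _valid_device("cpu")   # missing index
    assert not _valid_device("tpu0")  # unknown device type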
| @@ -71,11 +70,11 @@ def set_default_device(device: str = "xpux"): | |||||
| 'multithread' device type is available for inference, which implements | 'multithread' device type is available for inference, which implements | ||||
| multi-threading parallelism at the operator level. For example, | multi-threading parallelism at the operator level. For example, | ||||
| 'multithread4' will compute with 4 threads. which implements | |||||
| 'multithread4' will compute with 4 threads. | |||||
| The default value is 'xpux' to specify any device available. The priority of using gpu is higher when both gpu and cpu are available. | The default value is 'xpux' to specify any device available. The priority of using gpu is higher when both gpu and cpu are available. | ||||
| It can also be set by environmental variable `MGE_DEFAULT_DEVICE`. | |||||
| It can also be set by environment variable `MGE_DEFAULT_DEVICE`. | |||||
| """ | """ | ||||
| assert _valid_device(device), "Invalid device name {}".format(device) | assert _valid_device(device), "Invalid device name {}".format(device) | ||||
| CompNode._set_default_device(device) | CompNode._set_default_device(device) | ||||
| @@ -99,13 +98,13 @@ def set_prealloc_config( | |||||
| growth_factor=2.0, | growth_factor=2.0, | ||||
| device_type=DeviceType.CUDA, | device_type=DeviceType.CUDA, | ||||
| ): | ): | ||||
| """specifies how to pre-allocate from raw dev allocator | |||||
| """Specifies how to pre-allocate from raw device allocator. | |||||
| :param alignment: specifies the alignment in bytes. | :param alignment: specifies the alignment in bytes. | ||||
| :param min_req: min request size in bytes. | :param min_req: min request size in bytes. | ||||
| :param max_overhead: max overhead above required size in bytes. | :param max_overhead: max overhead above required size in bytes. | ||||
| :growth_factor: request size / cur allocated | |||||
| :device_type: the device type | |||||
| :param growth_factor: `request size / currently allocated size`. | |||||
| :param device_type: the device type | |||||
| """ | """ | ||||
| assert alignment > 0 | assert alignment > 0 | ||||
| @@ -102,7 +102,7 @@ def _(op: RemoteRecv): | |||||
| def collective_comm(inp, mode, group, device): | def collective_comm(inp, mode, group, device): | ||||
| """Helper function for applying collective communication functions""" | |||||
| """Helper function for applying collective communication functions.""" | |||||
| assert isinstance(group, Group) | assert isinstance(group, Group) | ||||
| if group is None: | if group is None: | ||||
| return inp | return inp | ||||
| @@ -123,11 +123,11 @@ def collective_comm(inp, mode, group, device): | |||||
| def reduce_sum( | def reduce_sum( | ||||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Create reduce_sum operator for collective communication | |||||
| """Create reduce_sum operator for collective communication. | |||||
| :param inp: input tensor | |||||
| :param group: communication group | |||||
| :param device: execute placement | |||||
| :param inp: input tensor. | |||||
| :param group: communication group. | |||||
| :param device: execution device. | |||||
| """ | """ | ||||
| mode = CollectiveCommMode.REDUCE_SUM | mode = CollectiveCommMode.REDUCE_SUM | ||||
| return collective_comm(inp, mode, group, device) | return collective_comm(inp, mode, group, device) | ||||
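A hypothetical two-rank usage of the operator above (module paths assumed): every rank calls it, and the summed result lands on the root rank.

.. code-block:: python

    import megengine.distributed as dist
    from megengine import tensor

    x = tensor([dist.get_rank() + 1.0])  # rank 0 holds [1.], rank 1 holds [2.]
    y = dist.functional.reduce_sum(x)    # the root rank receives [3.]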
| @@ -136,11 +136,11 @@ def reduce_sum( | |||||
| def broadcast( | def broadcast( | ||||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Create broadcast operator for collective communication | |||||
| """Create broadcast operator for collective communication. | |||||
| :param inp: input tensor | |||||
| :param group: communication group | |||||
| :param device: execute placement | |||||
| :param inp: input tensor. | |||||
| :param group: communication group. | |||||
| :param device: execution device. | |||||
| """ | """ | ||||
| mode = CollectiveCommMode.BROADCAST | mode = CollectiveCommMode.BROADCAST | ||||
| return collective_comm(inp, mode, group, device) | return collective_comm(inp, mode, group, device) | ||||
| @@ -149,11 +149,11 @@ def broadcast( | |||||
| def all_gather( | def all_gather( | ||||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Create all_gather operator for collective communication | |||||
| """Create all_gather operator for collective communication. | |||||
| :param inp: input tensor | |||||
| :param group: communication group | |||||
| :param device: execute placement | |||||
| :param inp: input tensor. | |||||
| :param group: communication group. | |||||
| :param device: execution device. | |||||
| """ | """ | ||||
| mode = CollectiveCommMode.ALL_GATHER | mode = CollectiveCommMode.ALL_GATHER | ||||
| return collective_comm(inp, mode, group, device) | return collective_comm(inp, mode, group, device) | ||||
| @@ -162,11 +162,11 @@ def all_gather( | |||||
| def reduce_scatter_sum( | def reduce_scatter_sum( | ||||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Create reduce_scatter_sum operator for collective communication | |||||
| """Create reduce_scatter_sum operator for collective communication. | |||||
| :param inp: input tensor | |||||
| :param group: communication group | |||||
| :param device: execute placement | |||||
| :param inp: input tensor. | |||||
| :param group: communication group. | |||||
| :param device: execution device. | |||||
| """ | """ | ||||
| mode = CollectiveCommMode.REDUCE_SCATTER_SUM | mode = CollectiveCommMode.REDUCE_SCATTER_SUM | ||||
| return collective_comm(inp, mode, group, device) | return collective_comm(inp, mode, group, device) | ||||
| @@ -175,11 +175,11 @@ def reduce_scatter_sum( | |||||
| def all_reduce_sum( | def all_reduce_sum( | ||||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Create all_reduce_sum operator for collective communication | |||||
| """Create all_reduce_sum operator for collective communication. | |||||
| :param inp: input tensor | |||||
| :param group: communication group | |||||
| :param device: execute placement | |||||
| :param inp: input tensor. | |||||
| :param group: communication group. | |||||
| :param device: execution device. | |||||
| """ | """ | ||||
| mode = CollectiveCommMode.ALL_REDUCE_SUM | mode = CollectiveCommMode.ALL_REDUCE_SUM | ||||
| return collective_comm(inp, mode, group, device) | return collective_comm(inp, mode, group, device) | ||||
| @@ -188,11 +188,11 @@ def all_reduce_sum( | |||||
| def all_reduce_max( | def all_reduce_max( | ||||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Create all_reduce_max operator for collective communication | |||||
| """Create all_reduce_max operator for collective communication. | |||||
| :param inp: input tensor | |||||
| :param group: communication group | |||||
| :param device: execute placement | |||||
| :param inp: input tensor. | |||||
| :param group: communication group. | |||||
| :param device: execution device. | |||||
| """ | """ | ||||
| mode = CollectiveCommMode.ALL_REDUCE_MAX | mode = CollectiveCommMode.ALL_REDUCE_MAX | ||||
| return collective_comm(inp, mode, group, device) | return collective_comm(inp, mode, group, device) | ||||
| @@ -201,11 +201,11 @@ def all_reduce_max( | |||||
| def all_reduce_min( | def all_reduce_min( | ||||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Create all_reduce_min operator for collective communication | |||||
| """Create all_reduce_min operator for collective communication. | |||||
| :param inp: input tensor | |||||
| :param group: communication group | |||||
| :param device: execute placement | |||||
| :param inp: input tensor. | |||||
| :param group: communication group. | |||||
| :param device: execution device. | |||||
| """ | """ | ||||
| mode = CollectiveCommMode.ALL_REDUCE_MIN | mode = CollectiveCommMode.ALL_REDUCE_MIN | ||||
| return collective_comm(inp, mode, group, device) | return collective_comm(inp, mode, group, device) | ||||
| @@ -214,11 +214,11 @@ def all_reduce_min( | |||||
| def gather( | def gather( | ||||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Create gather operator for collective communication | |||||
| """Create gather operator for collective communication. | |||||
| :param inp: input tensor | |||||
| :param group: communication group | |||||
| :param device: execute placement | |||||
| :param inp: input tensor. | |||||
| :param group: communication group. | |||||
| :param device: execution device. | |||||
| """ | """ | ||||
| mode = CollectiveCommMode.GATHER | mode = CollectiveCommMode.GATHER | ||||
| return collective_comm(inp, mode, group, device) | return collective_comm(inp, mode, group, device) | ||||
| @@ -227,11 +227,11 @@ def gather( | |||||
| def scatter( | def scatter( | ||||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Create scatter operator for collective communication | |||||
| """Create scatter operator for collective communication. | |||||
| :param inp: input tensor | |||||
| :param group: communication group | |||||
| :param device: execute placement | |||||
| :param inp: input tensor. | |||||
| :param group: communication group. | |||||
| :param device: execution device. | |||||
| """ | """ | ||||
| mode = CollectiveCommMode.SCATTER | mode = CollectiveCommMode.SCATTER | ||||
| return collective_comm(inp, mode, group, device) | return collective_comm(inp, mode, group, device) | ||||
| @@ -240,21 +240,21 @@ def scatter( | |||||
| def all_to_all( | def all_to_all( | ||||
| inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Create all_to_all operator for collective communication | |||||
| """Create all_to_all operator for collective communication. | |||||
| :param inp: input tensor | |||||
| :param group: communication group | |||||
| :param device: execute placement | |||||
| :param inp: input tensor. | |||||
| :param group: communication group. | |||||
| :param device: execution device. | |||||
| """ | """ | ||||
| mode = CollectiveCommMode.ALL_TO_ALL | mode = CollectiveCommMode.ALL_TO_ALL | ||||
| return collective_comm(inp, mode, group, device) | return collective_comm(inp, mode, group, device) | ||||
| def remote_send(inp: Tensor, dest_rank: int) -> Tensor: | def remote_send(inp: Tensor, dest_rank: int) -> Tensor: | ||||
| """Send a Tensor to a remote process | |||||
| """Send a Tensor to a remote process. | |||||
| :param inp: tensor to send | |||||
| :param dest_rank: destination process rank | |||||
| :param inp: tensor to send. | |||||
| :param dest_rank: destination process rank. | |||||
| """ | """ | ||||
| op = RemoteSend() | op = RemoteSend() | ||||
| op.key = "{}->{}".format(get_rank(), dest_rank) | op.key = "{}->{}".format(get_rank(), dest_rank) | ||||
| @@ -266,12 +266,12 @@ def remote_send(inp: Tensor, dest_rank: int) -> Tensor: | |||||
| def remote_recv( | def remote_recv( | ||||
| src_rank: int, shape: Tuple[int], dtype: type, device: Optional[str] = None | src_rank: int, shape: Tuple[int], dtype: type, device: Optional[str] = None | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Receive a Tensor from a remote process | |||||
| """Receive a Tensor from a remote process. | |||||
| :param src_rank: source process rank | |||||
| :param shape: the shape of the tensor to receive | |||||
| :param dtype: the data type of the tensor to receive | |||||
| :param device: the device to place the received tensor | |||||
| :param src_rank: source process rank. | |||||
| :param shape: the shape of the tensor to receive. | |||||
| :param dtype: the data type of the tensor to receive. | |||||
| :param device: the device to place the received tensor. | |||||
| """ | """ | ||||
| key = "{}->{}".format(src_rank, get_rank()) | key = "{}->{}".format(src_rank, get_rank()) | ||||
| @@ -83,12 +83,12 @@ def init_process_group( | |||||
| ) -> None: | ) -> None: | ||||
| """Initialize the distributed process group and specify the device used in the current process | """Initialize the distributed process group and specify the device used in the current process | ||||
| :param master_ip: IP address of the master node | |||||
| :param port: Port available for all processes to communicate | |||||
| :param world_size: Total number of processes participating in the job | |||||
| :param rank: Rank of the current process | |||||
| :param device: The GPU device id to bind this process to | |||||
| :param backend: Communicator backend, currently support 'nccl' and 'ucx' | |||||
| :param master_ip: ip address of the master node. | |||||
| :param port: port available for all processes to communicate. | |||||
| :param world_size: total number of processes participating in the job. | |||||
| :param rank: rank of the current process. | |||||
| :param device: the GPU device id to bind this process to. | |||||
| :param backend: communicator backend, currently supports 'nccl' and 'ucx'. | |||||
| """ | """ | ||||
| if not isinstance(master_ip, str): | if not isinstance(master_ip, str): | ||||
| raise TypeError("Expect type str but got {}".format(type(master_ip))) | raise TypeError("Expect type str but got {}".format(type(master_ip))) | ||||
| @@ -127,50 +127,50 @@ def init_process_group( | |||||
| def is_distributed() -> bool: | def is_distributed() -> bool: | ||||
| """Return True if the distributed process group has been initialized""" | |||||
| """Return True if the distributed process group has been initialized.""" | |||||
| return _sd is not None | return _sd is not None | ||||
| def get_rank() -> int: | def get_rank() -> int: | ||||
| """Get the rank of the current process""" | |||||
| """Get the rank of the current process.""" | |||||
| return _sd.proc_rank if _sd is not None else 0 | return _sd.proc_rank if _sd is not None else 0 | ||||
| def get_world_size() -> int: | def get_world_size() -> int: | ||||
| """Get the total number of processes participating in the job""" | |||||
| """Get the total number of processes participating in the job.""" | |||||
| return _sd.world_size if _sd is not None else 1 | return _sd.world_size if _sd is not None else 1 | ||||
| def get_backend() -> str: | def get_backend() -> str: | ||||
| """Get the backend str""" | |||||
| """Get the backend str.""" | |||||
| assert _sd is not None, "please call init_process_group first" | assert _sd is not None, "please call init_process_group first" | ||||
| return _sd.backend if _sd is not None else None | return _sd.backend if _sd is not None else None | ||||
| def get_py_server_addr() -> Tuple[str, int]: | def get_py_server_addr() -> Tuple[str, int]: | ||||
| """Get master_ip and port of python XML RPC server""" | |||||
| """Get master_ip and port of python XML RPC server.""" | |||||
| assert _sd is not None, "please call init_process_group first" | assert _sd is not None, "please call init_process_group first" | ||||
| return _sd.master_ip, _sd.py_server_port | return _sd.master_ip, _sd.py_server_port | ||||
| def get_mm_server_addr() -> Tuple[str, int]: | def get_mm_server_addr() -> Tuple[str, int]: | ||||
| """Get master_ip and port of C++ mm_server""" | |||||
| """Get master_ip and port of C++ mm_server.""" | |||||
| assert _sd is not None, "please call init_process_group first" | assert _sd is not None, "please call init_process_group first" | ||||
| return _sd.master_ip, _sd.mm_server_port | return _sd.master_ip, _sd.mm_server_port | ||||
| def get_client() -> Client: | def get_client() -> Client: | ||||
| """Get client of python XML RPC server""" | |||||
| """Get client of python XML RPC server.""" | |||||
| assert _sd is not None, "please call init_process_group first" | assert _sd is not None, "please call init_process_group first" | ||||
| return _sd.client | return _sd.client | ||||
| def new_group(proc_ranks: List[int]) -> Group: | def new_group(proc_ranks: List[int]) -> Group: | ||||
| """Build a subgroup containing certain ranks""" | |||||
| """Build a subgroup containing certain ranks.""" | |||||
| return Group(proc_ranks) | return Group(proc_ranks) | ||||
| def group_barrier(group: Optional[Group] = WORLD) -> None: | def group_barrier(group: Optional[Group] = WORLD) -> None: | ||||
| """Block until all ranks in the group reach this barrier""" | |||||
| """Block until all ranks in the group reach this barrier.""" | |||||
| assert isinstance(group, Group) | assert isinstance(group, Group) | ||||
| _sd.client.group_barrier(group.key, group.size) | _sd.client.group_barrier(group.key, group.size) | ||||
| @@ -17,11 +17,112 @@ import numpy as np | |||||
| from megengine.autodiff.grad_manager import GradManager, get_backwarding_grad_manager | from megengine.autodiff.grad_manager import GradManager, get_backwarding_grad_manager | ||||
| from megengine.device import get_default_device, get_device_count | from megengine.device import get_default_device, get_device_count | ||||
| from ..functional.param_pack import get_offsets, pack_allreduce_split | |||||
| from ..core.ops.builtin import ParamPackConcat, ParamPackSplit | |||||
| from ..core.tensor.core import apply | |||||
| from ..functional.utils import copy | from ..functional.utils import copy | ||||
| from ..tensor import Tensor | |||||
| from ..utils.future import Future | from ..utils.future import Future | ||||
| from .functional import all_reduce_sum, broadcast | from .functional import all_reduce_sum, broadcast | ||||
| from .group import WORLD, group_barrier, is_distributed | |||||
| from .group import WORLD, Group, group_barrier, is_distributed | |||||
| def param_pack_split(inp: Tensor, offsets: list, shapes: list): | |||||
| r""" | |||||
| Splits the input tensor into a list of tensors as described by ``offsets`` and ``shapes``, | |||||
| only used for ``parampack``. | |||||
| :param inp: input tensor. | |||||
| :param offsets: offsets of outputs, length of `2 * n`, | |||||
| where `n` is the number of tensors you want to split, | |||||
| format `[begin0, end0, begin1, end1]`. | |||||
| :param shapes: tensor shapes of outputs. | |||||
| :return: split tensors. | |||||
| Examples: | |||||
| .. testcode:: | |||||
| import numpy as np | |||||
| from megengine import tensor | |||||
| from megengine.distributed.helper import param_pack_split | |||||
| a = tensor(np.ones((10,), np.int32)) | |||||
| b, c = param_pack_split(a, [0, 1, 1, 10], [(1,), (3, 3)]) | |||||
| print(b.numpy()) | |||||
| print(c.numpy()) | |||||
| Outputs: | |||||
| .. testoutput:: | |||||
| [1] | |||||
| [[1 1 1] | |||||
| [1 1 1] | |||||
| [1 1 1]] | |||||
| """ | |||||
| op = ParamPackSplit() | |||||
| op.offsets = offsets | |||||
| op.shapes = shapes | |||||
| return apply(op, inp) | |||||
| def param_pack_concat(inps: list, offsets: Tensor, offsets_val: list): | |||||
| r""" | |||||
| Returns concatenated tensor, only used for ``parampack``. | |||||
| :param inps: input tensors. | |||||
| :param offsets: the offsets stored as a tensor on device. | |||||
| :param offsets_val: offsets of inputs, length of `2 * n`, | |||||
| format `[begin0, end0, begin1, end1]`. | |||||
| :return: concatenated tensor. | |||||
| Examples: | |||||
| .. testcode:: | |||||
| import numpy as np | |||||
| from megengine import tensor | |||||
| from megengine.distributed.helper import param_pack_concat | |||||
| a = tensor(np.ones((1,), np.int32)) | |||||
| b = tensor(np.ones((3, 3), np.int32)) | |||||
| offsets_val = [0, 1, 1, 10] | |||||
| offsets = tensor(offsets_val, np.int32) | |||||
| c = param_pack_concat([a, b], offsets, offsets_val) | |||||
| print(c.numpy()) | |||||
| Outputs: | |||||
| .. testoutput:: | |||||
| [1 1 1 1 1 1 1 1 1 1] | |||||
| """ | |||||
| op = ParamPackConcat() | |||||
| op.offsets = offsets_val | |||||
| return apply(op, *inps, offsets)[0] | |||||
| def get_offsets(shapes): | |||||
| offsets = [] | |||||
| offset = 0 | |||||
| for shape in shapes: | |||||
| offsets.append(offset) | |||||
| offset += int(np.prod(shape)) | |||||
| offsets.append(offset) | |||||
| return offsets | |||||
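A worked illustration of the `[begin0, end0, begin1, end1]` layout that `get_offsets` produces for the pack/split helpers above:

    # a (1,) vector followed by a (3, 3) matrix
    shapes = [(1,), (3, 3)]
    print(get_offsets(shapes))
    # [0, 1, 1, 10]: tensor 0 occupies [0, 1), tensor 1 occupies [1, 10)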
| def pack_allreduce_split(pack_list, shapes, group, reduce_method): | |||||
| offsets_val = get_offsets(shapes) | |||||
| offsets = Tensor(offsets_val) | |||||
| packed_grads = param_pack_concat(pack_list, offsets, offsets_val) | |||||
| packed_grads = all_reduce_sum(packed_grads, group, group.comp_node) | |||||
| if reduce_method == "mean": | |||||
| packed_grads /= group.size | |||||
| grads = param_pack_split(packed_grads, offsets_val, shapes) | |||||
| return grads | |||||
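Putting the helpers together, a sketch of the fuse, all-reduce, split round trip performed above; it assumes an already-initialized group and a list `grads` of per-parameter gradient tensors:

    # inside each process of an initialized group
    shapes = [g.shape for g in grads]  # e.g. [(1,), (3, 3)]
    averaged = pack_allreduce_split(grads, shapes, WORLD, "mean")
    # averaged[i] has the same shape as grads[i], averaged over ranks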
| class TensorFuture(Future): | class TensorFuture(Future): | ||||
| @@ -54,28 +155,43 @@ def synchronized(func: Callable): | |||||
| return wrapper | return wrapper | ||||
| def get_device_count_by_fork(device_type: str): | |||||
| q = mp.Queue() | |||||
| def _get_device_count_worker(queue, device_type): | |||||
| num = get_device_count(device_type) | |||||
| queue.put(num) | |||||
| def worker(queue): | |||||
| num = get_device_count(device_type) | |||||
| queue.put(num) | |||||
| p = mp.Process(target=worker, args=(q,)) | |||||
| def get_device_count_by_fork(device_type: str): | |||||
| """Get device count in fork thread. | |||||
| See https://stackoverflow.com/questions/22950047/cuda-initialization-error-after-fork | |||||
| for more information. | |||||
| """ | |||||
| q = mp.Queue() | |||||
| p = mp.Process(target=_get_device_count_worker, args=(q, device_type)) | |||||
| p.start() | p.start() | ||||
| p.join() | p.join() | ||||
| return q.get() | return q.get() | ||||
| def bcast_list_(params, group): | |||||
| for p in params: | |||||
| p._reset(broadcast(p, group)) | |||||
| def bcast_list_(inps: list, group: Group = WORLD): | |||||
| """Broadcast tensors between given group. | |||||
| :param inps: input tensors. | |||||
| :param group: communication group. | |||||
| """ | |||||
| for inp in inps: | |||||
| inp._reset(broadcast(inp, group)) | |||||
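A typical use is synchronizing parameters from rank 0 to every other rank before training starts; a sketch assuming `bcast_list_` is re-exported from `megengine.distributed` and `model` is any `Module`:

    import megengine.distributed as dist

    # run on every rank after init_process_group; rank 0's values
    # overwrite the others in place via broadcast + _reset
    dist.bcast_list_(list(model.parameters()))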
| class AllreduceCallback: | class AllreduceCallback: | ||||
| def __init__(self, reduce_method, group=WORLD): | |||||
| """Allreduce Callback with tensor fusion optimization. | |||||
| :param reduce_method: the method to reduce gradients. | |||||
| :param group: communication group. | |||||
| """ | |||||
| def __init__(self, reduce_method: str, group: Group = WORLD): | |||||
| reduce_method = reduce_method.lower() | reduce_method = reduce_method.lower() | ||||
| assert reduce_method in ["sum", "mean"] | |||||
| assert reduce_method in ["sum", "mean"], "reduce_method should be sum or mean" | |||||
| self._reduce_method = reduce_method | self._reduce_method = reduce_method | ||||
| self._group = group | self._group = group | ||||
| self._marked_gm = WeakSet() | self._marked_gm = WeakSet() | ||||
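The callback is designed to be registered on a `GradManager`, so gradients are fused per dtype and all-reduced as they are produced during backward; a sketch under the assumption that the class is reachable at `megengine.distributed.helper`:

    from megengine.autodiff import GradManager
    import megengine.distributed as dist

    gm = GradManager()
    # average fused gradients across the WORLD group
    cb = dist.helper.AllreduceCallback("mean")
    gm.attach(model.parameters(), callbacks=[cb])

    with gm:
        loss = model(data)
        gm.backward(loss)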
| @@ -88,6 +204,7 @@ class AllreduceCallback: | |||||
| self._futures_dict = dict() | self._futures_dict = dict() | ||||
| self._packing_list = defaultdict(list) | self._packing_list = defaultdict(list) | ||||
| self._packing_size = defaultdict(int) | self._packing_size = defaultdict(int) | ||||
| self._grad_origin_device = dict() | |||||
| def _pack(self, dtype): | def _pack(self, dtype): | ||||
| grad_list = [self._gradients_dict[p] for p in self._packing_list[dtype]] | grad_list = [self._gradients_dict[p] for p in self._packing_list[dtype]] | ||||
| @@ -109,6 +226,7 @@ class AllreduceCallback: | |||||
| self._params.append(param) | self._params.append(param) | ||||
| self._futures_dict[param] = TensorFuture(ack=False) | self._futures_dict[param] = TensorFuture(ack=False) | ||||
| self._gradients_dict[param] = grad | self._gradients_dict[param] = grad | ||||
| self._grad_origin_device[param] = str(grad.device) | |||||
| dtype_str = str(np.dtype(param.dtype)) | dtype_str = str(np.dtype(param.dtype)) | ||||
| dtype_size = np.dtype(param.dtype).itemsize | dtype_size = np.dtype(param.dtype).itemsize | ||||
| @@ -123,6 +241,7 @@ class AllreduceCallback: | |||||
| self._pack(dtype) | self._pack(dtype) | ||||
| for param in self._params: | for param in self._params: | ||||
| grad = self._gradients_dict[param] | grad = self._gradients_dict[param] | ||||
| grad = copy(grad, self._grad_origin_device[param]) | |||||
| self._futures_dict[param].set(grad) | self._futures_dict[param].set(grad) | ||||
| self._reset() | self._reset() | ||||
| @@ -15,7 +15,7 @@ from .util import get_free_ports | |||||
| def _run_wrapped(func, master_ip, port, world_size, rank, dev, args, kwargs): | def _run_wrapped(func, master_ip, port, world_size, rank, dev, args, kwargs): | ||||
| """init distributed process group and run wrapped function""" | |||||
| """Init distributed process group and run wrapped function.""" | |||||
| init_process_group( | init_process_group( | ||||
| master_ip=master_ip, port=port, world_size=world_size, rank=rank, device=dev | master_ip=master_ip, port=port, world_size=world_size, rank=rank, device=dev | ||||
| ) | ) | ||||
| @@ -23,7 +23,7 @@ def _run_wrapped(func, master_ip, port, world_size, rank, dev, args, kwargs): | |||||
| def launcher(func): | def launcher(func): | ||||
| """decorator for launching multiple processes in single-machine multi-gpu training""" | |||||
| """Decorator for launching multiple processes in single-machine multi-gpu training.""" | |||||
| n_gpus = get_device_count_by_fork("gpu") | n_gpus = get_device_count_by_fork("gpu") | ||||
| @@ -21,6 +21,12 @@ from .util import get_free_ports | |||||
| class Methods: | class Methods: | ||||
| """Distributed Server Method. | |||||
| Used for exchanging information between distributed nodes. | |||||
| :param mm_server_port: multiple machine rpc server port. | |||||
| """ | |||||
| def __init__(self, mm_server_port): | def __init__(self, mm_server_port): | ||||
| self.lock = threading.Lock() | self.lock = threading.Lock() | ||||
| self.mm_server_port = mm_server_port | self.mm_server_port = mm_server_port | ||||
| @@ -31,51 +37,65 @@ class Methods: | |||||
| self.dict_barrier_event = defaultdict(threading.Event) | self.dict_barrier_event = defaultdict(threading.Event) | ||||
| def connect(self): | def connect(self): | ||||
| """Method for checking connection success.""" | |||||
| return True | return True | ||||
| def get_mm_server_port(self): | def get_mm_server_port(self): | ||||
| """Get multiple machine rpc server port.""" | |||||
| return self.mm_server_port | return self.mm_server_port | ||||
| def set_is_grad(self, rank_peer, is_grad): | |||||
| def set_is_grad(self, key, is_grad): | |||||
| """Mark send/recv need gradiants by key. | |||||
| :param key: key to match send/recv op. | |||||
| :param is_grad: whether this op needs grad. | |||||
| """ | |||||
| with self.lock: | with self.lock: | ||||
| future = self.dict_is_grad[rank_peer] | |||||
| future = self.dict_is_grad[key] | |||||
| future.set(is_grad) | future.set(is_grad) | ||||
| return True | return True | ||||
| def check_is_grad(self, rank_peer): | |||||
| def check_is_grad(self, key): | |||||
| """Check whether send/recv need gradiants. | |||||
| :param key: key to match send/recv op. | |||||
| """ | |||||
| with self.lock: | with self.lock: | ||||
| future = self.dict_is_grad[rank_peer] | |||||
| future = self.dict_is_grad[key] | |||||
| ret = future.get() | ret = future.get() | ||||
| with self.lock: | with self.lock: | ||||
| del self.dict_is_grad[rank_peer] | |||||
| del self.dict_is_grad[key] | |||||
| return ret | return ret | ||||
| def set_remote_tracer(self, rank_peer, tracer_set): | |||||
| def set_remote_tracer(self, key, tracer_set): | |||||
| """Set tracer dict for tracing send/recv op. | |||||
| :param key: key to match send/recv op. | |||||
| :param tracer_set: valid tracer set. | |||||
| """ | |||||
| with self.lock: | with self.lock: | ||||
| future = self.dict_remote_tracer[rank_peer] | |||||
| future = self.dict_remote_tracer[key] | |||||
| future.set(tracer_set) | future.set(tracer_set) | ||||
| return True | return True | ||||
| def check_remote_tracer(self, rank_peer): | |||||
| def check_remote_tracer(self, key): | |||||
| """Get tracer dict for send/recv op. | |||||
| :param key: key to match send/recv op. | |||||
| """ | |||||
| with self.lock: | with self.lock: | ||||
| future = self.dict_remote_tracer[rank_peer] | |||||
| future = self.dict_remote_tracer[key] | |||||
| ret = future.get() | ret = future.get() | ||||
| with self.lock: | with self.lock: | ||||
| del self.dict_remote_tracer[rank_peer] | |||||
| del self.dict_remote_tracer[key] | |||||
| return ret | return ret | ||||
| def set_pack_list(self, key, pack_list): | |||||
| with self.lock: | |||||
| future = self.dict_pack_list[key] | |||||
| future.set(pack_list) | |||||
| return True | |||||
| def get_pack_list(self, key): | |||||
| with self.lock: | |||||
| future = self.dict_pack_list[key] | |||||
| return future.get() | |||||
| def group_barrier(self, key, size): | def group_barrier(self, key, size): | ||||
| """A barrier wait for all group member. | |||||
| :param key: group key to match each other. | |||||
| :param size: group size. | |||||
| """ | |||||
| with self.lock: | with self.lock: | ||||
| self.dict_barrier_counter[key] += 1 | self.dict_barrier_counter[key] += 1 | ||||
| counter = self.dict_barrier_counter[key] | counter = self.dict_barrier_counter[key] | ||||
| @@ -94,12 +114,23 @@ class ThreadXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer): | |||||
| def start_server(py_server_port, mm_server_port): | def start_server(py_server_port, mm_server_port): | ||||
| """Start python distributed server and multiple machine server. | |||||
| :param py_server_port: python server port. | |||||
| :param mm_server_port: multiple machine server port. | |||||
| """ | |||||
| server = ThreadXMLRPCServer(("0.0.0.0", py_server_port), logRequests=False) | server = ThreadXMLRPCServer(("0.0.0.0", py_server_port), logRequests=False) | ||||
| server.register_instance(Methods(mm_server_port)) | server.register_instance(Methods(mm_server_port)) | ||||
| server.serve_forever() | server.serve_forever() | ||||
| class Server: | class Server: | ||||
| """Distributed Server for distributed training. | |||||
| Should be running on the master node. | |||||
| :param port: python server port. | |||||
| """ | |||||
| def __init__(self, port): | def __init__(self, port): | ||||
| self.py_server_port = get_free_ports(1)[0] if port == 0 else port | self.py_server_port = get_free_ports(1)[0] if port == 0 else port | ||||
| self.mm_server_port = create_mm_server("0.0.0.0", 0) | self.mm_server_port = create_mm_server("0.0.0.0", 0) | ||||
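How the two ends fit together, as a sketch (normally `init_process_group` constructs the `Client` for you; the hostname, port hand-off, and group size are placeholders):

    # on the master node
    server = Server(port=0)  # 0 picks a free port
    # ... publish server.py_server_port to the workers ...

    # on each worker node
    client = Client("master-hostname", published_port)
    client.group_barrier("startup", size=2)  # returns once both workers arrive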
| @@ -112,12 +143,19 @@ class Server: | |||||
| class Client: | class Client: | ||||
| """Distributed Client for distributed training. | |||||
| :param master_ip: ip address of master node. | |||||
| :param port: port of server at master node. | |||||
| """ | |||||
| def __init__(self, master_ip, port): | def __init__(self, master_ip, port): | ||||
| self.master_ip = master_ip | self.master_ip = master_ip | ||||
| self.port = port | self.port = port | ||||
| self.connect() | self.connect() | ||||
| def connect(self): | def connect(self): | ||||
| """Check connection success.""" | |||||
| while True: | while True: | ||||
| try: | try: | ||||
| self.proxy = ServerProxy( | self.proxy = ServerProxy( | ||||
| @@ -129,25 +167,43 @@ class Client: | |||||
| time.sleep(1) | time.sleep(1) | ||||
| def get_mm_server_port(self): | def get_mm_server_port(self): | ||||
| """Get multiple machine server port.""" | |||||
| return self.proxy.get_mm_server_port() | return self.proxy.get_mm_server_port() | ||||
| def set_is_grad(self, rank_peer, is_grad): | |||||
| self.proxy.set_is_grad(rank_peer, is_grad) | |||||
| def check_is_grad(self, rank_peer): | |||||
| return self.proxy.check_is_grad(rank_peer) | |||||
| def set_remote_tracer(self, rank_peer, tracer_set): | |||||
| self.proxy.set_remote_tracer(rank_peer, tracer_set) | |||||
| def check_remote_tracer(self, rank_peer): | |||||
| return self.proxy.check_remote_tracer(rank_peer) | |||||
| def set_pack_list(self, key, pack_list): | |||||
| self.proxy.set_pack_list(key, pack_list) | |||||
| def get_pack_list(self, key): | |||||
| return self.proxy.get_pack_list(key) | |||||
| def set_is_grad(self, key, is_grad): | |||||
| """Mark send/recv need gradiants by key. | |||||
| :param key: key to match send/recv op. | |||||
| :param is_grad: whether this op needs grad. | |||||
| """ | |||||
| self.proxy.set_is_grad(key, is_grad) | |||||
| def check_is_grad(self, key): | |||||
| """Check whether send/recv need gradiants. | |||||
| :param key: key to match send/recv op. | |||||
| """ | |||||
| return self.proxy.check_is_grad(key) | |||||
| def set_remote_tracer(self, key, tracer_set): | |||||
| """Set tracer dict for tracing send/recv op. | |||||
| :param key: key to match send/recv op. | |||||
| :param tracer_set: valid tracer set. | |||||
| """ | |||||
| self.proxy.set_remote_tracer(key, tracer_set) | |||||
| def check_remote_tracer(self, key): | |||||
| """Get tracer dict for send/recv op. | |||||
| :param key: key to match send/recv op. | |||||
| """ | |||||
| return self.proxy.check_remote_tracer(key) | |||||
| def group_barrier(self, key, size): | def group_barrier(self, key, size): | ||||
| """A barrier wait for all group member. | |||||
| :param key: group key to match each other. | |||||
| :param size: group size. | |||||
| """ | |||||
| self.proxy.group_barrier(key, size) | self.proxy.group_barrier(key, size) | ||||
| @@ -8,13 +8,10 @@ | |||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| # pylint: disable=redefined-builtin | # pylint: disable=redefined-builtin | ||||
| from .elemwise import * | from .elemwise import * | ||||
| from .graph import add_update | |||||
| from .loss import * | |||||
| from .math import * | from .math import * | ||||
| from .nn import * | from .nn import * | ||||
| from .quantized import conv_bias_activation | |||||
| from .tensor import * | from .tensor import * | ||||
| from .utils import accuracy, copy | |||||
| from .utils import * | |||||
| from . import distributed # isort:skip | from . import distributed # isort:skip | ||||
| @@ -26,14 +26,14 @@ def set_conv_execution_strategy(option: str): | |||||
| Available values: | Available values: | ||||
| * 'HEURISTIC' uses heuristic to choose the fastest algorithm. | * 'HEURISTIC' uses heuristic to choose the fastest algorithm. | ||||
| * 'PROFILE' runs possible algorithms on real device to find the best. | |||||
| * 'PROFILE_HEURISTIC' uses profile result and heuristic to choose the fastest algorithm. | |||||
| * 'PROFILE_REPRODUCIBLE' uses the fastest of profile result that is also reproducible. | |||||
| * 'PROFILE' runs possible algorithms on real device to find the best one. | |||||
| * 'PROFILE_HEURISTIC' uses profiling result and heuristic to choose the fastest algorithm. | |||||
| * 'PROFILE_REPRODUCIBLE' uses the fastest of profiling result that is also reproducible. | |||||
| * 'HEURISTIC_REPRODUCIBLE' uses heuristic to choose the fastest algorithm that is also reproducible. | * 'HEURISTIC_REPRODUCIBLE' uses heuristic to choose the fastest algorithm that is also reproducible. | ||||
| The default strategy is 'HEURISTIC'. | The default strategy is 'HEURISTIC'. | ||||
| It can also be set through the environmental variable 'MEGENGINE_CONV_EXECUTION_STRATEGY'. | |||||
| It can also be set through the environment variable 'MEGENGINE_CONV_EXECUTION_STRATEGY'. | |||||
| """ | """ | ||||
| valid_option = ( | valid_option = ( | ||||
| "HEURISTIC", | "HEURISTIC", | ||||
| @@ -26,23 +26,22 @@ __all__ = [ | |||||
| "acosh", | "acosh", | ||||
| "atanh", | "atanh", | ||||
| "ceil", | "ceil", | ||||
| "clamp", | |||||
| "clip", | |||||
| "cos", | "cos", | ||||
| "cosh", | "cosh", | ||||
| "div", | "div", | ||||
| "eq", | |||||
| "equal", | |||||
| "exp", | "exp", | ||||
| "expm1", | "expm1", | ||||
| "fast_tanh", | |||||
| "floor", | "floor", | ||||
| "floor_div", | "floor_div", | ||||
| "gt", | |||||
| "ge", | |||||
| "greater", | |||||
| "greater_equal", | |||||
| "hswish", | "hswish", | ||||
| "hsigmoid", | "hsigmoid", | ||||
| "left_shift", | "left_shift", | ||||
| "lt", | |||||
| "le", | |||||
| "less", | |||||
| "less_equal", | |||||
| "log", | "log", | ||||
| "log1p", | "log1p", | ||||
| "logical_and", | "logical_and", | ||||
| @@ -54,7 +53,7 @@ __all__ = [ | |||||
| "mod", | "mod", | ||||
| "mul", | "mul", | ||||
| "neg", | "neg", | ||||
| "ne", | |||||
| "not_equal", | |||||
| "pow", | "pow", | ||||
| "relu", | "relu", | ||||
| "relu6", | "relu6", | ||||
| @@ -88,13 +87,6 @@ def _elwise(*args, mode): | |||||
| return result | return result | ||||
| def _logical(*args, mode): | |||||
| op = builtin.CondExecPredLogical(mode=mode) | |||||
| args = utils.convert_inputs(*args) | |||||
| (result,) = apply(op, *args) | |||||
| return result | |||||
| def _elemwise_multi_type(*args, mode, **kwargs): | def _elemwise_multi_type(*args, mode, **kwargs): | ||||
| op = builtin.ElemwiseMultiType(mode=mode, **kwargs) | op = builtin.ElemwiseMultiType(mode=mode, **kwargs) | ||||
| args = utils.convert_inputs(*args) | args = utils.convert_inputs(*args) | ||||
| @@ -106,9 +98,10 @@ def _elemwise_multi_type(*args, mode, **kwargs): | |||||
| def add(x, y): | def add(x, y): | ||||
| """Element-wise addition. | |||||
| """Element-wise `addition`. | |||||
| At least one operand should be tensor. | At least one operand should be tensor. | ||||
| Same for sub/mul/div/floor_div/pow/mod/atan2/eq/ne/lt/le/gt/ge/maximum/minmium. | |||||
| Same for sub/mul/div/floor_div/pow/mod/atan2/equal/not_equal/less/less_equal/greater/greater_equal/maximum/minimum. | |||||
| :param x: input tensor. | :param x: input tensor. | ||||
| :return: computed tensor. | :return: computed tensor. | ||||
| @@ -138,68 +131,68 @@ def add(x, y): | |||||
| def sub(x, y): | def sub(x, y): | ||||
| """Element-wise subtraction.""" | |||||
| """Element-wise `subtraction`.""" | |||||
| return _elwise(x, y, mode="sub") | return _elwise(x, y, mode="sub") | ||||
| def mul(x, y): | def mul(x, y): | ||||
| """Element-wise multiplication.""" | |||||
| """Element-wise `multiplication`.""" | |||||
| return _elwise(x, y, mode="mul") | return _elwise(x, y, mode="mul") | ||||
| def div(x, y): | def div(x, y): | ||||
| """Element-wise (x / y).""" | |||||
| """Element-wise `(x / y)`.""" | |||||
| return _elwise(x, y, mode="true_div") | return _elwise(x, y, mode="true_div") | ||||
| def floor_div(x, y): | def floor_div(x, y): | ||||
| """Element-wise floor(x / y).""" | |||||
| """Element-wise `floor(x / y)`.""" | |||||
| return _elwise(x, y, mode="floor_divide") | return _elwise(x, y, mode="floor_divide") | ||||
| def neg(x): | def neg(x): | ||||
| """Element-wise negation.""" | |||||
| """Element-wise `negation`.""" | |||||
| return _elwise(x, mode="negate") | return _elwise(x, mode="negate") | ||||
| def pow(x, y): | def pow(x, y): | ||||
| """Element-wise power.""" | |||||
| """Element-wise `power`.""" | |||||
| return _elwise(x, y, mode="pow") | return _elwise(x, y, mode="pow") | ||||
| def mod(x, y): | def mod(x, y): | ||||
| """Element-wise remainder of division.""" | |||||
| """Element-wise `remainder of division`.""" | |||||
| return _elwise(x, y, mode="mod") | return _elwise(x, y, mode="mod") | ||||
| def abs(x): | def abs(x): | ||||
| """Element-wise absolute value.""" | |||||
| """Element-wise `absolute value`.""" | |||||
| return _elwise(x, mode="abs") | return _elwise(x, mode="abs") | ||||
| def exp(x): | def exp(x): | ||||
| """Element-wise exponential.""" | |||||
| """Element-wise `exponential`.""" | |||||
| return _elwise(x, mode="exp") | return _elwise(x, mode="exp") | ||||
| def expm1(x): | def expm1(x): | ||||
| """Element-wise exp(x)-1.""" | |||||
| """Element-wise `exp(x)-1`.""" | |||||
| return _elwise(x, mode="expm1") | return _elwise(x, mode="expm1") | ||||
| def log(x): | def log(x): | ||||
| """Element-wise logarithm (base `e`).""" | |||||
| """Element-wise `logarithm (base e)`.""" | |||||
| return _elwise(x, mode="log") | return _elwise(x, mode="log") | ||||
| def log1p(x): | def log1p(x): | ||||
| """Element-wise log(x+1) (base `e`).""" | |||||
| """Element-wise `log(x+1) (base e)`.""" | |||||
| return _elwise(x, mode="log1p") | return _elwise(x, mode="log1p") | ||||
| def sqrt(x: Tensor) -> Tensor: | def sqrt(x: Tensor) -> Tensor: | ||||
| """Element-wise sqrt. | |||||
| For negative input value, return ``NaN``. | |||||
| """Element-wise `sqrt`. | |||||
| Returns ``NaN`` for negative input value. | |||||
| :param x: input tensor. | :param x: input tensor. | ||||
| :return: computed tensor. | :return: computed tensor. | ||||
| @@ -229,10 +222,10 @@ def sqrt(x: Tensor) -> Tensor: | |||||
| def square(x: Tensor) -> Tensor: | def square(x: Tensor) -> Tensor: | ||||
| """ | """ | ||||
| Return a new tensor with the square of the elements of input tensor. | |||||
| Returns a new tensor with the square of the elements of input tensor. | |||||
| :param inp: The input tensor | |||||
| :return: The computed tensor | |||||
| :param x: input tensor. | |||||
| :return: computed tensor. | |||||
| Examples: | Examples: | ||||
| @@ -258,27 +251,27 @@ def square(x: Tensor) -> Tensor: | |||||
| def round(x): | def round(x): | ||||
| """Element-wise rounding to int.""" | |||||
| """Element-wise `rounding to int`.""" | |||||
| return _elwise(x, mode="round") | return _elwise(x, mode="round") | ||||
| def ceil(x): | def ceil(x): | ||||
| """Element-wise ceiling.""" | |||||
| """Element-wise `ceiling`.""" | |||||
| return _elwise(x, mode="ceil") | return _elwise(x, mode="ceil") | ||||
| def floor(x): | def floor(x): | ||||
| """Element-wise floor.""" | |||||
| """Element-wise `floor`.""" | |||||
| return _elwise(x, mode="floor") | return _elwise(x, mode="floor") | ||||
| def maximum(x, y): | def maximum(x, y): | ||||
| """Element-wise maximum of array elements.""" | |||||
| """Element-wise `maximum of array elements`.""" | |||||
| return _elwise(x, y, mode="max") | return _elwise(x, y, mode="max") | ||||
| def minimum(x, y): | def minimum(x, y): | ||||
| """Element-wise minimum of array elements.""" | |||||
| """Element-wise `minimum of array elements`.""" | |||||
| return _elwise(x, y, mode="min") | return _elwise(x, y, mode="min") | ||||
| @@ -286,7 +279,7 @@ def minimum(x, y): | |||||
| def cos(x): | def cos(x): | ||||
| """Element-wise cosine. | |||||
| """Element-wise `cosine`. | |||||
| :param x: input tensor. | :param x: input tensor. | ||||
| :return: computed tensor. | :return: computed tensor. | ||||
| @@ -315,80 +308,71 @@ def cos(x): | |||||
| def sin(x): | def sin(x): | ||||
| """Element-wise sine.""" | |||||
| """Element-wise `sine`.""" | |||||
| return _elwise(x, mode="sin") | return _elwise(x, mode="sin") | ||||
| def tan(x): | def tan(x): | ||||
| """Element-wise tangent.""" | |||||
| """Element-wise `tangent`.""" | |||||
| return sin(x) / cos(x) | return sin(x) / cos(x) | ||||
| def acos(x): | def acos(x): | ||||
| """Element-wise inverse cosine.""" | |||||
| """Element-wise `inverse cosine`.""" | |||||
| return _elwise(x, mode="acos") | return _elwise(x, mode="acos") | ||||
| def asin(x): | def asin(x): | ||||
| """Element-wise inverse sine.""" | |||||
| """Element-wise `inverse sine`.""" | |||||
| return _elwise(x, mode="asin") | return _elwise(x, mode="asin") | ||||
| def atan(x): | def atan(x): | ||||
| """Element-wise inverse tangent.""" | |||||
| """Element-wise `inverse tangent`.""" | |||||
| return _elwise(x, 1, mode="atan2") | return _elwise(x, 1, mode="atan2") | ||||
| def atan2(y, x): | def atan2(y, x): | ||||
| """Element-wise 2-argument arctangent.""" | |||||
| """Element-wise `2-argument arctangent`.""" | |||||
| return _elwise(y, x, mode="atan2") | return _elwise(y, x, mode="atan2") | ||||
| def cosh(x): | def cosh(x): | ||||
| r"""Element-wise hyperbolic cosine.""" | |||||
| r"""Element-wise `hyperbolic cosine`.""" | |||||
| return 0.5 * (exp(x) + exp(-x)) | return 0.5 * (exp(x) + exp(-x)) | ||||
| def sinh(x): | def sinh(x): | ||||
| r"""Element-wise hyperbolic sine.""" | |||||
| r"""Element-wise `hyperbolic sine`.""" | |||||
| u = expm1(x) | u = expm1(x) | ||||
| return 0.5 * u / (u + 1) * (u + 2) | return 0.5 * u / (u + 1) * (u + 2) | ||||
| def tanh(x): | def tanh(x): | ||||
| r"""Element-wise hyperbolic tangent.""" | |||||
| r"""Element-wise `hyperbolic tangent`.""" | |||||
| return _elwise(x, mode="tanh") | return _elwise(x, mode="tanh") | ||||
| def asinh(x): | def asinh(x): | ||||
| r"""Element-wise inverse hyperbolic sine.""" | |||||
| r"""Element-wise `inverse hyperbolic sine`.""" | |||||
| return log(x + (x ** 2 + 1) ** 0.5) | return log(x + (x ** 2 + 1) ** 0.5) | ||||
| def acosh(x): | def acosh(x): | ||||
| r"""Element-wise inverse hyperbolic cosine.""" | |||||
| r"""Element-wise `inverse hyperbolic cosine`.""" | |||||
| return log(x + (x ** 2 - 1) ** 0.5) | return log(x + (x ** 2 - 1) ** 0.5) | ||||
| def atanh(x): | def atanh(x): | ||||
| r"""Element-wise inverse hyperbolic tangent.""" | |||||
| r"""Element-wise `inverse hyperbolic tangent`.""" | |||||
| return log1p(2 * x / (1 - x)) / 2 | return log1p(2 * x / (1 - x)) / 2 | ||||
| def fast_tanh(x): | |||||
| r"""Element-wise fast tanh; this is an approximation: | |||||
| .. math:: | |||||
| \text{fast_tanh}(x) = x * (27. + x * x) / (27. + 9. * x * x) | |||||
| """ | |||||
| return _elwise(x, mode="fast_tanh") | |||||
| # bit-twiddling functions | # bit-twiddling functions | ||||
| def left_shift(x, y): | def left_shift(x, y): | ||||
| """Element-wise bitwise binary: x << y. | |||||
| """Element-wise `bitwise binary: x << y`. | |||||
| :param x: input tensor, should be int. | :param x: input tensor, should be int. | ||||
| :param y: how many bits to be left-shifted. | :param y: how many bits to be left-shifted. | ||||
| @@ -418,7 +402,7 @@ def left_shift(x, y): | |||||
| def right_shift(x, y): | def right_shift(x, y): | ||||
| """Element-wise bitwise binary: x >> y.""" | |||||
| """Element-wise `bitwise binary: x >> y`.""" | |||||
| return _elwise(x, y, mode="shr") | return _elwise(x, y, mode="shr") | ||||
| @@ -426,30 +410,30 @@ def right_shift(x, y): | |||||
| def logical_and(x, y): | def logical_and(x, y): | ||||
| """Element-wise logical and: x && y.""" | |||||
| """Element-wise `logical and: x && y`.""" | |||||
| return _elwise(x, y, mode="AND") | return _elwise(x, y, mode="AND") | ||||
| def logical_not(x): | def logical_not(x): | ||||
| """Element-wise logical not: ~x.""" | |||||
| """Element-wise `logical not: ~x`.""" | |||||
| return _elwise(x, mode="NOT") | return _elwise(x, mode="NOT") | ||||
| def logical_or(x, y): | def logical_or(x, y): | ||||
| """Element-wise logical or: x || y.""" | |||||
| """Element-wise `logical or: x || y`.""" | |||||
| return _elwise(x, y, mode="OR") | return _elwise(x, y, mode="OR") | ||||
| def logical_xor(x, y): | def logical_xor(x, y): | ||||
| """Element-wise logical xor: x ^ y.""" | |||||
| """Element-wise `logical xor: x ^ y`.""" | |||||
| return _elwise(x, y, mode="XOR") | return _elwise(x, y, mode="XOR") | ||||
| # comparison functions | # comparison functions | ||||
| def eq(x, y): | |||||
| """Element-wise (x == y). | |||||
| def equal(x, y): | |||||
| """Element-wise `(x == y)`. | |||||
| :param x: input tensor 1. | :param x: input tensor 1. | ||||
| :param y: input tensor 2. | :param y: input tensor 2. | ||||
| @@ -465,7 +449,7 @@ def eq(x, y): | |||||
| x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | ||||
| y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | ||||
| out = F.eq(x, y) | |||||
| out = F.equal(x, y) | |||||
| print(out.numpy()) | print(out.numpy()) | ||||
| Outputs: | Outputs: | ||||
| @@ -479,28 +463,28 @@ def eq(x, y): | |||||
| return _elwise(x, y, mode="eq") | return _elwise(x, y, mode="eq") | ||||
| def ne(x, y): | |||||
| """Element-wise (x != y).""" | |||||
| def not_equal(x, y): | |||||
| """Element-wise `(x != y)`.""" | |||||
| return x != y | return x != y | ||||
| def lt(x, y): | |||||
| """Element-wise (x < y).""" | |||||
| def less(x, y): | |||||
| """Element-wise `(x < y)`.""" | |||||
| return _elwise(x, y, mode="lt") | return _elwise(x, y, mode="lt") | ||||
| def le(x, y): | |||||
| """Element-wise (x <= y).""" | |||||
| def less_equal(x, y): | |||||
| """Element-wise `(x <= y)`.""" | |||||
| return _elwise(x, y, mode="leq") | return _elwise(x, y, mode="leq") | ||||
| def gt(x, y): | |||||
| """Element-wise (x > y).""" | |||||
| def greater(x, y): | |||||
| """Element-wise `(x > y)`.""" | |||||
| return _elwise(y, x, mode="lt") | return _elwise(y, x, mode="lt") | ||||
| def ge(x, y): | |||||
| """Element-wise (x >= y).""" | |||||
| def greater_equal(x, y): | |||||
| """Element-wise `(x >= y)`.""" | |||||
| return _elwise(y, x, mode="leq") | return _elwise(y, x, mode="leq") | ||||
| @@ -508,7 +492,7 @@ def ge(x, y): | |||||
| def hswish(x): | def hswish(x): | ||||
| """Element-wise x * relu6(x + 3) / 6. | |||||
| """Element-wise `x * relu6(x + 3) / 6`. | |||||
| :param x: input tensor. | :param x: input tensor. | ||||
| :return: computed tensor. | :return: computed tensor. | ||||
| @@ -534,7 +518,7 @@ def hswish(x): | |||||
| def hsigmoid(x): | def hsigmoid(x): | ||||
| """Element-wise relu6(x + 3) / 6.""" | |||||
| """Element-wise `relu6(x + 3) / 6`.""" | |||||
| return relu6(x + 3) / 6 | return relu6(x + 3) / 6 | ||||
| @@ -544,16 +528,16 @@ def relu(x): | |||||
| def relu6(x): | def relu6(x): | ||||
| """Element-wise min(max(x, 0), 6).""" | |||||
| """Element-wise `min(max(x, 0), 6)`.""" | |||||
| return minimum(maximum(x, 0), 6) | return minimum(maximum(x, 0), 6) | ||||
| def sigmoid(x): | def sigmoid(x): | ||||
| """Element-wise 1 / ( 1 + exp( -x ) ).""" | |||||
| """Element-wise `1 / ( 1 + exp( -x ) )`.""" | |||||
| return _elwise(x, mode="sigmoid") | return _elwise(x, mode="sigmoid") | ||||
| def clamp(x: Tensor, lower=None, upper=None) -> Tensor: | |||||
| def clip(x: Tensor, lower=None, upper=None) -> Tensor: | |||||
| r"""Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns | r"""Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns | ||||
| the resulting tensor: | the resulting tensor: | ||||
| @@ -578,9 +562,9 @@ def clamp(x: Tensor, lower=None, upper=None) -> Tensor: | |||||
| import megengine.functional as F | import megengine.functional as F | ||||
| a = tensor(np.arange(5).astype(np.int32)) | a = tensor(np.arange(5).astype(np.int32)) | ||||
| print(F.clamp(a, 2, 4).numpy()) | |||||
| print(F.clamp(a, lower=3).numpy()) | |||||
| print(F.clamp(a, upper=3).numpy()) | |||||
| print(F.clip(a, 2, 4).numpy()) | |||||
| print(F.clip(a, lower=3).numpy()) | |||||
| print(F.clip(a, upper=3).numpy()) | |||||
| Outputs: | Outputs: | ||||
| @@ -596,7 +580,7 @@ def clamp(x: Tensor, lower=None, upper=None) -> Tensor: | |||||
| ), "At least one of 'lower' or 'upper' must not be None" | ), "At least one of 'lower' or 'upper' must not be None" | ||||
| if lower is not None: | if lower is not None: | ||||
| if upper is not None: | if upper is not None: | ||||
| assert lower <= upper, "clamp lower bound is bigger that upper bound" | |||||
| assert lower <= upper, "clip lower bound is bigger that upper bound" | |||||
| return minimum(maximum(x, lower), upper) | return minimum(maximum(x, lower), upper) | ||||
| else: | else: | ||||
| return maximum(x, lower) | return maximum(x, lower) | ||||
| @@ -1,44 +0,0 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| # | |||||
| # Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, | |||||
| # software distributed under the License is distributed on an | |||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # pylint: disable=too-many-lines | |||||
| from typing import List | |||||
| from ..tensor import Tensor | |||||
| def cambricon_subgraph( | |||||
| inputs: List[Tensor], data: bytes, symbol: str, tensor_dim_mutable: bool, | |||||
| ) -> List[Tensor]: | |||||
| """Loads a serialized Cambricon subgraph (i.e. cnrtModel_t) and | |||||
| execute the operations defined in the subgraph. | |||||
| :param inputs: list of input tensors of the subgraph. | |||||
| :param data: the serialized subgraph. | |||||
| :param symbol: the name of the function in the subgraph. | |||||
| The function corresponds to a cnmlFusionOp | |||||
| which is added to the cnmlModel_t/cnrtModel_t. | |||||
| :param tensor_dim_mutable: whether the input tensors' shapes are mutable | |||||
| in cnrtModel_t. | |||||
| """ | |||||
| raise NotImplementedError | |||||
| def extern_opr_subgraph( | |||||
| inputs, output_shapes: List[tuple], dump_name: str, dump_data: bytes, | |||||
| ) -> List[Tensor]: | |||||
| """Loads a serialized extern opr subgraph and fake execute the operator. | |||||
| :param inputs: tensor or list of input tensors. | |||||
| :param output_shapes: the output shapes. | |||||
| :param dump_name: the serialized subgraph name. | |||||
| :param dump_data: the serialized subgraph. | |||||
| :return: list of tensors. | |||||
| """ | |||||
| raise NotImplementedError | |||||
| @@ -1,41 +0,0 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| # | |||||
| # Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, | |||||
| # software distributed under the License is distributed on an | |||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| import collections | |||||
| from typing import Iterable, Optional, Union | |||||
| from ..tensor import Tensor | |||||
| def add_update( | |||||
| dest: Tensor, | |||||
| delta: Tensor, | |||||
| *, | |||||
| alpha: Union[Tensor, float, int] = 1.0, | |||||
| beta: Union[Tensor, float, int] = 1.0, | |||||
| bias: Union[Tensor, float, int] = 0.0 | |||||
| ): | |||||
| r"""Modify ``dest`` inplace as follows: | |||||
| .. math:: | |||||
| dest = alpha * dest + beta * delta + bias | |||||
| :param dest: input data that will be inplace modified. | |||||
| :param delta: update value that will be added to ``dest``. | |||||
| :param alpha: weight ratio of ``dest``. Default: 1.0 | |||||
| :param beta: weight ratio of ``delta``. Default: 1.0 | |||||
| :param bias: bias value appended to the result. Default: 0.0 | |||||
| """ | |||||
| if beta is not None and beta != 1.0: | |||||
| delta = delta * beta | |||||
| if bias is not None and bias != 0.0: | |||||
| delta = delta + bias | |||||
| if alpha is not None and alpha != 1.0: | |||||
| dest *= alpha | |||||
| dest += delta | |||||
| return dest | |||||
| @@ -10,14 +10,14 @@ import numpy as np | |||||
| from ..core.tensor.utils import make_shape_tuple | from ..core.tensor.utils import make_shape_tuple | ||||
| from ..tensor import Tensor | from ..tensor import Tensor | ||||
| from .elemwise import abs, eq, exp, log, maximum, pow, relu | |||||
| from .nn import indexing_one_hot | |||||
| from .elemwise import abs, equal, exp, log, maximum, pow, relu | |||||
| from .nn import indexing_one_hot, logsigmoid, logsumexp | |||||
| from .tensor import where | from .tensor import where | ||||
| __all__ = [ | __all__ = [ | ||||
| "l1_loss", | "l1_loss", | ||||
| "square_loss", | "square_loss", | ||||
| "cross_entropy_with_softmax", | |||||
| "cross_entropy", | |||||
| "binary_cross_entropy", | "binary_cross_entropy", | ||||
| "hinge_loss", | "hinge_loss", | ||||
| ] | ] | ||||
| @@ -55,7 +55,7 @@ def l1_loss(pred: Tensor, label: Tensor) -> Tensor: | |||||
| ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | ||||
| tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | ||||
| loss = F.l1_loss(ipt, tgt) | |||||
| loss = F.nn.l1_loss(ipt, tgt) | |||||
| print(loss.numpy()) | print(loss.numpy()) | ||||
| Outputs: | Outputs: | ||||
| @@ -106,7 +106,7 @@ def square_loss(pred: Tensor, label: Tensor) -> Tensor: | |||||
| ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) | ||||
| tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) | ||||
| loss = F.square_loss(ipt, tgt) | |||||
| loss = F.nn.square_loss(ipt, tgt) | |||||
| print(loss.numpy()) | print(loss.numpy()) | ||||
| Outputs: | Outputs: | ||||
| @@ -120,10 +120,16 @@ def square_loss(pred: Tensor, label: Tensor) -> Tensor: | |||||
| return (diff ** 2).mean() | return (diff ** 2).mean() | ||||
| def cross_entropy_with_softmax( | |||||
| pred: Tensor, label: Tensor, axis: int = 1, label_smooth: float = 0 | |||||
| def cross_entropy( | |||||
| pred: Tensor, | |||||
| label: Tensor, | |||||
| axis: int = 1, | |||||
| with_logits: bool = True, | |||||
| label_smooth: float = 0, | |||||
| ) -> Tensor: | ) -> Tensor: | ||||
| r"""Returns loss after applying :func:`~.softmax` + :func:`~.cross_entropy`. | |||||
| r"""Compute the multi-class cross entropy loss (using logits by default). | |||||
| By default, prediction is assumed to be logits, whose softmax gives probabilities. | |||||
| It has better numerical stability compared with sequential calls to :func:`~.softmax` and :func:`~.cross_entropy`. | It has better numerical stability compared with sequential calls to :func:`~.softmax` and :func:`~.cross_entropy`. | ||||
| @@ -132,11 +138,12 @@ def cross_entropy_with_softmax( | |||||
| .. math:: y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K | .. math:: y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K | ||||
| where :math:`y^{LS}` and :math:`y` are new label distribution and origin label distribution respectively. | where :math:`y^{LS}` and :math:`y` are new label distribution and origin label distribution respectively. | ||||
| k is the index of label distribution. :math:`\alpha` is label_smooth and :math:`K` is the number of classes. | |||||
| k is the index of label distribution. :math:`\alpha` is ``label_smooth`` and :math:`K` is the number of classes. | |||||
| :param pred: input tensor representing the predicted probability. | :param pred: input tensor representing the predicted probability. | ||||
| :param label: input tensor representing the classification label. | :param label: input tensor representing the classification label. | ||||
| :param axis: an axis along which softmax will be applied. Default: 1 | :param axis: an axis along which softmax will be applied. Default: 1 | ||||
| :param with_logits: whether to apply softmax first. Default: True | |||||
| :param label_smooth: a label smoothing parameter that can re-distribute the target distribution. Default: 0 | :param label_smooth: a label smoothing parameter that can re-distribute the target distribution. Default: 0 | ||||
| :return: loss value. | :return: loss value. | ||||
| @@ -150,9 +157,9 @@ def cross_entropy_with_softmax( | |||||
| data_shape = (1, 2) | data_shape = (1, 2) | ||||
| label_shape = (1, ) | label_shape = (1, ) | ||||
| pred = tensor(np.array([0.5, 0.5], dtype=np.float32).reshape(data_shape)) | |||||
| pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape)) | |||||
| label = tensor(np.ones(label_shape, dtype=np.int32)) | label = tensor(np.ones(label_shape, dtype=np.int32)) | ||||
| loss = F.cross_entropy_with_softmax(pred, label) | |||||
| loss = F.nn.cross_entropy(pred, label) | |||||
| print(loss.numpy()) | print(loss.numpy()) | ||||
| Outputs: | Outputs: | ||||
| @@ -170,26 +177,41 @@ def cross_entropy_with_softmax( | |||||
| ) | ) | ||||
| num_classes = pred.shape[axis] | num_classes = pred.shape[axis] | ||||
| no_label_smooth = ( | |||||
| label_smooth is None or type(label_smooth) in (int, float) and label_smooth == 0 | |||||
| ) | |||||
| if not with_logits: | |||||
| if no_label_smooth: | |||||
| return -log(indexing_one_hot(pred, label, axis)).mean() | |||||
| pred = log(pred) | |||||
| return ( | |||||
| label_smooth * pred.mean() | |||||
| - (1 - label_smooth) * indexing_one_hot(pred, label, axis).mean() | |||||
| ) | |||||
| # Denominator of the softmax | # Denominator of the softmax | ||||
| offset = pred.max(axis=axis, keepdims=True).detach() | |||||
| pred = pred - offset | |||||
| down = exp(pred).sum(axis=axis, keepdims=True) | |||||
| down = logsumexp(pred, axis=axis, keepdims=True) | |||||
| up = indexing_one_hot(pred, label, axis) | up = indexing_one_hot(pred, label, axis) | ||||
| if label_smooth != 0: | |||||
| if not no_label_smooth: | |||||
| factor = label_smooth / num_classes | factor = label_smooth / num_classes | ||||
| up = up * (1 - label_smooth) + pred.sum(axis=axis, keepdims=True) * factor | up = up * (1 - label_smooth) + pred.sum(axis=axis, keepdims=True) * factor | ||||
| return (log(down) - up).mean() | |||||
| return (down - up).mean() | |||||
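To see why the ``logsumexp`` form is preferred, here is a tiny NumPy sketch (illustrative only, independent of MegEngine) of the identity the rewritten code relies on, :math:`-\log\operatorname{softmax}(x)_{y} = \operatorname{logsumexp}(x) - x_{y}`:

    import numpy as np

    def naive_ce(logits, label):
        # naive softmax followed by log: exp() overflows for large logits
        p = np.exp(logits) / np.exp(logits).sum()
        return -np.log(p[label])

    def stable_ce(logits, label):
        # the rewritten form: logsumexp(x) - x[label]
        b = logits.max()
        lse = b + np.log(np.exp(logits - b).sum())
        return lse - logits[label]

    x = np.array([1000.0, 0.0])
    print(stable_ce(x, 0))  # ~0.0, the correct loss
    print(naive_ce(x, 0))   # nan: exp(1000) overflows to inf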
| def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor: | |||||
| r"""Function that measures the Binary Cross Entropy between the target and the prediction. | |||||
| def binary_cross_entropy( | |||||
| pred: Tensor, label: Tensor, with_logits: bool = True | |||||
| ) -> Tensor: | |||||
| r"""Compute the binary cross entropy loss (using logits by default). | |||||
| By default, prediction is assumed to be logits, whose sigmoid gives probabilities. | |||||
| :param pred: `(N, *)` where `*` means any number of additional dimensions. | |||||
| :param pred: `(N, *)`, where `*` means any number of additional dimensions. | |||||
| :param label: `(N, *)`, same shape as the input. | :param label: `(N, *)`, same shape as the input. | ||||
| :param with_logits: bool, whether to apply sigmoid first. Default: True | |||||
| :return: loss value. | :return: loss value. | ||||
| Examples: | Examples: | ||||
| @@ -200,9 +222,9 @@ def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor: | |||||
| from megengine import tensor | from megengine import tensor | ||||
| import megengine.functional as F | import megengine.functional as F | ||||
| pred = tensor(np.array([0.5, 0.5], dtype=np.float32).reshape(1, 2)) | |||||
| pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2)) | |||||
| label = tensor(np.ones((1, 2), dtype=np.float32)) | label = tensor(np.ones((1, 2), dtype=np.float32)) | ||||
| loss = F.binary_cross_entropy(pred, label) | |||||
| loss = F.nn.binary_cross_entropy(pred, label) | |||||
| print(loss.numpy()) | print(loss.numpy()) | ||||
| Outputs: | Outputs: | ||||
| @@ -212,11 +234,15 @@ def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor: | |||||
| [0.6931] | [0.6931] | ||||
| """ | """ | ||||
| return -1.0 * (label * log(pred) + (1.0 - label) * log(1 - pred)).mean() | |||||
| if not with_logits: | |||||
| return -(label * log(pred) + (1 - label) * log(1 - pred)).mean() | |||||
| # logsigmoid(pred) and logsigmoid(-pred) have a common sub-expression; | |||||
| # hopefully the backend will optimize this | |||||
| return -(label * logsigmoid(pred) + (1 - label) * logsigmoid(-pred)).mean() | |||||
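The same trick carries over to the binary case; a small NumPy sketch (again illustrative, not MegEngine code) of why ``logsigmoid`` is preferred over ``log(sigmoid(...))`` for large-magnitude logits:

    import numpy as np

    def logsigmoid(x):
        # stable form: -softplus(-x) = -(log1p(exp(-|x|)) + max(-x, 0))
        return -(np.log1p(np.exp(-np.abs(x))) + np.maximum(-x, 0))

    x = np.array([-800.0, 0.0, 800.0])
    print(np.log(1.0 / (1.0 + np.exp(-x))))  # naive: [-inf -0.6931 0.]
    print(logsigmoid(x))                     # stable: [-800. -0.6931 -0.]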
| def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor: | def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor: | ||||
| r"""Caculate the hinge loss which is often used in SVMs. | |||||
| r"""Caculates the hinge loss which is often used in SVM. | |||||
| The hinge loss can be described as: | The hinge loss can be described as: | ||||
| @@ -236,7 +262,7 @@ def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor: | |||||
| pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32") | pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32") | ||||
| label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32") | label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32") | ||||
| loss = F.hinge_loss(pred, label) | |||||
| loss = F.nn.hinge_loss(pred, label) | |||||
| print(loss.numpy()) | print(loss.numpy()) | ||||
| Outputs: | Outputs: | ||||
| @@ -14,11 +14,12 @@ from typing import Optional, Sequence, Tuple, Union | |||||
| from ..core.ops import builtin | from ..core.ops import builtin | ||||
| from ..core.ops._internal import param_defs as P | from ..core.ops._internal import param_defs as P | ||||
| from ..core.ops.special import Const | |||||
| from ..core.tensor import utils | from ..core.tensor import utils | ||||
| from ..core.tensor.core import apply | |||||
| from ..core.tensor.core import TensorBase, TensorWrapperBase, apply | |||||
| from ..tensor import Tensor | from ..tensor import Tensor | ||||
| from .elemwise import clamp, exp, log, log1p | |||||
| from .tensor import add_axis, remove_axis, reshape | |||||
| from .elemwise import clip, exp, log, log1p | |||||
| from .tensor import reshape, squeeze | |||||
| __all__ = [ | __all__ = [ | ||||
| "argmax", | "argmax", | ||||
| @@ -45,7 +46,7 @@ def isnan(inp: Tensor) -> Tensor: | |||||
| r"""Returns a new tensor representing if each element is ``NaN`` or not. | r"""Returns a new tensor representing if each element is ``NaN`` or not. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :return: a new tensor representing if each element in inp is NaN or not. | |||||
| :return: result tensor. | |||||
| Examples: | Examples: | ||||
| @@ -71,7 +72,7 @@ def isinf(inp: Tensor) -> Tensor: | |||||
| r"""Returns a new tensor representing if each element is ``Inf`` or not. | r"""Returns a new tensor representing if each element is ``Inf`` or not. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :return: a new tensor representing if each element in inp is Inf or not. | |||||
| :return: result tensor. | |||||
| Examples: | Examples: | ||||
| @@ -84,7 +85,7 @@ def isinf(inp: Tensor) -> Tensor: | |||||
| print(F.isinf(x).numpy()) | print(F.isinf(x).numpy()) | ||||
| Outputs: | Outputs: | ||||
| .. testoutput:: | .. testoutput:: | ||||
| [False True False] | [False True False] | ||||
| @@ -108,7 +109,7 @@ def sign(inp: Tensor): | |||||
| x = tensor([1, -1, 0]) | x = tensor([1, -1, 0]) | ||||
| print(F.sign(x).numpy()) | print(F.sign(x).numpy()) | ||||
| Outputs: | Outputs: | ||||
| .. testoutput:: | .. testoutput:: | ||||
| @@ -128,7 +129,7 @@ def sum( | |||||
| reduce over all of them. | reduce over all of them. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced. | |||||
| :param axis: dimension to reduce. If None, all dimensions will be reduced. | |||||
| Default: None | Default: None | ||||
| :param keepdims: whether the output tensor has axis retained or not. | :param keepdims: whether the output tensor has axis retained or not. | ||||
| Default: False | Default: False | ||||
| @@ -163,7 +164,7 @@ def prod( | |||||
| reduce over all of them. | reduce over all of them. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None | |||||
| :param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None | |||||
| :param keepdims: whether the output tensor has axis retained or not. Default: False | :param keepdims: whether the output tensor has axis retained or not. Default: False | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -199,7 +200,7 @@ def mean( | |||||
| reduce over all of them. | reduce over all of them. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None | |||||
| :param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None | |||||
| :param keepdims: whether the output tensor has axis retained or not. Default: False | :param keepdims: whether the output tensor has axis retained or not. Default: False | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -235,7 +236,7 @@ def var( | |||||
| reduce over all of them. | reduce over all of them. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None | |||||
| :param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None | |||||
| :param keepdims: whether the output tensor has axis retained or not. Default: False | :param keepdims: whether the output tensor has axis retained or not. Default: False | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -275,7 +276,7 @@ def std( | |||||
| reduce over all of them. | reduce over all of them. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None | |||||
| :param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None | |||||
| :param keepdims: whether the output tensor has axis retained or not. Default: False | :param keepdims: whether the output tensor has axis retained or not. Default: False | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -310,7 +311,7 @@ def min( | |||||
| reduce over all of them. | reduce over all of them. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None | |||||
| :param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None | |||||
| :param keepdims: whether the output tensor has axis retained or not. Default: False | :param keepdims: whether the output tensor has axis retained or not. Default: False | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -346,7 +347,7 @@ def max( | |||||
| reduce over all of them. | reduce over all of them. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None | |||||
| :param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None | |||||
| :param keepdims: whether the output tensor has axis retained or not. Default: False | :param keepdims: whether the output tensor has axis retained or not. Default: False | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -373,18 +374,14 @@ def max( | |||||
| def norm( | def norm( | ||||
| inp: Tensor, | |||||
| p: int = 2, | |||||
| axis: Optional[Union[int, Sequence[int]]] = None, | |||||
| keepdims=False, | |||||
| inp: Tensor, ord: float = None, axis: int = None, keepdims=False, | |||||
| ): | ): | ||||
| """Calculates ``p``-norm of input tensor along | """Calculates ``p``-norm of input tensor along | ||||
| given axis. If axis is a list of dimensions, | |||||
| reduce over all of them. | |||||
| given axis. | |||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param p: power of value applied to inp. Default: 2 | |||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None | |||||
| :param ord: order of the norm applied to ``inp``. Default: 2 | |||||
| :param axis: dimension to reduce. If None, input must be a vector. Default: None | |||||
| :param keepdims: whether the output tensor has axis retained or not. Default: False | :param keepdims: whether the output tensor has axis retained or not. Default: False | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -396,7 +393,7 @@ def norm( | |||||
| from megengine import tensor | from megengine import tensor | ||||
| import megengine.functional as F | import megengine.functional as F | ||||
| x = tensor(np.arange(-3, 3, dtype=np.float32).reshape(2,3)) | |||||
| x = tensor(np.arange(-3, 3, dtype=np.float32)) | |||||
| out = F.norm(x) | out = F.norm(x) | ||||
| print(out.numpy()) | print(out.numpy()) | ||||
| @@ -407,13 +404,18 @@ def norm( | |||||
| [4.3589] | [4.3589] | ||||
| """ | """ | ||||
| if p == 0: | |||||
| if axis is None: | |||||
| if inp.ndim != 1: | |||||
| raise TypeError("axis is required unless input is a vector") | |||||
| if ord is None: | |||||
| ord = 2 | |||||
| if ord == 0: | |||||
| return sum(inp != 0, axis=axis, keepdims=keepdims) | return sum(inp != 0, axis=axis, keepdims=keepdims) | ||||
| if p == math.inf: | |||||
| if ord == math.inf: | |||||
| return max(abs(inp)) | return max(abs(inp)) | ||||
| if p == -math.inf: | |||||
| if ord == -math.inf: | |||||
| return min(abs(inp)) | return min(abs(inp)) | ||||
| return sum(abs(inp) ** p, axis=axis, keepdims=keepdims) ** (1.0 / p) | |||||
| return sum(abs(inp) ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord) | |||||
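A short usage sketch of the renamed signature (NumPy-style ``ord``), assuming the usual ``megengine`` import aliases:

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    x = tensor(np.array([3.0, -4.0], dtype=np.float32))
    print(F.norm(x).numpy())         # ord defaults to 2 -> 5.0
    print(F.norm(x, ord=1).numpy())  # L1 norm -> 7.0
    print(F.norm(x, ord=0).numpy())  # count of nonzero entries -> 2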
| def argmin( | def argmin( | ||||
| @@ -426,7 +428,7 @@ def argmin( | |||||
| reduce over all of them. | reduce over all of them. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None | |||||
| :param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None | |||||
| :param keepdims: whether the output tensor has axis retained or not. Default: False | :param keepdims: whether the output tensor has axis retained or not. Default: False | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -458,7 +460,7 @@ def argmin( | |||||
| (inp,) = apply(op, inp) | (inp,) = apply(op, inp) | ||||
| if not keepdims: | if not keepdims: | ||||
| inp = remove_axis(inp, ai) | |||||
| inp = squeeze(inp, ai) | |||||
| return inp | return inp | ||||
| @@ -470,7 +472,7 @@ def argmin( | |||||
| op = builtin.Argmin(axis=axis) | op = builtin.Argmin(axis=axis) | ||||
| (result,) = apply(op, inp) | (result,) = apply(op, inp) | ||||
| if not keepdims: | if not keepdims: | ||||
| result = remove_axis(result, axis) | |||||
| result = squeeze(result, axis) | |||||
| return result | return result | ||||
| @@ -484,7 +486,7 @@ def argmax( | |||||
| reduce over all of them. | reduce over all of them. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None | |||||
| :param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None | |||||
| :param keepdims: whether the output tensor has axis retained or not. Default: False | :param keepdims: whether the output tensor has axis retained or not. Default: False | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -516,7 +518,7 @@ def argmax( | |||||
| (inp,) = apply(op, inp) | (inp,) = apply(op, inp) | ||||
| if not keepdims: | if not keepdims: | ||||
| inp = remove_axis(inp, ai) | |||||
| inp = squeeze(inp, ai) | |||||
| return inp | return inp | ||||
| @@ -528,45 +530,40 @@ def argmax( | |||||
| op = builtin.Argmax(axis=axis) | op = builtin.Argmax(axis=axis) | ||||
| (result,) = apply(op, inp) | (result,) = apply(op, inp) | ||||
| if not keepdims: | if not keepdims: | ||||
| result = remove_axis(result, axis) | |||||
| result = squeeze(result, axis) | |||||
| return result | return result | ||||
| def normalize( | def normalize( | ||||
| inp: Tensor, | |||||
| p: int = 2, | |||||
| axis: Optional[Union[int, Sequence[int]]] = None, | |||||
| eps: float = 1e-12, | |||||
| inp: Tensor, ord: float = None, axis: int = None, eps: float = 1e-12, | |||||
| ) -> Tensor: | ) -> Tensor: | ||||
| r"""Performs :math:`L_p` normalization of input tensor along | r"""Performs :math:`L_p` normalization of input tensor along | ||||
| given axis. If axis is a list of dimensions, | |||||
| reduce over all of them. | |||||
| given axis. | |||||
| For a tensor inp of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each | |||||
| For a tensor of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each | |||||
| :math:`n_{dim}` -element vector :math:`v` along dimension :attr:`axis` is transformed as: | :math:`n_{dim}` -element vector :math:`v` along dimension :attr:`axis` is transformed as: | ||||
| .. math:: | .. math:: | ||||
| v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}. | v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param p: power of value applied to inp. Default: 2 | |||||
| :param axis: dimension to reduce. If None, all the dimensions will be reduced | |||||
| to calculate the norm. Default: None | |||||
| :param ord: order of the norm applied to the input tensor. Default: 2 | |||||
| :param axis: dimension to reduce. If None, the input must be a vector. Default: None | |||||
| :param eps: a small value to avoid division by zero. Default: 1e-12 | :param eps: a small value to avoid division by zero. Default: 1e-12 | ||||
| :return: normalized output tensor. | :return: normalized output tensor. | ||||
| """ | """ | ||||
| if axis is None: | if axis is None: | ||||
| return inp / clamp(norm(inp, p, axis), lower=eps) | |||||
| return inp / clip(norm(inp, ord, axis), lower=eps) | |||||
| else: | else: | ||||
| return inp / clamp(norm(inp, p, axis, keepdims=True), lower=eps) | |||||
| return inp / clip(norm(inp, ord, axis, keepdims=True), lower=eps) | |||||
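A quick sketch of the updated ``normalize`` (now built on ``clip``); values are illustrative and the usual import aliases are assumed:

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    x = tensor(np.array([[3.0, 4.0], [0.0, 0.0]], dtype=np.float32))
    y = F.normalize(x, axis=1)  # each row scaled to unit L2 norm
    print(y.numpy())            # [[0.6 0.8] [0. 0.]]; the zero row stays zero thanks to eps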
| def argsort(inp: Tensor, descending: bool = False) -> Tensor: | def argsort(inp: Tensor, descending: bool = False) -> Tensor: | ||||
| r"""Sorts the target 2d matrix by row, return both the sorted tensor and indices. | |||||
| r"""Returns the indices that would sort the input tensor. | |||||
| :param inp: input tensor, if 2d, each row will be sorted. | |||||
| :param descending: Sort in descending order, where the largest comes first. Default: False | |||||
| :return: Tuple of two tensors `(sorted_tensor, indices_of_int32)`. | |||||
| :param inp: input tensor. If it's 2d, the result would be an array of indices showing how to sort each row of the input tensor. | |||||
| :param descending: sort in descending order, where the largest comes first. Default: False | |||||
| :return: int32 indices indicating how to sort the input. | |||||
| Examples: | Examples: | ||||
| @@ -603,6 +600,31 @@ def argsort(inp: Tensor, descending: bool = False) -> Tensor: | |||||
| def sort(inp: Tensor, descending: bool = False) -> Tuple[Tensor, Tensor]: | def sort(inp: Tensor, descending: bool = False) -> Tuple[Tensor, Tensor]: | ||||
| r"""Returns sorted tensor and the indices would sort the input tensor. | |||||
| :param inp: input tensor. If it's 2d, the result would be sorted by row. | |||||
| :param descending: sort in descending order, where the largest comes first. Default: False | |||||
| :return: tuple of two tensors `(sorted_tensor, indices_of_int32)`. | |||||
| Examples: | |||||
| .. testcode:: | |||||
| import numpy as np | |||||
| from megengine import tensor | |||||
| import megengine.functional as F | |||||
| x = tensor(np.array([1,2], dtype=np.float32)) | |||||
| out, indices = F.sort(x) | |||||
| print(out.numpy()) | |||||
| Outputs: | |||||
| .. testoutput:: | |||||
| [1. 2.] | |||||
| """ | |||||
| assert len(inp.shape) <= 2, "Input should be 1d or 2d" | assert len(inp.shape) <= 2, "Input should be 1d or 2d" | ||||
| if descending: | if descending: | ||||
| order = P.Argsort.Order.DESCENDING | order = P.Argsort.Order.DESCENDING | ||||
| @@ -625,13 +647,13 @@ def topk( | |||||
| kth_only: bool = False, | kth_only: bool = False, | ||||
| no_sort: bool = False, | no_sort: bool = False, | ||||
| ) -> Tuple[Tensor, Tensor]: | ) -> Tuple[Tensor, Tensor]: | ||||
| r"""Selects the ``Top-K(by default)`` smallest elements of 2d matrix by row. | |||||
| r"""Selects the ``Top-K``(by default) smallest elements of 2d matrix by row. | |||||
| :param inp: input tensor, if 2d, each row will be sorted. | |||||
| :param inp: input tensor. If input tensor is 2d, each row will be sorted. | |||||
| :param k: number of elements needed. | :param k: number of elements needed. | ||||
| :param descending: if true, return the largest elements instead. Default: False | |||||
| :param kth_only: if true, only the k-th element will be returned. Default: False | |||||
| :param no_sort: if true, the returned elements can be unordered. Default: False | |||||
| :param descending: if True, return the largest elements instead. Default: False | |||||
| :param kth_only: if True, only the k-th element will be returned. Default: False | |||||
| :param no_sort: if True, the returned elements can be unordered. Default: False | |||||
| :return: tuple of two tensors `(topk_tensor, indices_of_int32)`. | :return: tuple of two tensors `(topk_tensor, indices_of_int32)`. | ||||
| Examples: | Examples: | ||||
| @@ -665,15 +687,18 @@ def topk( | |||||
| mode = Mode.VALUE_IDX_SORTED | mode = Mode.VALUE_IDX_SORTED | ||||
| op = builtin.TopK(mode=mode) | op = builtin.TopK(mode=mode) | ||||
| if not isinstance(k, (TensorBase, TensorWrapperBase)): | |||||
| (k,) = Const(k, dtype="int32", device=inp.device)(inp) | |||||
| if len(inp.shape) == 1: | if len(inp.shape) == 1: | ||||
| inp = inp.reshape(1, -1) | inp = inp.reshape(1, -1) | ||||
| res = apply(op, inp, Tensor(k, dtype="int32")) | |||||
| res = apply(op, inp, k) | |||||
| if kth_only: | if kth_only: | ||||
| tns = res[0] | tns = res[0] | ||||
| else: | else: | ||||
| tns, ind = res[0][0], res[1][0] | tns, ind = res[0][0], res[1][0] | ||||
| else: | else: | ||||
| res = apply(op, inp, Tensor(k, dtype="int32")) | |||||
| res = apply(op, inp, k) | |||||
| if kth_only: | if kth_only: | ||||
| tns = res | tns = res | ||||
| else: | else: | ||||
| @@ -13,46 +13,51 @@ from ..core._imperative_rt import CompNode | |||||
| from ..core.ops import builtin | from ..core.ops import builtin | ||||
| from ..core.ops._internal import param_defs as P | from ..core.ops._internal import param_defs as P | ||||
| from ..core.ops.special import Const | from ..core.ops.special import Const | ||||
| from ..core.tensor import utils | |||||
| from ..core.tensor import megbrain_graph, utils | |||||
| from ..core.tensor.core import TensorBase, TensorWrapperBase, apply | from ..core.tensor.core import TensorBase, TensorWrapperBase, apply | ||||
| from ..core.tensor.utils import astensor1d | |||||
| from ..distributed import WORLD, is_distributed | from ..distributed import WORLD, is_distributed | ||||
| from ..jit.tracing import is_tracing | |||||
| from ..random import uniform | from ..random import uniform | ||||
| from ..tensor import Tensor | from ..tensor import Tensor | ||||
| from .debug_param import get_conv_execution_strategy | from .debug_param import get_conv_execution_strategy | ||||
| from .distributed import all_reduce_sum | from .distributed import all_reduce_sum | ||||
| from .elemwise import exp, floor, log, log1p, maximum, minimum, relu | from .elemwise import exp, floor, log, log1p, maximum, minimum, relu | ||||
| from .math import argsort, max, sum | from .math import argsort, max, sum | ||||
| from .tensor import add_axis, broadcast, concat, full, ones, remove_axis, reshape, zeros | |||||
| from .tensor import ( | |||||
| broadcast_to, | |||||
| concat, | |||||
| expand_dims, | |||||
| full, | |||||
| ones, | |||||
| reshape, | |||||
| squeeze, | |||||
| zeros, | |||||
| ) | |||||
| from .types import _pair, _pair_nonzero | from .types import _pair, _pair_nonzero | ||||
| __all__ = [ | __all__ = [ | ||||
| "adaptive_avg_pool2d", | |||||
| "adaptive_max_pool2d", | |||||
| "avg_pool2d", | "avg_pool2d", | ||||
| "batched_nms", | |||||
| "batch_norm2d", | |||||
| "batch_norm", | |||||
| "conv2d", | "conv2d", | ||||
| "conv_transpose2d", | "conv_transpose2d", | ||||
| "dot", | "dot", | ||||
| "dropout", | "dropout", | ||||
| "embedding", | |||||
| "indexing_one_hot", | "indexing_one_hot", | ||||
| "interpolate", | |||||
| "leaky_relu", | "leaky_relu", | ||||
| "linear", | |||||
| "local_conv2d", | "local_conv2d", | ||||
| "logsigmoid", | "logsigmoid", | ||||
| "logsumexp", | "logsumexp", | ||||
| "log_softmax", | |||||
| "logsoftmax", | |||||
| "matmul", | "matmul", | ||||
| "max_pool2d", | "max_pool2d", | ||||
| "nms", | |||||
| "one_hot", | "one_hot", | ||||
| "prelu", | "prelu", | ||||
| "roi_align", | |||||
| "roi_pooling", | |||||
| "softmax", | "softmax", | ||||
| "softplus", | "softplus", | ||||
| "svd", | "svd", | ||||
| "sync_batch_norm", | |||||
| "warp_perspective", | "warp_perspective", | ||||
| ] | ] | ||||
| @@ -106,19 +111,18 @@ def conv2d( | |||||
| :param padding: size of the paddings added to the input on both sides of its | :param padding: size of the paddings added to the input on both sides of its | ||||
| spatial dimensions. Only zero-padding is supported. Default: 0 | spatial dimensions. Only zero-padding is supported. Default: 0 | ||||
| :param dilation: dilation of the 2D convolution operation. Default: 1 | :param dilation: dilation of the 2D convolution operation. Default: 1 | ||||
| :param groups: number of groups to divide input and output channels into, | |||||
| so as to perform a ``grouped convolution``. When groups is not 1, | |||||
| in_channels and out_channels must be divisible by groups, | |||||
| :param groups: number of groups into which the input and output channels are divided, so as to perform a ``grouped convolution``. When ``groups`` is not 1, | |||||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||||
| and the shape of weight should be `(groups, out_channel // groups, | and the shape of weight should be `(groups, out_channel // groups, | ||||
| in_channels // groups, height, width)`. | in_channels // groups, height, width)`. | ||||
| :type conv_mode: string or :class:`P.Convolution.Mode`. | |||||
| :type conv_mode: string or :class:`P.Convolution.Mode` | |||||
| :param conv_mode: supports "CROSS_CORRELATION" or "CONVOLUTION". Default: | :param conv_mode: supports "CROSS_CORRELATION" or "CONVOLUTION". Default: | ||||
| "CROSS_CORRELATION" | "CROSS_CORRELATION" | ||||
| :type compute_mode: string or | :type compute_mode: string or | ||||
| :class:`P.Convolution.ComputeMode`. | |||||
| :class:`P.Convolution.ComputeMode` | |||||
| :param compute_mode: when set to "DEFAULT", no special requirements will be | :param compute_mode: when set to "DEFAULT", no special requirements will be | ||||
| placed on the precision of intermediate results. When set to "FLOAT32", | placed on the precision of intermediate results. When set to "FLOAT32", | ||||
| Float32 would be used for accumulator and intermediate result, but only | |||||
| "Float32" would be used for accumulator and intermediate result, but only | |||||
| effective when input and output are of Float16 dtype. | effective when input and output are of Float16 dtype. | ||||
| :return: output tensor. | :return: output tensor. | ||||
| """ | """ | ||||
| @@ -167,24 +171,23 @@ def conv_transpose2d( | |||||
| :param inp: feature map of the convolution operation. | :param inp: feature map of the convolution operation. | ||||
| :param weight: convolution kernel. | :param weight: convolution kernel. | ||||
| :param bias: bias added to the result of convolution (if given) | |||||
| :param bias: bias added to the result of convolution (if given). | |||||
| :param stride: stride of the 2D convolution operation. Default: 1 | :param stride: stride of the 2D convolution operation. Default: 1 | ||||
| :param padding: size of the paddings added to the input on both sides of its | :param padding: size of the paddings added to the input on both sides of its | ||||
| spatial dimensions. Only zero-padding is supported. Default: 0 | spatial dimensions. Only zero-padding is supported. Default: 0 | ||||
| :param dilation: dilation of the 2D convolution operation. Default: 1 | :param dilation: dilation of the 2D convolution operation. Default: 1 | ||||
| :param groups: number of groups to divide input and output channels into, | |||||
| so as to perform a ``grouped convolution``. When groups is not 1, | |||||
| in_channels and out_channels must be divisible by groups, | |||||
| :param groups: number of groups into which the input and output channels are divided, so as to perform a ``grouped convolution``. When ``groups`` is not 1, | |||||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||||
| and the shape of weight should be `(groups, out_channel // groups, | and the shape of weight should be `(groups, out_channel // groups, | ||||
| in_channels // groups, height, width)`. Default: 1 | in_channels // groups, height, width)`. Default: 1 | ||||
| :type conv_mode: string or :class:`P.Convolution.Mode`. | |||||
| :type conv_mode: string or :class:`P.Convolution.Mode` | |||||
| :param conv_mode: supports "CROSS_CORRELATION" or "CONVOLUTION". Default: | :param conv_mode: supports "CROSS_CORRELATION" or "CONVOLUTION". Default: | ||||
| "CROSS_CORRELATION" | "CROSS_CORRELATION" | ||||
| :type compute_mode: string or | :type compute_mode: string or | ||||
| :class:`P.Convolution.ComputeMode`. | |||||
| :class:`P.Convolution.ComputeMode` | |||||
| :param compute_mode: when set to "DEFAULT", no special requirements will be | :param compute_mode: when set to "DEFAULT", no special requirements will be | ||||
| placed on the precision of intermediate results. When set to "FLOAT32", | placed on the precision of intermediate results. When set to "FLOAT32", | ||||
| Float32 would be used for accumulator and intermediate result, but only | |||||
| "Float32" would be used for accumulator and intermediate result, but only | |||||
| effective when input and output are of Float16 dtype. | effective when input and output are of Float16 dtype. | ||||
| :return: output tensor. | :return: output tensor. | ||||
| """ | """ | ||||
| @@ -222,10 +225,8 @@ def local_conv2d( | |||||
| padding: Union[int, Tuple[int, int]] = 0, | padding: Union[int, Tuple[int, int]] = 0, | ||||
| dilation: Union[int, Tuple[int, int]] = 1, | dilation: Union[int, Tuple[int, int]] = 1, | ||||
| conv_mode="CROSS_CORRELATION", | conv_mode="CROSS_CORRELATION", | ||||
| ) -> Tensor: | |||||
| """Applies spatial 2D convolution over an image with untied kernels. | |||||
| Refer to :class:`~.LocalConv2d` for more information. | |||||
| ): | |||||
| """Applies spatial 2D convolution over an groupped channeled image with untied kernels. | |||||
| """ | """ | ||||
| assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION" | assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION" | ||||
| @@ -233,6 +234,8 @@ def local_conv2d( | |||||
| pad_h, pad_w = expand_hw(padding) | pad_h, pad_w = expand_hw(padding) | ||||
| dilate_h, dilate_w = expand_hw(dilation) | dilate_h, dilate_w = expand_hw(dilation) | ||||
| Sparse = P.Convolution.Sparse | |||||
| op = builtin.GroupLocal( | op = builtin.GroupLocal( | ||||
| stride_h=stride_h, | stride_h=stride_h, | ||||
| stride_w=stride_w, | stride_w=stride_w, | ||||
| @@ -240,7 +243,9 @@ def local_conv2d( | |||||
| pad_w=pad_w, | pad_w=pad_w, | ||||
| dilate_h=dilate_h, | dilate_h=dilate_h, | ||||
| dilate_w=dilate_w, | dilate_w=dilate_w, | ||||
| # strategy=get_conv_execution_strategy(), | |||||
| mode=conv_mode, | |||||
| compute_mode="DEFAULT", | |||||
| sparse=Sparse.DENSE, | |||||
| ) | ) | ||||
| inp, weight = utils.convert_inputs(inp, weight) | inp, weight = utils.convert_inputs(inp, weight) | ||||
| (output,) = apply(op, inp, weight) | (output,) = apply(op, inp, weight) | ||||
| @@ -263,7 +268,7 @@ def max_pool2d( | |||||
| :param kernel_size: size of the window. | :param kernel_size: size of the window. | ||||
| :param stride: stride of the window. If not provided, its value is set to kernel_size. | :param stride: stride of the window. If not provided, its value is set to kernel_size. | ||||
| Default: None | Default: None | ||||
| :param padding: implicit zero padding to be added on both sides. Default: 0 | |||||
| :param padding: implicit zero padding added on both sides. Default: 0 | |||||
| :return: output tensor. | :return: output tensor. | ||||
| """ | """ | ||||
| if stride is None: | if stride is None: | ||||
| @@ -292,15 +297,15 @@ def avg_pool2d( | |||||
| padding: Union[int, Tuple[int, int]] = 0, | padding: Union[int, Tuple[int, int]] = 0, | ||||
| mode: str = "AVERAGE_COUNT_EXCLUDE_PADDING", | mode: str = "AVERAGE_COUNT_EXCLUDE_PADDING", | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Applies a 2D average pooling over an input tensor. | |||||
| """Applies 2D average pooling over an input tensor. | |||||
| Refer to :class:`~.AvgPool2d` for more information. | Refer to :class:`~.AvgPool2d` for more information. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param kernel_size: size of the window. | :param kernel_size: size of the window. | ||||
| :param stride: stride of the window. If not provided, its value is set to kernel_size. | |||||
| :param stride: stride of the window. If not provided, its value is set to ``kernel_size``. | |||||
| Default: None | Default: None | ||||
| :param padding: implicit zero padding to be added on both sides. Default: 0 | |||||
| :param padding: implicit zero padding added on both sides. Default: 0 | |||||
| :param mode: whether to count padding values. Default: "AVERAGE_COUNT_EXCLUDE_PADDING" | :param mode: whether to count padding values. Default: "AVERAGE_COUNT_EXCLUDE_PADDING" | ||||
| :return: output tensor. | :return: output tensor. | ||||
| """ | """ | ||||
| @@ -323,6 +328,48 @@ def avg_pool2d( | |||||
| return output | return output | ||||
| def adaptive_max_pool2d( | |||||
| inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor], | |||||
| ) -> Tensor: | |||||
| """Applies a 2D max adaptive pooling over an input. | |||||
| Refer to :class:`~.MaxAdaptivePool2d` for more information. | |||||
| :param inp: input tensor. | |||||
| :param oshp: `(OH, OW)`, the target output shape. | |||||
| :return: output tensor. | |||||
| """ | |||||
| assert isinstance(inp, (Tensor, megbrain_graph.VarNode)), "inp must be Tensor type" | |||||
| if isinstance(oshp, int): | |||||
| oshp = (oshp, oshp) | |||||
| op = builtin.AdaptivePooling(mode="MAX", format="NCHW",) | |||||
| oshp = astensor1d(oshp, inp, dtype="int32", device=inp.device) | |||||
| (output,) = apply(op, inp, oshp) | |||||
| return output | |||||
| def adaptive_avg_pool2d( | |||||
| inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor], | |||||
| ) -> Tensor: | |||||
| """Applies a 2D average adaptive pooling over an input. | |||||
| Refer to :class:`~.AvgAdaptivePool2d` for more information. | |||||
| :param inp: input tensor. | |||||
| :param oshp: `(OH, OW)`, the target output shape. | |||||
| :return: output tensor. | |||||
| """ | |||||
| assert isinstance(inp, (Tensor, megbrain_graph.VarNode)), "inp must be Tensor type" | |||||
| if isinstance(oshp, int): | |||||
| oshp = (oshp, oshp) | |||||
| op = builtin.AdaptivePooling(mode="AVERAGE", format="NCHW",) | |||||
| oshp = astensor1d(oshp, inp, dtype="int32", device=inp.device) | |||||
| (output,) = apply(op, inp, oshp) | |||||
| return output | |||||
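A quick usage sketch of the two new functions (following this file's ``F.nn`` calling convention, which is assumed here):

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    x = tensor(np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4))
    print(F.nn.adaptive_avg_pool2d(x, (2, 2)).numpy())  # averages each 2x2 block
    print(F.nn.adaptive_max_pool2d(x, 2).numpy())       # int oshp expands to (2, 2)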
| def prelu(inp: Tensor, weight: Tensor) -> Tensor: | def prelu(inp: Tensor, weight: Tensor) -> Tensor: | ||||
| r""" | r""" | ||||
| Applies the element-wise PReLU function. | Applies the element-wise PReLU function. | ||||
| @@ -346,17 +393,17 @@ def softplus(inp: Tensor) -> Tensor: | |||||
| .. math:: | .. math:: | ||||
| \text{softplus}(x) = \log(1 + \exp(x)) | \text{softplus}(x) = \log(1 + \exp(x)) | ||||
| softplus is a smooth approximation to the ReLU function and can be used | softplus is a smooth approximation to the ReLU function and can be used | ||||
| to constrain the output of a machine to always be positive. | |||||
| to constrain the output to always be positive. | |||||
| For numerical stability the implementation follows this transformation: | For numerical stability the implementation follows this transformation: | ||||
| .. math:: | .. math:: | ||||
| \text{softplus}(x) = \log(1 + \exp(x)) | |||||
| = \log(1 + \exp(-\text{abs}(x))) + \max(x, 0) | |||||
| \text{softplus}(x) = \log(1 + \exp(x)) | |||||
| = \log(1 + \exp(-\text{abs}(x))) + \max(x, 0) | |||||
| = \log1p(\exp(-\text{abs}(x))) + \text{relu}(x) | = \log1p(\exp(-\text{abs}(x))) + \text{relu}(x) | ||||
| :param inp: The input tensor | |||||
| :param inp: input tensor. | |||||
| Examples: | Examples: | ||||
| @@ -369,9 +416,9 @@ def softplus(inp: Tensor) -> Tensor: | |||||
| x = tensor(np.arange(-3, 3, dtype=np.float32)) | x = tensor(np.arange(-3, 3, dtype=np.float32)) | ||||
| y = F.softplus(x) | y = F.softplus(x) | ||||
| print(y.numpy()) | print(y.numpy()) | ||||
| Outputs: | Outputs: | ||||
| .. testoutput:: | .. testoutput:: | ||||
| [0.0486 0.1269 0.3133 0.6931 1.3133 2.1269] | [0.0486 0.1269 0.3133 0.6931 1.3133 2.1269] | ||||
| @@ -380,7 +427,7 @@ def softplus(inp: Tensor) -> Tensor: | |||||
| return log1p(exp(-abs(inp))) + relu(inp) | return log1p(exp(-abs(inp))) + relu(inp) | ||||
| def log_softmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
| def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
| r"""Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional | r"""Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional | ||||
| input Tensor. The LogSoftmax formulation can be simplified as: | input Tensor. The LogSoftmax formulation can be simplified as: | ||||
| @@ -390,13 +437,13 @@ def log_softmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
| For numerical stability the implementation follows this transformation: | For numerical stability the implementation follows this transformation: | ||||
| .. math:: | .. math:: | ||||
| \operatorname{logsoftmax}(x) | |||||
| \operatorname{logsoftmax}(x) | |||||
| = \log (\frac{\exp (x)}{\sum_{i}(\exp (x_{i}))}) | = \log (\frac{\exp (x)}{\sum_{i}(\exp (x_{i}))}) | ||||
| = x - \log (\sum_{i}(\exp (x_{i}))) | = x - \log (\sum_{i}(\exp (x_{i}))) | ||||
| = x - logsumexp(x) | = x - logsumexp(x) | ||||
| :param inp: The input tensor | |||||
| :param axis: An axis along which log_softmax will be applied. | |||||
| :param inp: input tensor. | |||||
| :param axis: axis along which logsoftmax will be applied. | |||||
| Examples: | Examples: | ||||
| @@ -407,11 +454,11 @@ def log_softmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
| import megengine.functional as F | import megengine.functional as F | ||||
| x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) | ||||
| y = F.log_softmax(x, axis=1) | |||||
| y = F.logsoftmax(x, axis=1) | |||||
| print(y.numpy()) | print(y.numpy()) | ||||
| Outputs: | Outputs: | ||||
| .. testoutput:: | .. testoutput:: | ||||
| [[-4.4519 -3.4519 -2.4519 -1.4519 -0.4519] | [[-4.4519 -3.4519 -2.4519 -1.4519 -0.4519] | ||||
| @@ -430,7 +477,7 @@ def logsigmoid(inp: Tensor) -> Tensor: | |||||
| = - \log(1 + exp(-x)) | = - \log(1 + exp(-x)) | ||||
| = - \text{softplus}(-x) | = - \text{softplus}(-x) | ||||
| :param inp: The input tensor | |||||
| :param inp: input tensor. | |||||
| Examples: | Examples: | ||||
| @@ -459,11 +506,10 @@ def logsumexp( | |||||
| inp: Tensor, axis: Union[int, Sequence[int]], keepdims: bool = False | inp: Tensor, axis: Union[int, Sequence[int]], keepdims: bool = False | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| r""" | r""" | ||||
| Compute the log of the sum of exponentials of inputs along the given :attr:`axis`. | |||||
| The computation is numerically stabilized. | |||||
| Calculates the logarithm of the sum of exponentials of the inputs along the given :attr:`axis`, in a numerically stable way. | |||||
| .. math:: | .. math:: | ||||
| \operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right) | \operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right) | ||||
| For numerical stability, the implementation follows this transformation: | For numerical stability, the implementation follows this transformation: | ||||
| @@ -472,18 +518,18 @@ def logsumexp( | |||||
| \operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right) | \operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right) | ||||
| = \operatorname{logsumexp}(\boldsymbol{x})=b+\log \sum_{j=1}^{n} \exp \left(x_{j}-b\right) | = \operatorname{logsumexp}(\boldsymbol{x})=b+\log \sum_{j=1}^{n} \exp \left(x_{j}-b\right) | ||||
| where | where | ||||
| .. math:: | .. math:: | ||||
| b = \max(x_j) | b = \max(x_j) | ||||
| :param inp: The input tensor. | |||||
| :param axis: Axis over which the sum is taken. It can be a single axis or a list of axes. | |||||
| :param inp: input tensor. | |||||
| :param axis: axis over which the sum is taken. It could be a single axis or a list of axes. | |||||
| :param keepdims: whether to retain :attr:`axis` or not for the output tensor. | :param keepdims: whether to retain :attr:`axis` or not for the output tensor. | ||||
| Examples: | Examples: | ||||
| .. testcode:: | .. testcode:: | ||||
| import numpy as np | import numpy as np | ||||
| @@ -501,11 +547,11 @@ def logsumexp( | |||||
| [-0.5481 4.4519] | [-0.5481 4.4519] | ||||
| """ | """ | ||||
| max_value = max(inp, axis, keepdims=True) | |||||
| max_value = max(inp.detach(), axis, keepdims=True) | |||||
| if keepdims: | if keepdims: | ||||
| return max_value + log(sum(exp(inp - max_value), axis, keepdims)) | return max_value + log(sum(exp(inp - max_value), axis, keepdims)) | ||||
| else: | else: | ||||
| return remove_axis(max_value, axis=None) + log( | |||||
| return squeeze(max_value, axis=None) + log( | |||||
| sum(exp(inp - max_value), axis, keepdims) | sum(exp(inp - max_value), axis, keepdims) | ||||
| ) | ) | ||||
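A NumPy sketch of the max-subtraction identity used here (the added ``detach()`` only stops gradients flowing through :math:`b`; the value is unchanged):

    import numpy as np

    x = np.array([1000.0, 1001.0])
    print(np.log(np.exp(x).sum()))          # naive: inf (overflow)
    b = x.max()
    print(b + np.log(np.exp(x - b).sum()))  # stable: 1001.3133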
| @@ -523,13 +569,13 @@ def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor: | |||||
| .. math:: | .. math:: | ||||
| \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)} | \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)} | ||||
| It is applied to all elements along axis, and will re-scale them so that | |||||
| the elements lie in the range `[0, 1]` and sum to 1. | |||||
| It is applied to all elements along axis, and rescales elements so that | |||||
| they stay in the range `[0, 1]` and sum to 1. | |||||
| See :class:`~megengine.module.activation.Softmax` for more details. | See :class:`~megengine.module.activation.Softmax` for more details. | ||||
| :param inp: The input tensor. | |||||
| :param axis: An axis along which softmax will be applied. By default, | |||||
| :param inp: input tensor. | |||||
| :param axis: an axis along which softmax will be applied. By default, | |||||
| softmax will apply along the highest ranked axis. | softmax will apply along the highest ranked axis. | ||||
| Examples: | Examples: | ||||
| @@ -560,7 +606,7 @@ def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor: | |||||
| return cached / down | return cached / down | ||||
| def batch_norm2d( | |||||
| def batch_norm( | |||||
| inp: Tensor, | inp: Tensor, | ||||
| running_mean: Tensor = None, | running_mean: Tensor = None, | ||||
| running_var: Tensor = None, | running_var: Tensor = None, | ||||
| @@ -572,7 +618,7 @@ def batch_norm2d( | |||||
| eps: float = 1e-5, | eps: float = 1e-5, | ||||
| inplace: bool = True | inplace: bool = True | ||||
| ): | ): | ||||
| """Applies batch normalization to the input. | |||||
| r"""Applies batch normalization to the input. | |||||
| Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | ||||
| @@ -584,26 +630,28 @@ def batch_norm2d( | |||||
| :param bias: bias tensor in the learnable affine parameters. | :param bias: bias tensor in the learnable affine parameters. | ||||
| See :math:`\beta` in :class:`~.BatchNorm2d`. | See :math:`\beta` in :class:`~.BatchNorm2d`. | ||||
| :param training: a boolean value to indicate whether batch norm is performed | :param training: a boolean value to indicate whether batch norm is performed | ||||
| in traning mode. Default: False | |||||
| in training mode. Default: False | |||||
| :param momentum: value used for the ``running_mean`` and ``running_var`` | :param momentum: value used for the ``running_mean`` and ``running_var`` | ||||
| computation. | computation. | ||||
| Default: 0.9 | Default: 0.9 | ||||
| :param eps: a value added to the denominator for numerical stability. | :param eps: a value added to the denominator for numerical stability. | ||||
| Default: 1e-5 | Default: 1e-5 | ||||
| :param inplace: whether to update running_mean and running_var inplace or return new tensors | |||||
| :param inplace: whether to update ``running_mean`` and ``running_var`` inplace or return new tensors | |||||
| Default: True | Default: True | ||||
| :return: output tensor. | :return: output tensor. | ||||
| """ | """ | ||||
| if inp.ndim != 4: | |||||
| raise NotImplementedError("batch_norm for ndim != 4") | |||||
| def full_value(value): | def full_value(value): | ||||
| C = inp.shape[1] | C = inp.shape[1] | ||||
| (x,) = Const(value, dtype=inp.dtype, device=inp.device)(inp) | (x,) = Const(value, dtype=inp.dtype, device=inp.device)(inp) | ||||
| return broadcast(x, [1, C, 1, 1]) | |||||
| return broadcast_to(x, [1, C, 1, 1]) | |||||
| def expand_or_full(x, value): | def expand_or_full(x, value): | ||||
| if x is None: | if x is None: | ||||
| return full_value(value) | return full_value(value) | ||||
| return add_axis(x, [0, 2, 3]) | |||||
| return expand_dims(x, [0, 2, 3]) | |||||
| def make_full_if_none(x, value): | def make_full_if_none(x, value): | ||||
| if x is None: | if x is None: | ||||
| @@ -676,7 +724,7 @@ def sync_batch_norm( | |||||
| eps_mode="ADDITIVE", | eps_mode="ADDITIVE", | ||||
| group=WORLD, | group=WORLD, | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Applies synchronized batch normalization to the input. | |||||
| r"""Applies synchronized batch normalization to the input. | |||||
| Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. | ||||
| @@ -717,7 +765,7 @@ def sync_batch_norm( | |||||
| if is_distributed(): | if is_distributed(): | ||||
| # reduce all nodes' data to calculate mean and variance | # reduce all nodes' data to calculate mean and variance | ||||
| reduce_size = broadcast(Tensor(reduce_size, dtype=_dtype), [1] * _ndim) | |||||
| reduce_size = broadcast_to(Tensor(reduce_size, dtype=_dtype), [1] * _ndim) | |||||
| stat = concat( | stat = concat( | ||||
| [reduce_size.astype(_dtype), channel_x1s, channel_x2s], axis=1 | [reduce_size.astype(_dtype), channel_x1s, channel_x2s], axis=1 | ||||
| ) | ) | ||||
| @@ -838,6 +886,10 @@ def warp_perspective( | |||||
| :param interp_mode: interpolation methods. Default: "LINEAR" | :param interp_mode: interpolation methods. Default: "LINEAR" | ||||
| :return: output tensor. | :return: output tensor. | ||||
| Note: | |||||
| The transformation matrix is the inverse of that used by `cv2.warpPerspective`. | |||||
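Per this note, a matrix built for ``cv2.warpPerspective`` would hypothetically be inverted before being passed in; a minimal sketch with a placeholder matrix:

    import numpy as np

    # M_cv stands in for a 3x3 matrix intended for cv2.warpPerspective
    # (e.g. from cv2.getPerspectiveTransform); identity used as a placeholder.
    M_cv = np.eye(3, dtype=np.float32)
    M_mge = np.linalg.inv(M_cv).astype(np.float32)  # pass M_mge to warp_perspective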
| Examples: | Examples: | ||||
| .. testcode:: | .. testcode:: | ||||
| @@ -868,7 +920,8 @@ def warp_perspective( | |||||
| imode=interp_mode, bmode=border_mode, format="NCHW", border_val=border_val | imode=interp_mode, bmode=border_mode, format="NCHW", border_val=border_val | ||||
| ) | ) | ||||
| inp, M = utils.convert_inputs(inp, M) | inp, M = utils.convert_inputs(inp, M) | ||||
| (result,) = apply(op, inp, M, Tensor(dsize)) | |||||
| dsize = astensor1d(dsize, inp, dtype="int32", device=inp.device) | |||||
| (result,) = apply(op, inp, M, dsize) | |||||
| return result | return result | ||||
| @@ -885,19 +938,18 @@ def matmul( | |||||
| With different inputs dim, this function behaves differently: | With different inputs dim, this function behaves differently: | ||||
| - Both 1-D tensor, simply forward to dot. | |||||
| - Both 1-D tensor, simply forward to ``dot``. | |||||
| - Both 2-D tensor, normal matrix multiplication. | - Both 2-D tensor, normal matrix multiplication. | ||||
| - If one input tensor is 1-D, matrix vector multiplication. | - If one input tensor is 1-D, matrix vector multiplication. | ||||
| - If at least one tensor are 3-dimensional or >3-dimensional, the batched matrix-matrix is returned, and the tensor with smaller dimension will | |||||
| - If at least one tensor is 3-dimensional or higher, the other tensor should have dim >= 2; the batched matrix-matrix product is returned, and the tensor with smaller dimension will | |||||
| be broadcasted. For example: | be broadcasted. For example: | ||||
| - inp1: `(k, m)`, inp2: `(m, p)`, return: `(k, p)` | |||||
| - inp1: `(n, k, m)`, inp2: `(n, m, p)`, return: `(n, k, p)` | - inp1: `(n, k, m)`, inp2: `(n, m, p)`, return: `(n, k, p)` | ||||
| - inp1: `(n, k, m)`, inp2: `(m, p)`, return: `(n, k, p)` | - inp1: `(n, k, m)`, inp2: `(m, p)`, return: `(n, k, p)` | ||||
| - inp1: `(n, j, k, m)`, inp2: `(n, j, m, p)`, return: `(n, j, k, p)` | - inp1: `(n, j, k, m)`, inp2: `(n, j, m, p)`, return: `(n, j, k, p)` | ||||
| :param inp1: The first matrix to be multiplied | |||||
| :param inp2: The second matrix to be multiplied | |||||
| :return: The output tensor | |||||
| :param inp1: first matrix to be multiplied. | |||||
| :param inp2: second matrix to be multiplied. | |||||
| :return: output tensor. | |||||
| Examples: | Examples: | ||||
| @@ -931,10 +983,10 @@ def matmul( | |||||
| if dim1 != dim2: | if dim1 != dim2: | ||||
| if dim1 < dim2: | if dim1 < dim2: | ||||
| shape1 = shape2[: dim2 - dim1] + shape1 | shape1 = shape2[: dim2 - dim1] + shape1 | ||||
| inp1 = inp1.broadcast(*shape1) | |||||
| inp1 = broadcast_to(inp1, shape1) | |||||
| else: | else: | ||||
| shape2 = shape1[: dim1 - dim2] + shape2 | shape2 = shape1[: dim1 - dim2] + shape2 | ||||
| inp2 = inp2.broadcast(*shape2) | |||||
| inp2 = broadcast_to(inp2, shape2) | |||||
| reshaped_batch_size = 1 | reshaped_batch_size = 1 | ||||
| for i in shape1[:-2]: | for i in shape1[:-2]: | ||||
| reshaped_batch_size *= i | reshaped_batch_size *= i | ||||
| @@ -949,9 +1001,9 @@ def matmul( | |||||
| shp = shape1[:-1] + shape2[-1:] | shp = shape1[:-1] + shape2[-1:] | ||||
| elif dim1 == 3 or dim2 == 3: | elif dim1 == 3 or dim2 == 3: | ||||
| if dim2 < 3: | if dim2 < 3: | ||||
| inp2 = inp2.broadcast(*(inp1.shape[:1] + inp2.shape)) | |||||
| inp2 = broadcast_to(inp2, inp1.shape[:1] + inp2.shape) | |||||
| elif dim1 < 3: | elif dim1 < 3: | ||||
| inp1 = inp1.broadcast(*(inp2.shape[:1] + inp1.shape)) | |||||
| inp1 = broadcast_to(inp1, inp2.shape[:1] + inp1.shape) | |||||
| op = builtin.BatchedMatrixMul( | op = builtin.BatchedMatrixMul( | ||||
| transposeA=transpose_a, | transposeA=transpose_a, | ||||
| transposeB=transpose_b, | transposeB=transpose_b, | ||||
| @@ -961,10 +1013,10 @@ def matmul( | |||||
| else: | else: | ||||
| if dim1 == 1: | if dim1 == 1: | ||||
| shp = (inp2.shape[1],) | shp = (inp2.shape[1],) | ||||
| inp1 = add_axis(inp1, 0) | |||||
| inp1 = expand_dims(inp1, 0) | |||||
| if dim2 == 1: | if dim2 == 1: | ||||
| shp = (inp1.shape[0],) | shp = (inp1.shape[0],) | ||||
| inp2 = add_axis(inp2, 1) | |||||
| inp2 = expand_dims(inp2, 1) | |||||
| op = builtin.MatrixMul( | op = builtin.MatrixMul( | ||||
| transposeA=transpose_a, | transposeA=transpose_a, | ||||
| transposeB=transpose_b, | transposeB=transpose_b, | ||||
| @@ -981,12 +1033,12 @@ def matmul( | |||||
| def dot(inp1: Tensor, inp2: Tensor) -> Tensor: | def dot(inp1: Tensor, inp2: Tensor) -> Tensor: | ||||
| """ | """ | ||||
| Compute dot-product of two vectors ``inp1`` and ``inp2``. | |||||
| Computes dot-product of two vectors ``inp1`` and ``inp2``. | |||||
| Inputs must be 1-dimensional; a scalar input can be automatically broadcasted. | Inputs must be 1-dimensional; a scalar input can be automatically broadcasted. | ||||
| :param inp1: The first vector | |||||
| :param inp2: The second vector | |||||
| :return: The output value | |||||
| :param inp1: first vector. | |||||
| :param inp2: second vector. | |||||
| :return: output value. | |||||
| Examples: | Examples: | ||||
| @@ -1016,10 +1068,10 @@ def dot(inp1: Tensor, inp2: Tensor) -> Tensor: | |||||
| def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor: | def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor: | ||||
| """ | """ | ||||
| Compute the singular value decompositions of input matrix ``inp``. | |||||
| Computes the singular value decomposition of the input matrix. | |||||
| :param inp: The input matrix, must has shape ``[..., M, N]`` | |||||
| :return: The output matrices, U, sigma, V | |||||
| :param inp: input matrix, must have shape `[..., M, N]`. | |||||
| :return: output matrices, `(U, sigma, V)`. | |||||
| Examples: | Examples: | ||||
| @@ -1036,7 +1088,7 @@ def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor: | |||||
| Outputs: | Outputs: | ||||
| .. testoutput:: | .. testoutput:: | ||||
| [7.3485 1. ] | [7.3485 1. ] | ||||
| """ | """ | ||||
| @@ -1052,8 +1104,7 @@ def interpolate( | |||||
| mode: str = "BILINEAR", | mode: str = "BILINEAR", | ||||
| align_corners: bool = None, | align_corners: bool = None, | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| r"""Down/up samples the input tensor to either the given size or the given | |||||
| scale_factor. | |||||
| r"""Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``. | |||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param size: size of the output tensor. Default: None | :param size: size of the output tensor. Default: None | ||||
| @@ -1069,13 +1120,12 @@ def interpolate( | |||||
| import numpy as np | import numpy as np | ||||
| from megengine import tensor | from megengine import tensor | ||||
| import megengine.functional as F | import megengine.functional as F | ||||
| from megengine.test import assertTensorClose | |||||
| x = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2)) | x = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2)) | ||||
| out = F.interpolate(x, [4, 4], align_corners=False) | |||||
| out = F.nn.interpolate(x, [4, 4], align_corners=False) | |||||
| print(out.numpy()) | print(out.numpy()) | ||||
| out2 = F.interpolate(x, scale_factor=2.) | |||||
| assertTensorClose(out.numpy(), out2.numpy()) | |||||
| out2 = F.nn.interpolate(x, scale_factor=2.) | |||||
| np.testing.assert_allclose(out.numpy(), out2.numpy()) | |||||
| Outputs: | Outputs: | ||||
| @@ -1100,7 +1150,7 @@ def interpolate( | |||||
| align_corners = False | align_corners = False | ||||
| if mode == "LINEAR": | if mode == "LINEAR": | ||||
| inp = add_axis(inp, 3) | |||||
| inp = expand_dims(inp, 3) | |||||
| if inp.ndim != 4: | if inp.ndim != 4: | ||||
| raise ValueError("shape of input tensor must correspond to the operartion mode") | raise ValueError("shape of input tensor must correspond to the operartion mode") | ||||
| @@ -1170,7 +1220,7 @@ def interpolate( | |||||
| [row0, row1, Tensor([[0, 0, 1]], dtype="float32", device=inp.device)], | [row0, row1, Tensor([[0, 0, 1]], dtype="float32", device=inp.device)], | ||||
| axis=0, | axis=0, | ||||
| ).reshape(1, 3, 3) | ).reshape(1, 3, 3) | ||||
| weight = broadcast(weight, (inp.shape[0], 3, 3)) | |||||
| weight = broadcast_to(weight, (inp.shape[0], 3, 3)) | |||||
| else: | else: | ||||
| hscale = 1.0 * ih / oh | hscale = 1.0 * ih / oh | ||||
| wscale = 1.0 * iw / ow | wscale = 1.0 * iw / ow | ||||
| @@ -1186,7 +1236,7 @@ def interpolate( | |||||
| [row0, row1, Tensor([[0, 0, 1]], dtype="float32", device=inp.device)], | [row0, row1, Tensor([[0, 0, 1]], dtype="float32", device=inp.device)], | ||||
| axis=0, | axis=0, | ||||
| ).reshape(1, 3, 3) | ).reshape(1, 3, 3) | ||||
| weight = broadcast(weight, (inp.shape[0], 3, 3)) | |||||
| weight = broadcast_to(weight, (inp.shape[0], 3, 3)) | |||||
| weight = weight.astype("float32") | weight = weight.astype("float32") | ||||
| ret = warp_perspective(inp, weight, dsize, interp_mode="LINEAR") | ret = warp_perspective(inp, weight, dsize, interp_mode="LINEAR") | ||||
| @@ -1197,12 +1247,12 @@ def interpolate( | |||||
| def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor: | def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor: | ||||
| """Returns a new tensor where each of the elements are randomly set to zero | """Returns a new tensor where each of the elements are randomly set to zero | ||||
| with probability P = ``drop_prob``. Optionally rescale the output tensor. | |||||
| with probability P = ``drop_prob``. Optionally rescale the output tensor if ``training`` is True. | |||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param drop_prob: probability to drop (set to zero) a single element. | :param drop_prob: probability to drop (set to zero) a single element. | ||||
| :param training: the default behavior of ``dropout`` during training is to rescale the output, | :param training: the default behavior of ``dropout`` during training is to rescale the output, | ||||
| then it can be replaced by an :class:`~.Identity` during inference, default to True. | |||||
| so that it can be replaced by an :class:`~.Identity` during inference. Default: True | |||||
| :return: the output tensor | :return: the output tensor | ||||
| Examples: | Examples: | ||||
| @@ -1244,10 +1294,10 @@ def embedding( | |||||
| """Applies lookup table for embedding. | """Applies lookup table for embedding. | ||||
| :param inp: tensor with indices. | :param inp: tensor with indices. | ||||
| :param weight: learnable weights which embedding from. | |||||
| :param padding_idx: should be set to None, not support now. | |||||
| :param max_norm: should be set to None, not support now. | |||||
| :param norm_type: should be set to None, not support now. | |||||
| :param weight: learnable weights used as the embedding table. | |||||
| :param padding_idx: should be set to None, not supported now. | |||||
| :param max_norm: should be set to None, not supported now. | |||||
| :param norm_type: should be set to None, not supported now. | |||||
| :return: output tensor. | :return: output tensor. | ||||
| Refer to :class:`~.Embedding` for more information. | Refer to :class:`~.Embedding` for more information. | ||||
| @@ -1288,7 +1338,7 @@ def roi_pooling( | |||||
| np.random.seed(42) | np.random.seed(42) | ||||
| inp = tensor(np.random.randn(1, 1, 128, 128)) | inp = tensor(np.random.randn(1, 1, 128, 128)) | ||||
| rois = tensor(np.random.random((4, 5))) | rois = tensor(np.random.random((4, 5))) | ||||
| y = F.roi_pooling(inp, rois, (2, 2)) | |||||
| y = F.nn.roi_pooling(inp, rois, (2, 2)) | |||||
| print(y.numpy()[0]) | print(y.numpy()[0]) | ||||
| Outputs: | Outputs: | ||||
| @@ -1323,14 +1373,14 @@ def roi_align( | |||||
| ) -> Tensor: | ) -> Tensor: | ||||
| """Applies roi align on input feature. | """Applies roi align on input feature. | ||||
| :param inp: tensor that represents the input feature, `(N, C, H, W)` images. | |||||
| :param rois: `(N, 5)` boxes. First column is the index into N. The other 4 columns are xyxy. | |||||
| :param inp: tensor that represents the input feature, shape is `(N, C, H, W)`. | |||||
| :param rois: `(N, 5)` boxes. First column is the box index. The other 4 columns are ``xyxy``. | |||||
| :param output_shape: `(height, width)` shape of output rois feature. | :param output_shape: `(height, width)` shape of output rois feature. | ||||
| :param mode: "max" or "average", use max/average align just like max/average pooling. Default: "average" | :param mode: "max" or "average", use max/average align just like max/average pooling. Default: "average" | ||||
| :param spatial_scale: scale the input boxes by this number. Default: 1.0 | :param spatial_scale: scale the input boxes by this number. Default: 1.0 | ||||
| :param sample_points: number of input samples to take for each output sample. | :param sample_points: number of input samples to take for each output sample. | ||||
| 0 to take samples densely. Default: 2 | 0 to take samples densely. Default: 2 | ||||
| :param aligned: wheather align the input feature, with `aligned=True`, | |||||
| :param aligned: whether to align the input feature, with `aligned=True`, | |||||
| we first appropriately scale the ROI and then shift it by -0.5. Default: True | we first appropriately scale the ROI and then shift it by -0.5. Default: True | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -1345,7 +1395,7 @@ def roi_align( | |||||
| np.random.seed(42) | np.random.seed(42) | ||||
| inp = tensor(np.random.randn(1, 1, 128, 128)) | inp = tensor(np.random.randn(1, 1, 128, 128)) | ||||
| rois = tensor(np.random.random((4, 5))) | rois = tensor(np.random.random((4, 5))) | ||||
| y = F.roi_align(inp, rois, (2, 2)) | |||||
| y = F.nn.roi_align(inp, rois, (2, 2)) | |||||
| print(y.numpy()[0]) | print(y.numpy()[0]) | ||||
| Outputs: | Outputs: | ||||
| @@ -1383,7 +1433,7 @@ def roi_align( | |||||
| def indexing_one_hot( | def indexing_one_hot( | ||||
| src: Tensor, index: Tensor, axis: int = 1, keepdims=False | src: Tensor, index: Tensor, axis: int = 1, keepdims=False | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| r"""One-hot indexing for some axis. | |||||
| r"""One-hot indexing for some axes. | |||||
| :param src: input tensor. | :param src: input tensor. | ||||
| :param index: index tensor. | :param index: index tensor. | ||||
| @@ -1417,19 +1467,23 @@ def indexing_one_hot( | |||||
| index = utils.convert_single_value(index, (src,), dtype="int32", device=src.device) | index = utils.convert_single_value(index, (src,), dtype="int32", device=src.device) | ||||
| (result,) = apply(op, src, index) | (result,) = apply(op, src, index) | ||||
| if not keepdims: | if not keepdims: | ||||
| result = remove_axis(result, axis) | |||||
| result = squeeze(result, axis) | |||||
| return result | return result | ||||
| def nms(boxes: Tensor, scores: Tensor, iou_thresh: float) -> Tensor: | |||||
| def nms( | |||||
| boxes: Tensor, scores: Tensor, iou_thresh: float, max_output: Optional[int] = None | |||||
| ) -> Tensor: | |||||
| r""" | r""" | ||||
| Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union(IoU). | Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union(IoU). | ||||
| :param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format. | :param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format. | ||||
| :param iou_thresh: iou threshold for overlapping. | |||||
| :param iou_thresh: IoU threshold for overlapping. | |||||
| :param scores: tensor of shape `(N,)`, the score of boxes. | :param scores: tensor of shape `(N,)`, the score of boxes. | ||||
| :param max_output: the maximum number of boxes to keep; it is optional if this operator is not traced, | |||||
| otherwise it is required to be specified; if it is not specified, all boxes are kept. | |||||
| :return: indices of the elements that have been kept by NMS. | :return: indices of the elements that have been kept by NMS. | ||||
| Examples: | Examples: | ||||
| .. testcode:: | .. testcode:: | ||||
| @@ -1444,13 +1498,13 @@ def nms(boxes: Tensor, scores: Tensor, iou_thresh: float) -> Tensor: | |||||
| x[:,2:] = np.random.rand(100,2)*20 + 100 | x[:,2:] = np.random.rand(100,2)*20 + 100 | ||||
| scores = tensor(np.random.rand(100)) | scores = tensor(np.random.rand(100)) | ||||
| inp = tensor(x) | inp = tensor(x) | ||||
| result = F.nms(inp, scores, iou_thresh=0.7) | |||||
| result = F.nn.nms(inp, scores, iou_thresh=0.7) | |||||
| print(result.numpy()) | print(result.numpy()) | ||||
| Outputs: | Outputs: | ||||
| .. testoutput:: | .. testoutput:: | ||||
| [75 69] | [75 69] | ||||
| """ | """ | ||||
| @@ -1466,74 +1520,24 @@ def nms(boxes: Tensor, scores: Tensor, iou_thresh: float) -> Tensor: | |||||
| scores = scores.detach() | scores = scores.detach() | ||||
| sorted_idx = argsort(scores, descending=True) | sorted_idx = argsort(scores, descending=True) | ||||
| boxes = boxes[sorted_idx] | boxes = boxes[sorted_idx] | ||||
| max_output = boxes.shape[0] | |||||
| if is_tracing(): | |||||
| assert ( | |||||
| max_output is not None and max_output > 0 | |||||
| ), "max_output should be specified under tracing" | |||||
| if max_output is None: | |||||
| max_output = boxes.shape[0] | |||||
| op = builtin.NMSKeep(iou_thresh, max_output) | op = builtin.NMSKeep(iou_thresh, max_output) | ||||
| inp = utils.convert_inputs(boxes.reshape(1, -1, 4)) | inp = utils.convert_inputs(boxes.reshape(1, -1, 4)) | ||||
| indices, count = apply(op, *inp) | indices, count = apply(op, *inp) | ||||
| indices = indices[0][: count.item()] | |||||
| indices = indices[0][: count[0]] | |||||
| keep_inds = sorted_idx[indices] | keep_inds = sorted_idx[indices] | ||||
| return keep_inds | return keep_inds | ||||
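A hedged usage sketch of the new ``max_output`` argument (tensor values and the bound of 16 are illustrative): under an active trace the number of kept boxes cannot be inferred, so a static upper bound must be supplied.

```python
import numpy as np
import megengine.functional as F
from megengine import tensor
from megengine.jit import trace  # assuming trace lives in megengine.jit

@trace
def keep(boxes, scores):
    # max_output is mandatory here because the function is traced
    return F.nn.nms(boxes, scores, iou_thresh=0.7, max_output=16)

boxes = tensor(np.random.rand(100, 4).astype("float32"))
scores = tensor(np.random.rand(100).astype("float32"))
kept = keep(boxes, scores)
```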
| def batched_nms( | |||||
| boxes: Tensor, scores: Tensor, idxs: Tensor, iou_thresh: float, | |||||
| ) -> Tensor: | |||||
| r""" | |||||
| Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union (IoU). | |||||
| :param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format | |||||
| :param iou_thresh: iou threshold for overlapping | |||||
| :param idxs: tensor of shape `(N,)`, the class indices of boxes in the batch. | |||||
| :param scores: tensor of shape `(N,)`, the score of boxes. | |||||
| :return: indices and the number of the elements that have been kept by NMS | |||||
| Examples: | |||||
| .. testcode:: | |||||
| import numpy as np | |||||
| from megengine import tensor | |||||
| import megengine.functional as F | |||||
| x = np.zeros((100,4)) | |||||
| np.random.seed(42) | |||||
| x[:,:2] = np.random.rand(100,2)*20 | |||||
| x[:,2:] = np.random.rand(100,2)*20 + 100 | |||||
| scores = tensor(np.random.rand(100)) | |||||
| idxs = tensor(np.random.randint(0, 10, 100)) | |||||
| inp = tensor(x) | |||||
| result = F.batched_nms(inp, scores, idxs, iou_thresh=0.6) | |||||
| print(result.numpy()) | |||||
| Outputs: | |||||
| .. testoutput:: | |||||
| [75 41 99 98 69 64 11 27 35 18] | |||||
| """ | |||||
| assert ( | |||||
| boxes.ndim == 2 and boxes.shape[1] == 4 | |||||
| ), "the expected shape of boxes is (N, 4)" | |||||
| assert scores.ndim == 1, "the expected shape of scores is (N,)" | |||||
| assert idxs.ndim == 1, "the expected shape of idxs is (N,)" | |||||
| assert boxes.shape[0] == scores.shape[0] == idxs.shape[0] | |||||
| boxes = boxes.detach() | |||||
| scores = scores.detach() | |||||
| idxs = idxs.detach() | |||||
| max_coordinate = boxes.max() | |||||
| offsets = idxs.astype("float32") * (max_coordinate + 1) | |||||
| boxes = boxes + offsets.reshape(-1, 1).broadcast(boxes.shape[0], 4) | |||||
| sorted_idx = argsort(scores, descending=True) | |||||
| boxes = boxes[sorted_idx] | |||||
| max_output = boxes.shape[0] | |||||
| op = builtin.NMSKeep(iou_thresh, max_output) | |||||
| inp = utils.convert_inputs(boxes.reshape(1, -1, 4)) | |||||
| indices, count = apply(op, *inp) | |||||
| indices = indices[0][: count.item()] | |||||
| keep_inds = sorted_idx[indices] | |||||
| return keep_inds | |||||
| from .loss import * # isort:skip | |||||
| from .quantized import conv_bias_activation # isort:skip | |||||
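``batched_nms`` is dropped by this diff. A minimal sketch of recovering its per-class behavior with the remaining ``nms``, following the deleted implementation (the broadcasting of the `(N, 1)` offsets over `(N, 4)` boxes is an assumption):

```python
import megengine.functional as F

def batched_nms_sketch(boxes, scores, idxs, iou_thresh, max_output=None):
    # shift each class's boxes into a disjoint coordinate range so boxes
    # of different classes can never suppress each other
    max_coordinate = boxes.max()
    offsets = idxs.astype("float32") * (max_coordinate + 1)
    shifted = boxes + offsets.reshape(-1, 1)
    return F.nn.nms(shifted, scores, iou_thresh, max_output=max_output)
```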
| @@ -1,34 +0,0 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| # | |||||
| # Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, | |||||
| # software distributed under the License is distributed on an | |||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| import numpy as np | |||||
| from ..tensor import Tensor | |||||
| from .distributed import all_reduce_sum | |||||
| from .tensor import param_pack_concat, param_pack_split | |||||
| def get_offsets(shapes): | |||||
| offsets = [] | |||||
| offset = 0 | |||||
| for shape in shapes: | |||||
| offsets.append(offset) | |||||
| offset += int(np.prod(shape)) | |||||
| offsets.append(offset) | |||||
| return offsets | |||||
| def pack_allreduce_split(pack_list, shapes, group, reduce_method): | |||||
| offsets_val = get_offsets(shapes) | |||||
| offsets = Tensor(offsets_val) | |||||
| packed_grads = param_pack_concat(pack_list, offsets, offsets_val) | |||||
| packed_grads = all_reduce_sum(packed_grads, group) | |||||
| if reduce_method == "mean": | |||||
| packed_grads /= group.size | |||||
| grads = param_pack_split(packed_grads, offsets_val, shapes) | |||||
| return grads | |||||
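This deleted helper packed many small gradients into one buffer so that a single all-reduce replaces many small ones. A NumPy sketch of just the pack/split bookkeeping (the collective itself is elided):

```python
import numpy as np

grads = [np.ones((1,), np.float32), np.ones((3, 3), np.float32)]
packed = np.concatenate([g.reshape(-1) for g in grads])  # one flat buffer
# ... a single all_reduce_sum over `packed` would run here ...
split_points = np.cumsum([g.size for g in grads])[:-1]
restored = [s.reshape(g.shape) for s, g in zip(np.split(packed, split_points), grads)]
```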
| @@ -34,26 +34,23 @@ def conv_bias_activation( | |||||
| :param weight: convolution kernel. | :param weight: convolution kernel. | ||||
| :param bias: bias added to the result of convolution | :param bias: bias added to the result of convolution | ||||
| :param stride: stride of the 2D convolution operation. Default: 1 | :param stride: stride of the 2D convolution operation. Default: 1 | ||||
| :param padding: size of the paddings added to the input on both sides of its | |||||
| spatial dimensions. Only zero-padding is supported. Default: 0 | |||||
| :param padding: size of the paddings added to the input on both sides of its spatial dimensions. Only zero-padding is supported. Default: 0 | |||||
| :param dilation: dilation of the 2D convolution operation. Default: 1 | :param dilation: dilation of the 2D convolution operation. Default: 1 | ||||
| :param groups: number of groups to divide input and output channels into, | |||||
| so as to perform a "grouped convolution". When groups is not 1, | |||||
| in_channels and out_channels must be divisible by groups, | |||||
| :param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1, | |||||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||||
| and the shape of weight should be `(groups, out_channel // groups, | and the shape of weight should be `(groups, out_channel // groups, | ||||
| in_channels // groups, height, width)`. | in_channels // groups, height, width)`. | ||||
| :type conv_mode: string or :class:`P.Convolution.Mode`. | :type conv_mode: string or :class:`P.Convolution.Mode`. | ||||
| :param conv_mode: supports 'CROSS_CORRELATION' or 'CONVOLUTION'. Default: | :param conv_mode: supports 'CROSS_CORRELATION' or 'CONVOLUTION'. Default: | ||||
| 'CROSS_CORRELATION' | 'CROSS_CORRELATION' | ||||
| :param dtype: support for np.dtype, Default: np.int8 | |||||
| :param dtype: data type of the output tensor (a ``np.dtype``). Default: np.int8 | |||||
| :param scale: scale if use quantization, Default: 0.0 | :param scale: scale if use quantization, Default: 0.0 | ||||
| :param zero_point: scale if use quantization quint8, Default: 0.0 | :param zero_point: scale if use quantization quint8, Default: 0.0 | ||||
| :type compute_mode: string or | :type compute_mode: string or | ||||
| :class:`P.Convolution.ComputeMode`. | :class:`P.Convolution.ComputeMode`. | ||||
| :param compute_mode: when set to 'DEFAULT', no special requirements will be | |||||
| placed on the precision of intermediate results. When set to 'FLOAT32', | |||||
| Float32 would be used for accumulator and intermediate result, but only | |||||
| effective when input and output are of Float16 dtype. | |||||
| :param compute_mode: when set to "DEFAULT", no special requirements will be | |||||
| placed on the precision of intermediate results. When set to "FLOAT32", | |||||
| "Float32" would be used for accumulator and intermediate result, but only effective when input and output are of Float16 dtype. | |||||
| """ | """ | ||||
| ph, pw = _pair(padding) | ph, pw = _pair(padding) | ||||
| @@ -19,6 +19,7 @@ from ..core.ops import builtin | |||||
| from ..core.ops._internal import param_defs as P | from ..core.ops._internal import param_defs as P | ||||
| from ..core.ops.special import Const | from ..core.ops.special import Const | ||||
| from ..core.tensor.core import TensorBase, TensorWrapperBase, apply | from ..core.tensor.core import TensorBase, TensorWrapperBase, apply | ||||
| from ..core.tensor.tensor_wrapper import _broadcast, _remove_axis | |||||
| from ..core.tensor.utils import ( | from ..core.tensor.utils import ( | ||||
| astensor1d, | astensor1d, | ||||
| convert_inputs, | convert_inputs, | ||||
| @@ -31,27 +32,22 @@ from ..tensor import Tensor | |||||
| from .elemwise import ceil | from .elemwise import ceil | ||||
| __all__ = [ | __all__ = [ | ||||
| "add_axis", | |||||
| "arange", | "arange", | ||||
| "broadcast", | |||||
| "broadcast_to", | |||||
| "concat", | "concat", | ||||
| "cond_take", | "cond_take", | ||||
| "transpose", | |||||
| "add_axis", | |||||
| "expand_dims", | |||||
| "eye", | "eye", | ||||
| "flatten", | "flatten", | ||||
| "full", | "full", | ||||
| "full_like", | "full_like", | ||||
| "gather", | "gather", | ||||
| "identity", | |||||
| "linspace", | "linspace", | ||||
| "ones", | "ones", | ||||
| "ones_like", | "ones_like", | ||||
| "param_pack_concat", | |||||
| "param_pack_split", | |||||
| "reshape", | "reshape", | ||||
| "remove_axis", | |||||
| "split", | "split", | ||||
| "squeeze", | |||||
| "stack", | "stack", | ||||
| "scatter", | "scatter", | ||||
| "transpose", | "transpose", | ||||
| @@ -61,11 +57,10 @@ __all__ = [ | |||||
| ] | ] | ||||
| def eye(shape, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor: | |||||
| def eye(N, M=None, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor: | |||||
| """Returns a 2D tensor with ones on the diagonal and zeros elsewhere. | """Returns a 2D tensor with ones on the diagonal and zeros elsewhere. | ||||
| :param shape: expected shape of otuput tensor. | |||||
| :param N: number of rows, or the expected shape of the whole output tensor. | |||||
| :param M: number of columns. Default: None | |||||
| :param dtype: data type. Default: "float32" | :param dtype: data type. Default: "float32" | ||||
| :param device: compute node of the matrix. Default: None | :param device: compute node of the matrix. Default: None | ||||
| :return: eye matrix. | :return: eye matrix. | ||||
| @@ -77,8 +72,7 @@ def eye(shape, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor: | |||||
| import numpy as np | import numpy as np | ||||
| import megengine.functional as F | import megengine.functional as F | ||||
| data_shape = (4, 6) | |||||
| out = F.eye(data_shape, dtype=np.float32) | |||||
| out = F.eye(4, 6, dtype=np.float32) | |||||
| print(out.numpy()) | print(out.numpy()) | ||||
| Outputs: | Outputs: | ||||
| @@ -91,8 +85,17 @@ def eye(shape, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor: | |||||
| [0. 0. 0. 1. 0. 0.]] | [0. 0. 0. 1. 0. 0.]] | ||||
| """ | """ | ||||
| if M is not None: | |||||
| if isinstance(N, Tensor) or isinstance(M, Tensor): | |||||
| shape = astensor1d((N, M)) | |||||
| else: | |||||
| shape = Tensor([N, M], dtype="int32", device=device) | |||||
| elif isinstance(N, Tensor): | |||||
| shape = N | |||||
| else: | |||||
| shape = Tensor(N, dtype="int32", device=device) | |||||
| op = builtin.Eye(k=0, dtype=dtype, comp_node=device) | op = builtin.Eye(k=0, dtype=dtype, comp_node=device) | ||||
| (result,) = apply(op, Tensor(shape, dtype="int32", device=device)) | |||||
| (result,) = apply(op, shape) | |||||
| return result | return result | ||||
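A short usage sketch of the new signature, matching the updated docstring example:

```python
import numpy as np
import megengine.functional as F

a = F.eye(3)                       # 3x3 identity
b = F.eye(4, 6, dtype=np.float32)  # rectangular 4x6 eye matrix, as in the example above
```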
| @@ -106,7 +109,7 @@ def full(shape, value, dtype="float32", device=None): | |||||
| (x,) = Const(value, dtype=dtype, device=device)( | (x,) = Const(value, dtype=dtype, device=device)( | ||||
| Tensor(value, dtype=dtype, device=device) | Tensor(value, dtype=dtype, device=device) | ||||
| ) | ) | ||||
| return broadcast(x, shape) | |||||
| return broadcast_to(x, shape) | |||||
| def ones(shape, dtype="float32", device=None): | def ones(shape, dtype="float32", device=None): | ||||
| @@ -160,7 +163,7 @@ def zeros_like(inp: Tensor) -> Tensor: | |||||
| print(out.numpy()) | print(out.numpy()) | ||||
| Outputs: | Outputs: | ||||
| .. testoutput:: | .. testoutput:: | ||||
| [[0 0 0] | [[0 0 0] | ||||
| @@ -171,7 +174,7 @@ def zeros_like(inp: Tensor) -> Tensor: | |||||
| def ones_like(inp: Tensor) -> Tensor: | def ones_like(inp: Tensor) -> Tensor: | ||||
| """Returns a identity tensor with the same shape as input tensor. | |||||
| """Returns a ones tensor with the same shape as input tensor. | |||||
| """ | """ | ||||
| return ones(inp.shape, dtype=inp.dtype, device=inp.device) | return ones(inp.shape, dtype=inp.dtype, device=inp.device) | ||||
| @@ -182,19 +185,7 @@ def full_like(inp: Tensor, value: Union[int, float]) -> Tensor: | |||||
| return full(inp.shape, value, dtype=inp.dtype, device=inp.device) | return full(inp.shape, value, dtype=inp.dtype, device=inp.device) | ||||
| def identity(inp: Tensor) -> Tensor: | |||||
| """Applies an identity transform to the input tensor. | |||||
| :param inp: input tensor. | |||||
| :return: output tensor. | |||||
| """ | |||||
| op = builtin.Identity() | |||||
| (data,) = convert_inputs(inp) | |||||
| (output,) = apply(op, data) | |||||
| return output | |||||
| def broadcast(inp: Tensor, shape: Union[int, Iterable[int]]) -> Tensor: | |||||
| def broadcast_to(inp: Tensor, shape: Union[int, Iterable[int]]) -> Tensor: | |||||
| """ | """ | ||||
| Broadcasts a tensor to given shape. | Broadcasts a tensor to given shape. | ||||
| @@ -211,7 +202,7 @@ def broadcast(inp: Tensor, shape: Union[int, Iterable[int]]) -> Tensor: | |||||
| import megengine.functional as F | import megengine.functional as F | ||||
| data = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | data = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) | ||||
| out = F.broadcast(data, (4, 2, 3)) | |||||
| out = F.broadcast_to(data, (4, 2, 3)) | |||||
| print(out.numpy()) | print(out.numpy()) | ||||
| Outputs: | Outputs: | ||||
| @@ -231,9 +222,7 @@ def broadcast(inp: Tensor, shape: Union[int, Iterable[int]]) -> Tensor: | |||||
| [3. 4. 5.]]] | [3. 4. 5.]]] | ||||
| """ | """ | ||||
| shape = astensor1d(shape, inp, dtype="int32", device=inp.device) | |||||
| (result,) = apply(builtin.Broadcast(), inp, shape) | |||||
| return result | |||||
| return _broadcast(inp, shape) | |||||
| def concat(inps: Iterable[Tensor], axis: int = 0, device=None) -> Tensor: | def concat(inps: Iterable[Tensor], axis: int = 0, device=None) -> Tensor: | ||||
| @@ -241,8 +230,8 @@ def concat(inps: Iterable[Tensor], axis: int = 0, device=None) -> Tensor: | |||||
| Concat some tensors | Concat some tensors | ||||
| :param inps: input tensors to concat. | :param inps: input tensors to concat. | ||||
| :param axis: dimension over which the tensors are concatenated. Default: 0 | |||||
| :param device: comp node output on. Default: None | |||||
| :param axis: the dimension over which the tensors are concatenated. Default: 0 | |||||
| :param device: the device on which the output will reside. Default: None | |||||
| :return: output tensor. | :return: output tensor. | ||||
| Examples: | Examples: | ||||
| @@ -290,7 +279,7 @@ def stack(inps, axis=0, device=None): | |||||
| :param inps: input tensors. | :param inps: input tensors. | ||||
| :param axis: which axis will be concatenated. | :param axis: which axis will be concatenated. | ||||
| :param device: The comp node output on. Default: None | |||||
| :param device: the device on which the output will reside. Default: None | |||||
| :return: output concatenated tensor. | :return: output concatenated tensor. | ||||
| Examples: | Examples: | ||||
| @@ -322,7 +311,7 @@ def stack(inps, axis=0, device=None): | |||||
| if len(shapes) != 1: | if len(shapes) != 1: | ||||
| raise ValueError("All input tensors must have the same shape") | raise ValueError("All input tensors must have the same shape") | ||||
| inps = [add_axis(inp, axis=axis) for inp in inps] | |||||
| inps = [expand_dims(inp, axis=axis) for inp in inps] | |||||
| return concat(inps, axis=axis, device=device) | return concat(inps, axis=axis, device=device) | ||||
| @@ -331,7 +320,7 @@ def split(inp, nsplits_or_sections, axis=0): | |||||
| When nsplits_or_sections is int, the last tensor may be smaller than others. | When nsplits_or_sections is int, the last tensor may be smaller than others. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param nsplits_or_sections: number of sub tensors or section information list. | |||||
| :param nsplits_or_sections: number of sub-tensors, or a list of section sizes. | |||||
| :param axis: which axis will be split. | :param axis: which axis will be split. | ||||
| :return: output tensor list. | :return: output tensor list. | ||||
| @@ -399,8 +388,7 @@ def _get_idx(index, axis): | |||||
| 0, index.shape[i] - 1, index.shape[i], device=index.device, | 0, index.shape[i] - 1, index.shape[i], device=index.device, | ||||
| ) | ) | ||||
| arange = ( | arange = ( | ||||
| arange.reshape(*shape) | |||||
| .broadcast(index.shape) | |||||
| broadcast_to(arange.reshape(*shape), index.shape) | |||||
| .reshape(-1) | .reshape(-1) | ||||
| .astype(np.int32) | .astype(np.int32) | ||||
| ) | ) | ||||
| @@ -411,7 +399,8 @@ def _get_idx(index, axis): | |||||
| def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | ||||
| r"""Gathers data from inp on axis using index. | |||||
| # TODO: rewrite doc | |||||
| r"""Gathers data from input tensor on axis using index. | |||||
| For a 3-D tensor, the output is specified by:: | For a 3-D tensor, the output is specified by:: | ||||
| @@ -419,14 +408,14 @@ def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | |||||
| out[i][j][k] = inp[i][index[i][j][k]][k] # if axis == 1 | out[i][j][k] = inp[i][index[i][j][k]][k] # if axis == 1 | ||||
| out[i][j][k] = inp[i][j][index[i][j][k]] # if axis == 2 | out[i][j][k] = inp[i][j][index[i][j][k]] # if axis == 2 | ||||
| if inp is an n-dimensional tensor with size | |||||
| if input tensor is an n-dimensional tensor with size | |||||
| :math:`(x_0,x_1,...,x_{i-1},x_i,x_{i+1},...,x_{n-1})` and axis=i, | :math:`(x_0,x_1,...,x_{i-1},x_i,x_{i+1},...,x_{n-1})` and axis=i, | ||||
| then index must be an n-dimensional tensor with size | |||||
| then index must be an n-dimensional tensor with size | |||||
| :math:`(x_0,x_1,...,x_{i-1},y,x_{i+1},...,x_{n-1})` where :math:`y\ge 1` and | :math:`(x_0,x_1,...,x_{i-1},y,x_{i+1},...,x_{n-1})` where :math:`y\ge 1` and | ||||
| output will have the same size as index. | output will have the same size as index. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param axis: axis along which to index. | |||||
| :param axis: along which axis to index. | |||||
| :param index: indices of elements to gather. | :param index: indices of elements to gather. | ||||
| :return: output tensor. | :return: output tensor. | ||||
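A NumPy sketch of the axis==1 rule stated above; `np.take_along_axis` implements the same contract:

```python
import numpy as np

inp = np.arange(12, dtype=np.float32).reshape(3, 4)
index = np.array([[0, 2],
                  [1, 0],
                  [2, 1]])

out = np.empty(index.shape, dtype=inp.dtype)
for i in range(index.shape[0]):
    for j in range(index.shape[1]):
        out[i, j] = inp[i, index[i, j]]  # out[i][j] = inp[i][index[i][j]]

assert (out == np.take_along_axis(inp, index, axis=1)).all()
```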
| @@ -482,20 +471,21 @@ def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: | |||||
| def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: | def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: | ||||
| r"""Writes all values from the tensor source into inp | |||||
| # TODO: rewrite doc | |||||
| r"""Writes all values from the tensor source into input tensor | |||||
| at the indices specified in the index tensor. | at the indices specified in the index tensor. | ||||
| For each value in source, its output index is specified by its index | For each value in source, its output index is specified by its index | ||||
| in source for ``axis != dimension`` and by the corresponding value in | in source for ``axis != dimension`` and by the corresponding value in | ||||
| index for ``axis = dimension``. | index for ``axis = dimension``. | ||||
| For a 3-D tensor, inp is updated as:: | |||||
| For a 3-D tensor, input tensor is updated as:: | |||||
| inp[index[i][j][k]][j][k] = source[i][j][k] # if axis == 0 | inp[index[i][j][k]][j][k] = source[i][j][k] # if axis == 0 | ||||
| inp[i][index[i][j][k]][k] = source[i][j][k] # if axis == 1 | inp[i][index[i][j][k]][k] = source[i][j][k] # if axis == 1 | ||||
| inp[i][j][index[i][j][k]] = source[i][j][k] # if axis == 2 | inp[i][j][index[i][j][k]] = source[i][j][k] # if axis == 2 | ||||
| inp, index and source should have same number of dimensions. | |||||
| ``inp``, ``index`` and ``source`` should have same number of dimensions. | |||||
| It is also required that ``source.shape(d) <= inp.shape(d)`` and ``index.shape(d) == source.shape(d)`` | It is also required that ``source.shape(d) <= inp.shape(d)`` and ``index.shape(d) == source.shape(d)`` | ||||
| for all dimensions ``d``. | for all dimensions ``d``. | ||||
| @@ -504,10 +494,10 @@ def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: | |||||
| .. note:: | .. note:: | ||||
| Please notice that, due to performance issues, the result is uncertain on the GPU device | Please notice that, due to performance issues, the result is uncertain on the GPU device | ||||
| if scatter difference positions from source to the same destination position | |||||
| if scattering different positions from source to the same destination position | |||||
| with regard to the index tensor. | with regard to the index tensor. | ||||
| Show the case using the following examples, the oup[0][2] is maybe | |||||
| As the following example shows, oup[0][2] may come | |||||
| from source[0][2] (whose value is 0.2256) or from source[1][2] (whose value is 0.5339) | from source[0][2] (whose value is 0.2256) or from source[1][2] (whose value is 0.5339) | ||||
| if index[1][2] is changed from 1 to 0. | if index[1][2] is changed from 1 to 0. | ||||
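A NumPy sketch of the collision described in the note (array values chosen to echo the 0.2256/0.5339 example; the sequential loop makes the last write win, whereas on GPU the winner is undefined):

```python
import numpy as np

inp = np.zeros((2, 5), np.float32)
source = np.array([[0.1, 0.1, 0.2256, 0.1, 0.1],
                   [0.2, 0.2, 0.5339, 0.2, 0.2]], np.float32)
index = np.array([[0, 1, 0, 1, 0],
                  [1, 0, 0, 0, 1]])  # index[1][2] == 0 collides with index[0][2]

for i in range(source.shape[0]):
    for j in range(source.shape[1]):
        inp[index[i, j], j] = source[i, j]  # axis == 0 rule

print(inp[0, 2])  # 0.5339 here; either value is possible on GPU
```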
| @@ -593,7 +583,7 @@ def where(mask: Tensor, x: Tensor, y: Tensor) -> Tensor: | |||||
| \textrm{out}_i = x_i \textrm{ if } \textrm{mask}_i \textrm{ is True else } y_i | \textrm{out}_i = x_i \textrm{ if } \textrm{mask}_i \textrm{ is True else } y_i | ||||
| :param mask: a mask used for choosing x or y. | |||||
| :param mask: a mask used for choosing ``x`` or ``y``. | |||||
| :param x: first choice. | :param x: first choice. | ||||
| :param y: second choice. | :param y: second choice. | ||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -649,7 +639,7 @@ def where(mask: Tensor, x: Tensor, y: Tensor) -> Tensor: | |||||
| def cond_take(mask: Tensor, x: Tensor) -> Tensor: | def cond_take(mask: Tensor, x: Tensor) -> Tensor: | ||||
| r""" | r""" | ||||
| Take elements from data if specific condition is satisfied on mask. | |||||
| Takes elements from data if specific condition is satisfied on mask. | |||||
| This operator has two outputs: the first is the elements taken, | This operator has two outputs: the first is the elements taken, | ||||
| and the second is the indices corresponding to those elements; | and the second is the indices corresponding to those elements; | ||||
| they are both 1-dimensional. High-dimension input would first be flattened. | they are both 1-dimensional. High-dimension input would first be flattened. | ||||
| @@ -696,7 +686,7 @@ def transpose(inp: Tensor, pattern: Iterable[int]) -> Tensor: | |||||
| Swaps shapes and strides according to given pattern. | Swaps shapes and strides according to given pattern. | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param pattern: a list of integers including 0, 1, ... , ``ndim``-1, | |||||
| :param pattern: a list of integers including 0, 1, ... , ``ndim``-1, | |||||
| and any number of ``'x'`` char in dimensions where this tensor should be broadcasted. For examples: | and any number of ``'x'`` char in dimensions where this tensor should be broadcasted. For examples: | ||||
| * (``'x'``) -> make a 0d (scalar) into a 1d vector | * (``'x'``) -> make a 0d (scalar) into a 1d vector | ||||
| @@ -707,7 +697,7 @@ def transpose(inp: Tensor, pattern: Iterable[int]) -> Tensor: | |||||
| * (2, 0, 1) -> AxBxC to CxAxB | * (2, 0, 1) -> AxBxC to CxAxB | ||||
| * (0, ``'x'``, 1) -> AxB to Ax1xB | * (0, ``'x'``, 1) -> AxB to Ax1xB | ||||
| * (1, ``'x'``, 0) -> AxB to Bx1xA | * (1, ``'x'``, 0) -> AxB to Bx1xA | ||||
| * (1,) -> This remove dimensions 0. It must be a broadcastable dimension (1xA to A) | |||||
| * (1,) -> this removes dimension 0. It must be a broadcastable dimension (1xA to A) | |||||
| :return: output tensor. | :return: output tensor. | ||||
| @@ -730,13 +720,7 @@ def transpose(inp: Tensor, pattern: Iterable[int]) -> Tensor: | |||||
| [1 0]] | [1 0]] | ||||
| """ | """ | ||||
| op = builtin.Dimshuffle(pattern) | |||||
| (inp,) = convert_inputs(inp) | |||||
| (result,) = apply(op, inp) | |||||
| return result | |||||
| dimshuffle = transpose | |||||
| return inp.transpose(pattern) | |||||
| def reshape(inp: Tensor, target_shape: Iterable[int]) -> Tensor: | def reshape(inp: Tensor, target_shape: Iterable[int]) -> Tensor: | ||||
| @@ -745,8 +729,7 @@ def reshape(inp: Tensor, target_shape: Iterable[int]) -> Tensor: | |||||
| remain unchanged | remain unchanged | ||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param target_shape: target shape, the components would be concatenated to form the | |||||
| target shape, and it can contain an element of -1 representing unspec_axis. | |||||
| :param target_shape: target shape, it can contain an element of -1 representing ``unspec_axis``. | |||||
| Examples: | Examples: | ||||
| @@ -773,26 +756,7 @@ def reshape(inp: Tensor, target_shape: Iterable[int]) -> Tensor: | |||||
| [10 11]]] | [10 11]]] | ||||
| """ | """ | ||||
| if isinstance(target_shape, (TensorBase, TensorWrapperBase)): | |||||
| target_shape = target_shape.numpy() | |||||
| target_shape = tuple(map(int, target_shape)) | |||||
| unspec_axis = None | |||||
| for i, s in enumerate(target_shape): | |||||
| if s < 0: | |||||
| if s != -1: | |||||
| raise ValueError("expect shape[{}] >= -1, got {}".format(i, s)) | |||||
| if unspec_axis is not None: | |||||
| raise ValueError("multiple -1 in shape: {} & {}".format(unspec_axis, i)) | |||||
| unspec_axis = i | |||||
| # TODO: device should be None (cpu) | |||||
| (target_shape,) = Const(target_shape, dtype="int32", device=inp.device)(inp) | |||||
| if unspec_axis is None: | |||||
| op = builtin.Reshape() | |||||
| else: | |||||
| op = builtin.Reshape(unspec_axis=unspec_axis) | |||||
| (x,) = apply(op, inp, target_shape) | |||||
| return x | |||||
| return inp.reshape(target_shape) | |||||
| AxisAddRemove = builtin.AxisAddRemove | AxisAddRemove = builtin.AxisAddRemove | ||||
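The deleted block resolved a single ``-1`` (the ``unspec_axis``) in the target shape; that logic now lives in ``Tensor.reshape``. The rule it applies, in a NumPy sketch:

```python
import numpy as np

x = np.arange(12)
print(x.reshape(3, -1).shape)  # (3, 4): the -1 axis is inferred from the element count
# reshape(-1, -1) would be rejected: at most one axis may be unspecified
```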
| @@ -837,7 +801,7 @@ def flatten(inp: Tensor, start_axis: int = 0, end_axis: int = -1) -> Tensor: | |||||
| return inp.reshape(*target_shape) | return inp.reshape(*target_shape) | ||||
| def add_axis(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
| def expand_dims(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
| r""" | r""" | ||||
| Adds dimension before given axis. | Adds dimension before given axis. | ||||
| @@ -854,7 +818,7 @@ def add_axis(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
| import megengine.functional as F | import megengine.functional as F | ||||
| x = tensor([1, 2]) | x = tensor([1, 2]) | ||||
| out = F.add_axis(x, 0) | |||||
| out = F.expand_dims(x, 0) | |||||
| print(out.shape) | print(out.shape) | ||||
| Outputs: | Outputs: | ||||
| @@ -883,12 +847,7 @@ def add_axis(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: | |||||
| return result | return result | ||||
| add_axis = add_axis | |||||
| def remove_axis( | |||||
| inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None | |||||
| ) -> Tensor: | |||||
| def squeeze(inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None) -> Tensor: | |||||
| r""" | r""" | ||||
| Removes dimension of shape 1. | Removes dimension of shape 1. | ||||
| @@ -905,7 +864,7 @@ def remove_axis( | |||||
| import megengine.functional as F | import megengine.functional as F | ||||
| x = tensor(np.array([1, 2], dtype=np.int32).reshape(1, 1, 2, 1)) | x = tensor(np.array([1, 2], dtype=np.int32).reshape(1, 1, 2, 1)) | ||||
| out = F.remove_axis(x, 3) | |||||
| out = F.squeeze(x, 3) | |||||
| print(out.shape) | print(out.shape) | ||||
| Outputs: | Outputs: | ||||
| @@ -915,25 +874,7 @@ def remove_axis( | |||||
| (1, 1, 2) | (1, 1, 2) | ||||
| """ | """ | ||||
| Param = builtin.AxisAddRemove.Param | |||||
| def get_axes(): | |||||
| if axis is None: | |||||
| return [i for i, s in enumerate(inp.shape) if s == 1] | |||||
| try: | |||||
| return [int(axis)] | |||||
| except (TypeError, ValueError): | |||||
| pass | |||||
| return list(map(int, axis)) | |||||
| axis = get_axes() | |||||
| axis = sorted(i + inp.ndim if i < 0 else i for i in axis) | |||||
| axis = [a - i for i, a in enumerate(axis)] | |||||
| param = Param(*map(builtin.AxisAddRemove.AxisDesc.make_remove, axis)) | |||||
| op = builtin.AxisAddRemove(param=param) | |||||
| (result,) = apply(op, inp) | |||||
| return result | |||||
| return _remove_axis(inp, axis) | |||||
| def linspace( | def linspace( | ||||
| @@ -962,7 +903,7 @@ def linspace( | |||||
| print(a.numpy()) | print(a.numpy()) | ||||
| Outputs: | Outputs: | ||||
| .. testoutput:: | .. testoutput:: | ||||
| [ 3. 4.75 6.5 8.25 10. ] | [ 3. 4.75 6.5 8.25 10. ] | ||||
| @@ -982,15 +923,15 @@ def linspace( | |||||
| def arange( | def arange( | ||||
| start: Union[int, float, Tensor] = 0, | start: Union[int, float, Tensor] = 0, | ||||
| end: Optional[Union[int, float, Tensor]] = None, | |||||
| stop: Optional[Union[int, float, Tensor]] = None, | |||||
| step: Union[int, float, Tensor] = 1, | step: Union[int, float, Tensor] = 1, | ||||
| dtype="float32", | dtype="float32", | ||||
| device: Optional[CompNode] = None, | device: Optional[CompNode] = None, | ||||
| ) -> Tensor: | ) -> Tensor: | ||||
| r"""Returns a Tensor with values from start to end with adjacent interval step. | |||||
| r"""Returns a tensor with values from start to stop with adjacent interval step. | |||||
| :param start: starting value of the sequence, should be a scalar. | :param start: starting value of the sequence, should be a scalar. | ||||
| :param end: ending value of the squence, shoule be scalar. | |||||
| :param stop: ending value of the sequence, should be a scalar. | |||||
| :param step: gap between each pair of adjacent values. Default: 1 | :param step: gap between each pair of adjacent values. Default: 1 | ||||
| :param dtype: result data type. | :param dtype: result data type. | ||||
| :return: generated tensor. | :return: generated tensor. | ||||
| @@ -1004,7 +945,7 @@ def arange( | |||||
| a = F.arange(5) | a = F.arange(5) | ||||
| print(a.numpy()) | print(a.numpy()) | ||||
| Outputs: | Outputs: | ||||
| @@ -1014,96 +955,18 @@ def arange( | |||||
| [0. 1. 2. 3. 4.] | [0. 1. 2. 3. 4.] | ||||
| """ | """ | ||||
| if end is None: | |||||
| start, end = 0, start | |||||
| if stop is None: | |||||
| start, stop = 0, start | |||||
| if isinstance(start, Tensor): | if isinstance(start, Tensor): | ||||
| start = start.astype("float32") | start = start.astype("float32") | ||||
| if isinstance(end, Tensor): | |||||
| end = end.astype("float32") | |||||
| if isinstance(stop, Tensor): | |||||
| stop = stop.astype("float32") | |||||
| if isinstance(step, Tensor): | if isinstance(step, Tensor): | ||||
| step = step.astype("float32") | step = step.astype("float32") | ||||
| num = ceil(Tensor((end - start) / step, device=device)) | |||||
| num = ceil(Tensor((stop - start) / step, device=device)) | |||||
| stop = start + step * (num - 1) | stop = start + step * (num - 1) | ||||
| result = linspace(start, stop, num, device=device) | result = linspace(start, stop, num, device=device) | ||||
| if np.dtype(dtype) == np.int32: | if np.dtype(dtype) == np.int32: | ||||
| return result.astype(dtype) | return result.astype(dtype) | ||||
| return result | return result | ||||
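A worked sketch of the arithmetic above: ``num`` points are generated via ``linspace``, ending at ``start + step * (num - 1)``:

```python
import math
import numpy as np

start, stop, step = 3.0, 10.0, 2.0
num = math.ceil((stop - start) / step)  # 4 points
end = start + step * (num - 1)          # 9.0
print(np.linspace(start, end, num))     # [3. 5. 7. 9.], i.e. arange(3, 10, 2)
```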
| def param_pack_split(inp: Tensor, offsets: List, shapes: List) -> Tensor: | |||||
| r""" | |||||
| Returns split Tensor to Tensor list as offsets and shapes described, | |||||
| only used for parampack. | |||||
| :param inp: input tensor. | |||||
| :param offsets: offsets of outputs, length of 2 * n, | |||||
| while n is tensor nums you want to split, | |||||
| format `[begin0, end0, begin1, end1]`. | |||||
| :param shapes: tensor shapes of outputs. | |||||
| :return: split tensors. | |||||
| Examples: | |||||
| .. testcode:: | |||||
| import numpy as np | |||||
| import megengine.functional as F | |||||
| from megengine import tensor | |||||
| a = tensor(np.ones((10,), np.int32)) | |||||
| b, c = F.param_pack_split(a, [0, 1, 1, 10], [(1,), (3, 3)]) | |||||
| print(b.numpy()) | |||||
| print(c.numpy()) | |||||
| Outputs: | |||||
| .. testoutput:: | |||||
| [1] | |||||
| [[1 1 1] | |||||
| [1 1 1] | |||||
| [1 1 1]] | |||||
| """ | |||||
| op = builtin.ParamPackSplit() | |||||
| op.offsets = offsets | |||||
| op.shapes = shapes | |||||
| return apply(op, inp) | |||||
| def param_pack_concat(inps: List, offsets: Tensor, offsets_val: List) -> Tensor: | |||||
| r""" | |||||
| Returns concat Tensor, only used for parampack. | |||||
| :param inps: input tensors. | |||||
| :param offsets: device value of offsets. | |||||
| :param offsets_val: offsets of inputs, length of 2 * n, | |||||
| format [begin0, end0, begin1, end1]. | |||||
| :return: concat tensors | |||||
| Examples: | |||||
| .. testcode:: | |||||
| import numpy as np | |||||
| import megengine.functional as F | |||||
| from megengine import tensor | |||||
| a = tensor(np.ones((1,), np.int32)) | |||||
| b = tensor(np.ones((3, 3), np.int32)) | |||||
| offsets_val = [0, 1, 1, 10] | |||||
| offsets = tensor(offsets_val, np.int32) | |||||
| c = F.param_pack_concat([a, b], offsets, offsets_val) | |||||
| print(c.numpy()) | |||||
| Outputs: | |||||
| .. testoutput:: | |||||
| [1 1 1 1 1 1 1 1 1 1] | |||||
| """ | |||||
| op = builtin.ParamPackConcat() | |||||
| op.offsets = offsets_val | |||||
| return apply(op, *inps, offsets)[0] | |||||
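For reference, the ``[begin0, end0, begin1, end1, ...]`` layout consumed by the deleted ``param_pack_*`` helpers can be derived from shapes like this (an illustrative helper, not part of MegEngine's API):

```python
import numpy as np

def pack_offsets(shapes):
    offsets, begin = [], 0
    for shape in shapes:
        end = begin + int(np.prod(shape))
        offsets += [begin, end]
        begin = end
    return offsets

print(pack_offsets([(1,), (3, 3)]))  # [0, 1, 1, 10], as in the deleted docstring example
```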
| @@ -11,18 +11,24 @@ from typing import Iterable, Union | |||||
| import numpy as np | import numpy as np | ||||
| from ..core.ops.builtin import Copy | |||||
| from ..core._wrap import device as as_device | |||||
| from ..core.ops.builtin import Copy, Identity | |||||
| from ..core.tensor import Tensor | from ..core.tensor import Tensor | ||||
| from ..core.tensor.core import apply | from ..core.tensor.core import apply | ||||
| from .math import topk as _topk | from .math import topk as _topk | ||||
| from .tensor import transpose as _transpose | |||||
| from .tensor import broadcast_to, transpose | |||||
| __all__ = [ | |||||
| "topk_accuracy", | |||||
| "copy", | |||||
| ] | |||||
| def accuracy( | |||||
| def topk_accuracy( | |||||
| logits: Tensor, target: Tensor, topk: Union[int, Iterable[int]] = 1 | logits: Tensor, target: Tensor, topk: Union[int, Iterable[int]] = 1 | ||||
| ) -> Union[Tensor, Iterable[Tensor]]: | ) -> Union[Tensor, Iterable[Tensor]]: | ||||
| r""" | r""" | ||||
| Calculate the classification accuracy given predicted logits and ground-truth labels. | |||||
| Calculates the classification accuracy given predicted logits and ground-truth labels. | |||||
| :param logits: model predictions of shape `[batch_size, num_classes]`, | :param logits: model predictions of shape `[batch_size, num_classes]`, | ||||
| representing the probability (likelihood) of each class. | representing the probability (likelihood) of each class. | ||||
| @@ -40,7 +46,7 @@ def accuracy( | |||||
| logits = tensor(np.arange(80, dtype=np.int32).reshape(8,10)) | logits = tensor(np.arange(80, dtype=np.int32).reshape(8,10)) | ||||
| target = tensor(np.arange(8, dtype=np.int32)) | target = tensor(np.arange(8, dtype=np.int32)) | ||||
| top1, top5 = F.accuracy(logits, target, (1, 5)) | |||||
| top1, top5 = F.topk_accuracy(logits, target, (1, 5)) | |||||
| print(top1.numpy(), top5.numpy()) | print(top1.numpy(), top5.numpy()) | ||||
| Outputs: | Outputs: | ||||
| @@ -54,8 +60,8 @@ def accuracy( | |||||
| _, pred = _topk(logits, k=max(topk), descending=True) | _, pred = _topk(logits, k=max(topk), descending=True) | ||||
| accs = [] | accs = [] | ||||
| for k in topk: | for k in topk: | ||||
| correct = pred[:, :k].detach() == _transpose(target, (0, "x")).broadcast( | |||||
| target.shape[0], k | |||||
| correct = pred[:, :k].detach() == broadcast_to( | |||||
| transpose(target, (0, "x")), (target.shape[0], k) | |||||
| ) | ) | ||||
| accs.append(correct.astype(np.float32).sum() / target.shape[0]) | accs.append(correct.astype(np.float32).sum() / target.shape[0]) | ||||
| if len(topk) == 1: # type: ignore[arg-type] | if len(topk) == 1: # type: ignore[arg-type] | ||||
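A NumPy sketch of the top-k rule implemented above: a sample counts as correct if its label is among its k highest-scoring classes.

```python
import numpy as np

def topk_accuracy_np(logits, target, k):
    pred = np.argsort(-logits, axis=1)[:, :k]            # top-k class ids per sample
    return (pred == target[:, None]).any(axis=1).mean()

logits = np.arange(80).reshape(8, 10)  # the docstring's example input
target = np.arange(8)
print(topk_accuracy_np(logits, target, 1))  # 0.0
print(topk_accuracy_np(logits, target, 5))  # 0.375
```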
| @@ -63,25 +69,12 @@ def accuracy( | |||||
| return accs | return accs | ||||
| def zero_grad(inp: Tensor) -> Tensor: | |||||
| r""" | |||||
| Returns a tensor which is treated as constant during backward gradient calculation, | |||||
| i.e. its gradient is zero. | |||||
| :param inp: Input tensor. | |||||
| See implementation of :func:`~.softmax` for example. | |||||
| """ | |||||
| print("zero_grad is obsoleted, please use detach instead") | |||||
| raise NotImplementedError | |||||
| def copy(inp, cn): | |||||
| def copy(inp, device=None): | |||||
| r""" | r""" | ||||
| Copy tensor to another device. | |||||
| Copies tensor to another device. | |||||
| :param inp: input tensor. | :param inp: input tensor. | ||||
| :param cn: device that you copy to. | |||||
| :param device: destination device. | |||||
| Examples: | Examples: | ||||
| @@ -101,4 +94,6 @@ def copy(inp, cn): | |||||
| [1 2 3] | [1 2 3] | ||||
| """ | """ | ||||
| return apply(Copy(comp_node=cn), inp)[0] | |||||
| if device is None: | |||||
| return apply(Identity(), inp)[0] | |||||
| return apply(Copy(comp_node=as_device(device).to_c()), inp)[0] | |||||
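A usage sketch of the new signature (the device name is illustrative): with ``device=None`` the tensor is passed through ``Identity``; with a device given, a cross-device ``Copy`` is issued.

```python
import numpy as np
import megengine.functional as F
from megengine import tensor

x = tensor(np.array([1, 2, 3], dtype=np.int32))
y = F.copy(x)            # same-device copy via Identity
# z = F.copy(x, "cpu0")  # cross-device copy, assuming device "cpu0" exists
```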
| @@ -19,12 +19,12 @@ class InvalidGitHost(FetcherError): | |||||
| class GitPullError(FetcherError): | class GitPullError(FetcherError): | ||||
| """A git pull error occurred""" | |||||
| """A git pull error occurred.""" | |||||
| class GitCheckoutError(FetcherError): | class GitCheckoutError(FetcherError): | ||||
| """A git checkout error occurred""" | |||||
| """A git checkout error occurred.""" | |||||
| class InvalidProtocol(FetcherError): | class InvalidProtocol(FetcherError): | ||||
| """The protocol provided was somehow invalid""" | |||||
| """The protocol provided was somehow invalid.""" | |||||
| @@ -106,20 +106,20 @@ class GitSSHFetcher(RepoFetcherBase): | |||||
| :param git_host: | :param git_host: | ||||
| host address of git repo. | host address of git repo. | ||||
| example: github.com | |||||
| Example: github.com | |||||
| :param repo_info: | :param repo_info: | ||||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | ||||
| tag/branch. The default branch is ``master`` if not specified. | tag/branch. The default branch is ``master`` if not specified. | ||||
| example: ``"brain_sdk/MegBrain[:hub]"`` | |||||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | |||||
| :param use_cache: | :param use_cache: | ||||
| whether to use locally fetched code or completely re-fetch | |||||
| whether to use locally fetched code or completely re-fetch. | |||||
| :param commit: | :param commit: | ||||
| commit id on github or gitlab | |||||
| commit id on github or gitlab. | |||||
| :param silent: | :param silent: | ||||
| whether to accept the stdout and stderr of the subprocess with PIPE, instead of | whether to accept the stdout and stderr of the subprocess with PIPE, instead of | ||||
| displaying on the screen | |||||
| displaying on the screen. | |||||
| :return: | :return: | ||||
| directory where the repo code is stored | |||||
| directory where the repo code is stored. | |||||
| """ | """ | ||||
| if not cls._check_git_host(git_host): | if not cls._check_git_host(git_host): | ||||
| raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host)) | raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host)) | ||||
| @@ -215,24 +215,24 @@ class GitHTTPSFetcher(RepoFetcherBase): | |||||
| silent: bool = True, | silent: bool = True, | ||||
| ) -> str: | ) -> str: | ||||
| """ | """ | ||||
| Fetches git repo by HTTPS protocol | |||||
| Fetches git repo by HTTPS protocol. | |||||
| :param git_host: | :param git_host: | ||||
| host address of git repo | |||||
| example: github.com | |||||
| host address of git repo. | |||||
| Example: github.com | |||||
| :param repo_info: | :param repo_info: | ||||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | ||||
| tag/branch. The default branch is ``master`` if not specified. | tag/branch. The default branch is ``master`` if not specified. | ||||
| example: ``"brain_sdk/MegBrain[:hub]"`` | |||||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | |||||
| :param use_cache: | :param use_cache: | ||||
| whether to use locally cached code or completely re-fetch | |||||
| whether to use locally cached code or completely re-fetch. | |||||
| :param commit: | :param commit: | ||||
| commit id on github or gitlab | |||||
| commit id on github or gitlab. | |||||
| :param silent: | :param silent: | ||||
| whether to accept the stdout and stderr of the subprocess with PIPE, instead of | whether to accept the stdout and stderr of the subprocess with PIPE, instead of | ||||
| displaying on the screen | |||||
| displaying on the screen. | |||||
| :return: | :return: | ||||
| directory where the repo code is stored | |||||
| directory where the repo code is stored. | |||||
| """ | """ | ||||
| if not cls._check_git_host(git_host): | if not cls._check_git_host(git_host): | ||||
| raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host)) | raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host)) | ||||
| @@ -94,24 +94,24 @@ def _init_hub( | |||||
| commit: str = None, | commit: str = None, | ||||
| protocol: str = DEFAULT_PROTOCOL, | protocol: str = DEFAULT_PROTOCOL, | ||||
| ): | ): | ||||
| """Imports hubmodule like python import | |||||
| """Imports hubmodule like python import. | |||||
| :param repo_info: | :param repo_info: | ||||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | ||||
| tag/branch. The default branch is ``master`` if not specified. | tag/branch. The default branch is ``master`` if not specified. | ||||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | Example: ``"brain_sdk/MegBrain[:hub]"`` | ||||
| :param git_host: | :param git_host: | ||||
| host address of git repo | |||||
| host address of git repo. | |||||
| Example: github.com | Example: github.com | ||||
| :param use_cache: | :param use_cache: | ||||
| whether to use locally cached code or completely re-fetch | |||||
| whether to use locally cached code or completely re-fetch. | |||||
| :param commit: | :param commit: | ||||
| commit id on github or gitlab | |||||
| commit id on github or gitlab. | |||||
| :param protocol: | :param protocol: | ||||
| which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | ||||
| The value should be one of HTTPS, SSH. | The value should be one of HTTPS, SSH. | ||||
| :return: | :return: | ||||
| hubconf.py as a python module | |||||
| a python module. | |||||
| """ | """ | ||||
| cache_dir = os.path.expanduser(os.path.join(_get_megengine_home(), "hub")) | cache_dir = os.path.expanduser(os.path.join(_get_megengine_home(), "hub")) | ||||
| os.makedirs(cache_dir, exist_ok=True) | os.makedirs(cache_dir, exist_ok=True) | ||||
| @@ -137,24 +137,24 @@ def list( | |||||
| commit: str = None, | commit: str = None, | ||||
| protocol: str = DEFAULT_PROTOCOL, | protocol: str = DEFAULT_PROTOCOL, | ||||
| ) -> List[str]: | ) -> List[str]: | ||||
| """Lists all entrypoints available in repo hubconf | |||||
| """Lists all entrypoints available in repo hubconf. | |||||
| :param repo_info: | :param repo_info: | ||||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | ||||
| tag/branch. The default branch is ``master`` if not specified. | tag/branch. The default branch is ``master`` if not specified. | ||||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | Example: ``"brain_sdk/MegBrain[:hub]"`` | ||||
| :param git_host: | :param git_host: | ||||
| host address of git repo | |||||
| host address of git repo. | |||||
| Example: github.com | Example: github.com | ||||
| :param use_cache: | :param use_cache: | ||||
| whether to use locally cached code or completely re-fetch | |||||
| whether to use locally cached code or completely re-fetch. | |||||
| :param commit: | :param commit: | ||||
| commit id on github or gitlab | |||||
| commit id on github or gitlab. | |||||
| :param protocol: | :param protocol: | ||||
| which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | ||||
| The value should be one of HTTPS, SSH. | The value should be one of HTTPS, SSH. | ||||
| :return: | :return: | ||||
| all entrypoint names of the model | |||||
| all entrypoint names of the model. | |||||
| """ | """ | ||||
| hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) | hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) | ||||
| @@ -182,14 +182,14 @@ def load( | |||||
| tag/branch. The default branch is ``master`` if not specified. | tag/branch. The default branch is ``master`` if not specified. | ||||
| Example: ``"brain_sdk/MegBrain[:hub]"`` | Example: ``"brain_sdk/MegBrain[:hub]"`` | ||||
| :param entry: | :param entry: | ||||
| an entrypoint defined in hubconf | |||||
| an entrypoint defined in hubconf. | |||||
| :param git_host: | :param git_host: | ||||
| host address of git repo | |||||
| host address of git repo. | |||||
| Example: github.com | Example: github.com | ||||
| :param use_cache: | :param use_cache: | ||||
| whether to use locally cached code or completely re-fetch | |||||
| whether to use locally cached code or completely re-fetch. | |||||
| :param commit: | :param commit: | ||||
| commit id on github or gitlab | |||||
| commit id on github or gitlab. | |||||
| :param protocol: | :param protocol: | ||||
| which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | ||||
| The value should be one of HTTPS, SSH. | The value should be one of HTTPS, SSH. | ||||
| @@ -217,9 +217,9 @@ def help( | |||||
| ) -> str: | ) -> str: | ||||
| """This function returns docstring of entrypoint ``entry`` by following steps: | """This function returns docstring of entrypoint ``entry`` by following steps: | ||||
| 1. Pull the repo code specified by git and repo_info | |||||
| 1. Pull the repo code specified by git and repo_info. | |||||
| 2. Load the entry defined in repo's hubconf.py | 2. Load the entry defined in repo's hubconf.py | ||||
| 3. Return docstring of function entry | |||||
| 3. Return docstring of function entry. | |||||
| :param repo_info: | :param repo_info: | ||||
| a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional | ||||
| @@ -228,17 +228,17 @@ def help( | |||||
| :param entry: | :param entry: | ||||
| an entrypoint defined in hubconf.py | an entrypoint defined in hubconf.py | ||||
| :param git_host: | :param git_host: | ||||
| host address of git repo | |||||
| host address of git repo. | |||||
| Example: github.com | Example: github.com | ||||
| :param use_cache: | :param use_cache: | ||||
| whether to use locally cached code or completely re-fetch | |||||
| whether to use locally cached code or completely re-fetch. | |||||
| :param commit: | :param commit: | ||||
| commit id on github or gitlab | |||||
| commit id on github or gitlab. | |||||
| :param protocol: | :param protocol: | ||||
| which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. | ||||
| The value should be one of HTTPS, SSH. | The value should be one of HTTPS, SSH. | ||||
| :return: | :return: | ||||
| docstring of entrypoint ``entry`` | |||||
| docstring of entrypoint ``entry``. | |||||
| """ | """ | ||||
| hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) | hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) | ||||
| @@ -255,10 +255,10 @@ def load_serialized_obj_from_url(url: str, model_dir=None) -> Any: | |||||
| If the object is already present in ``model_dir``, it's deserialized and | If the object is already present in ``model_dir``, it's deserialized and | ||||
| returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``. | returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``. | ||||
| :param url: url to serialized object | |||||
| :param model_dir: dir to cache target serialized file | |||||
| :param url: url to serialized object. | |||||
| :param model_dir: dir to cache target serialized file. | |||||
| :return: loaded object | |||||
| :return: loaded object. | |||||
| """ | """ | ||||
| if model_dir is None: | if model_dir is None: | ||||
| model_dir = os.path.join(_get_megengine_home(), "serialized") | model_dir = os.path.join(_get_megengine_home(), "serialized") | ||||
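The caching contract described in the docstring, sketched with a placeholder URL:

```python
from megengine.hub import load_serialized_obj_from_url

# First call downloads into MGE_HOME/serialized (or the given model_dir);
# subsequent calls deserialize the cached file without refetching.
obj = load_serialized_obj_from_url("https://example.com/ckpt/model.pkl")
```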
| @@ -15,10 +15,10 @@ from typing import Iterator | |||||
| def load_module(name: str, path: str) -> types.ModuleType: | def load_module(name: str, path: str) -> types.ModuleType: | ||||
| """ | """ | ||||
| Loads module specified by name and path | |||||
| Loads module specified by name and path. | |||||
| :param name: module name | |||||
| :param path: module path | |||||
| :param name: module name. | |||||
| :param path: module path. | |||||
| """ | """ | ||||
| spec = importlib.util.spec_from_file_location(name, path) | spec = importlib.util.spec_from_file_location(name, path) | ||||
| module = importlib.util.module_from_spec(spec) | module = importlib.util.module_from_spec(spec) | ||||
| @@ -27,18 +27,18 @@ def load_module(name: str, path: str) -> types.ModuleType: | |||||
| def check_module_exists(module: str) -> bool: | def check_module_exists(module: str) -> bool: | ||||
| """Checks whether python module exists or not | |||||
| """Checks whether python module exists or not. | |||||
| :param module: name of module | |||||
| :param module: name of module. | |||||
| """ | """ | ||||
| return importlib.util.find_spec(module) is not None | return importlib.util.find_spec(module) is not None | ||||
| @contextmanager | @contextmanager | ||||
| def cd(target: str) -> Iterator[None]: | def cd(target: str) -> Iterator[None]: | ||||
| """Changes current directory to target | |||||
| """Changes current directory to target. | |||||
| :param target: target directory | |||||
| :param target: target directory. | |||||
| """ | """ | ||||
| prev = os.getcwd() | prev = os.getcwd() | ||||
| os.chdir(os.path.expanduser(target)) | os.chdir(os.path.expanduser(target)) | ||||
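Taken together, the three helpers behave as below; the module name and file path are placeholders, and `cd` restores the previous working directory on exit per its context-manager contract:

```python
import os

mod = load_module("my_hubconf", "/path/to/hubconf.py")  # execute a file as a module
assert check_module_exists("importlib")                 # find_spec(...) is not None

before = os.getcwd()
with cd("~"):                    # chdir into the expanded target directory
    print(os.getcwd())           # the home directory
assert os.getcwd() == before     # previous cwd restored
```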
| @@ -36,6 +36,13 @@ active_trace = None | |||||
| skip_tracing = False | skip_tracing = False | ||||
| def is_tracing(): | |||||
| if active_trace is None: | |||||
| return False | |||||
| else: | |||||
| return not skip_tracing | |||||
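The new `is_tracing` helper encodes a three-state rule; restated in isolation:

```python
active_trace = None    # the currently running trace object, if any
skip_tracing = False   # set while inside exclude_from_trace()

def is_tracing() -> bool:
    if active_trace is None:
        return False           # nothing is being traced
    return not skip_tracing    # tracing, unless temporarily excluded

assert is_tracing() is False   # no trace active yet
```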
| @contextlib.contextmanager | @contextlib.contextmanager | ||||
| def exclude_from_trace(): | def exclude_from_trace(): | ||||
| global skip_tracing | global skip_tracing | ||||
| @@ -125,6 +132,9 @@ class trace: | |||||
| self._graph_opt_level = opt_level | self._graph_opt_level = opt_level | ||||
| self._tensor_shape = tensor_shape | self._tensor_shape = tensor_shape | ||||
| self._reset() | |||||
| def _reset(self): | |||||
| self._untraced = True | self._untraced = True | ||||
| self._tinfo = [] # handle -> TensorInfo | self._tinfo = [] # handle -> TensorInfo | ||||
| self._seq = [] | self._seq = [] | ||||
| @@ -257,77 +267,117 @@ class trace: | |||||
| def _record_const(self, op, outputs): | def _record_const(self, op, outputs): | ||||
| pass | pass | ||||
| @contextlib.contextmanager | |||||
| def _setup(self): | |||||
| def _set_active(self, active: bool): | |||||
| global active_trace | global active_trace | ||||
| if active_trace: | |||||
| raise NotImplementedError("sorry, not implemented: nested trace") | |||||
| active_trace = self | |||||
| if self._untraced: | |||||
| apply.enable(apply_with_tracing) | |||||
| apply.enable(apply_const_with_tracing) | |||||
| if self._symbolic: | |||||
| apply.enable(apply_symbolic_mode) | |||||
| apply.enable(apply_const_symbolic_mode) | |||||
| self._lazy_eval_graph = G.Graph() | |||||
| if active: | |||||
| if active_trace: | |||||
| raise NotImplementedError("sorry, not implemented: nested trace") | |||||
| active_trace = self | |||||
| else: | else: | ||||
| apply.enable(apply_compiled_mode) | |||||
| if self._graph is None: | |||||
| self._compile() | |||||
| self._graph.execute() | |||||
| yield | |||||
| assert active_trace is self | |||||
| active_trace = None | |||||
| def _init_trace(self, symbolic: bool): | |||||
| apply.enable(apply_with_tracing) | |||||
| apply.enable(apply_const_with_tracing) | |||||
| if symbolic: | |||||
| apply.enable(apply_symbolic_mode) | |||||
| apply.enable(apply_const_symbolic_mode) | |||||
| self._lazy_eval_graph = G.Graph() | |||||
| def _take_escaped_tensors(self): | |||||
| escaped_tensors = tuple(self._active_tensors) | escaped_tensors = tuple(self._active_tensors) | ||||
| self._active_tensors.clear() | self._active_tensors.clear() | ||||
| return escaped_tensors | |||||
| if self._untraced: | |||||
| for x in escaped_tensors: | |||||
| info = self._tinfo[x._TraceMixin__handle] | |||||
| info.data_read = True | |||||
| x._TraceMixin__restore() | |||||
| if self._inputs_to_restore: | |||||
| for x in self._inputs_to_restore: | |||||
| def _lazy_eval(self, lazy_eval_graph, lazy_eval_tensors): | |||||
| active_lazy_eval_tensors = [] | |||||
| visited = set() | |||||
| readers = [] | |||||
| for x in lazy_eval_tensors: | |||||
| x = x() | |||||
| if x is None or x in visited: | |||||
| continue | |||||
| reader = G.OutputNode(x._LazyEvalTensor__varnode).outputs[0] | |||||
| readers.append(reader) | |||||
| active_lazy_eval_tensors.append(x) | |||||
| visited.add(x) | |||||
| self._apply_graph_options(lazy_eval_graph) | |||||
| lazy_eval_graph.compile(*readers) | |||||
| lazy_eval_graph() | |||||
| for r, x in zip(readers, active_lazy_eval_tensors): | |||||
| assign_raw_tensor(x, as_raw_tensor(r.op.get_value())) | |||||
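`_lazy_eval` iterates weak references, so dead and duplicate entries must be skipped before readers are compiled. The same dedup pattern in isolation (CPython's refcounting collects the unreferenced `Box` immediately):

```python
import weakref

class Box:
    pass

live = Box()
refs = [weakref.ref(live), weakref.ref(live), weakref.ref(Box())]

visited, survivors = set(), []
for r in refs:
    x = r()                        # None if the referent was collected
    if x is None or x in visited:
        continue
    visited.add(x)
    survivors.append(x)

assert survivors == [live]         # deduplicated, dead ref dropped
```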
| @contextlib.contextmanager | |||||
| def _setup(self): | |||||
| interrupted = False | |||||
| def do_enter(): | |||||
| self._set_active(True) | |||||
| if self._untraced: | |||||
| self._init_trace(self._symbolic) | |||||
| else: | |||||
| apply.enable(apply_compiled_mode) | |||||
| if self._graph is None: | |||||
| self._compile() | |||||
| self._graph.execute() | |||||
| def do_finalize(): | |||||
| escaped_tensors = self._take_escaped_tensors() | |||||
| if self._untraced: | |||||
| for x in escaped_tensors: | |||||
| info = self._tinfo[x._TraceMixin__handle] | |||||
| info.data_read = True | |||||
| x._TraceMixin__restore() | x._TraceMixin__restore() | ||||
| if self._symbolic: | |||||
| # eval lazy eval tensors | |||||
| if self._lazy_eval_tensors: | |||||
| lazy_eval_tensors = [] | |||||
| visited = set() | |||||
| readers = [] | |||||
| for x in self._lazy_eval_tensors: | |||||
| x = x() | |||||
| if x is None or x in visited: | |||||
| continue | |||||
| reader = G.OutputNode(x._LazyEvalTensor__varnode).outputs[0] | |||||
| readers.append(reader) | |||||
| lazy_eval_tensors.append(x) | |||||
| visited.add(x) | |||||
| self._apply_graph_options(self._lazy_eval_graph) | |||||
| self._lazy_eval_graph.compile(*readers) | |||||
| self._lazy_eval_graph() | |||||
| for r, x in zip(readers, lazy_eval_tensors): | |||||
| assign_raw_tensor(x, as_raw_tensor(r.op.get_value())) | |||||
| if self._inputs_to_restore: | |||||
| for x in self._inputs_to_restore: | |||||
| x._TraceMixin__restore() | |||||
| if self._symbolic and self._lazy_eval_tensors: | |||||
| # eval lazy eval tensors | |||||
| self._lazy_eval(self._lazy_eval_graph, self._lazy_eval_tensors) | |||||
| self._lazy_eval_graph = None | self._lazy_eval_graph = None | ||||
| self._lazy_eval_tensors = None | self._lazy_eval_tensors = None | ||||
| self._untraced = False | |||||
| else: | |||||
| if self._pc != len(self._seq): | |||||
| raise TraceMismatchError("premature end") | |||||
| for x in escaped_tensors: | |||||
| assign_raw_tensor(x, as_raw_tensor(x._dev_tensor())) | |||||
| self._graph.wait() | |||||
| self._reset_exec_env() | |||||
| self._untraced = False | |||||
| else: | |||||
| # compiled_tensor leaks | |||||
| if self._pc == len(self._seq): | |||||
| for x in escaped_tensors: | |||||
| try: | |||||
| assign_raw_tensor(x, as_raw_tensor(x._dev_tensor())) | |||||
| except TraceMismatchError: | |||||
| # TraceMismatchError thrown in do_exit | |||||
| pass | |||||
| self._graph.wait() | |||||
| self._reset_exec_env() | |||||
| # reset status | |||||
| self._pc = 0 | self._pc = 0 | ||||
| self._tensor_remaps = None | |||||
| apply.disable(apply_with_tracing) | |||||
| apply.disable(apply_const_with_tracing) | |||||
| apply.disable(apply_symbolic_mode) | |||||
| apply.disable(apply_const_symbolic_mode) | |||||
| apply.disable(apply_compiled_mode) | |||||
| active_trace = None | |||||
| self._tensor_remaps = None | |||||
| apply.disable(apply_with_tracing) | |||||
| apply.disable(apply_const_with_tracing) | |||||
| apply.disable(apply_symbolic_mode) | |||||
| apply.disable(apply_const_symbolic_mode) | |||||
| apply.disable(apply_compiled_mode) | |||||
| self._set_active(False) | |||||
| def do_exit(): | |||||
| if not self._untraced and self._pc != len(self._seq): | |||||
| raise TraceMismatchError("premature end") | |||||
| if not self._symbolic or not self._untraced: | |||||
| for x in self._active_tensors: | |||||
| x._dev_tensor() | |||||
| try: | |||||
| do_enter() | |||||
| yield | |||||
| do_exit() | |||||
| except: | |||||
| interrupted = True | |||||
| raise | |||||
| finally: | |||||
| do_finalize() | |||||
| if interrupted: | |||||
| self._reset() | |||||
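The `_setup` rewrite splits the old monolithic context manager into `do_enter`, `do_exit` and `do_finalize`, and resets trace state only when the body raised. Reduced to its control flow, the pattern is:

```python
import contextlib

@contextlib.contextmanager
def guarded(enter, exit_, finalize, reset):
    interrupted = False
    try:
        enter()
        yield
        exit_()           # may raise, e.g. TraceMismatchError on premature end
    except:               # noqa: E722 -- mirrors the source: flag, then re-raise
        interrupted = True
        raise
    finally:
        finalize()        # cleanup runs on success and failure alike
        if interrupted:
            reset()       # drop partial state so the trace object stays reusable
```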
| def _begin_excluded_region(self): | def _begin_excluded_region(self): | ||||
| if self._capture_as_const: | if self._capture_as_const: | ||||
| @@ -368,6 +418,7 @@ class trace: | |||||
| def _compile(self): | def _compile(self): | ||||
| graph = self._graph = G.Graph() | graph = self._graph = G.Graph() | ||||
| graph.options.no_force_inplace = True | graph.options.no_force_inplace = True | ||||
| graph.options.async_exec_level = 0b100 | |||||
| self._apply_graph_options(graph) | self._apply_graph_options(graph) | ||||
| # graph.options.graph_opt_level = 0 | # graph.options.graph_opt_level = 0 | ||||
| need_reset_nodes = self._need_reset_nodes = [] | need_reset_nodes = self._need_reset_nodes = [] | ||||
| @@ -570,7 +621,9 @@ class trace: | |||||
| if h not in h2v: | if h not in h2v: | ||||
| assert info.external | assert info.external | ||||
| assert info.bound_data | assert info.bound_data | ||||
| h2v[h] = graph.make_const(info.bound_data._dev_tensor()) | |||||
| h2v[h] = graph.make_const( | |||||
| info.bound_data.numpy(), dtype=info.dtype, device=info.device | |||||
| ) | |||||
| ivars.append(h2v[h]) | ivars.append(h2v[h]) | ||||
| ovars = apply(op, *ivars) | ovars = apply(op, *ivars) | ||||
| assert len(ovars) == len(ohandles) | assert len(ovars) == len(ohandles) | ||||
| @@ -12,7 +12,7 @@ import os | |||||
| import sys | import sys | ||||
| _all_loggers = [] | _all_loggers = [] | ||||
| _default_level_name = os.getenv("MEGENGINE_LOGGING_LEVEL", "ERROR") | |||||
| _default_level_name = os.getenv("MEGENGINE_LOGGING_LEVEL", "INFO") | |||||
| _default_level = logging.getLevelName(_default_level_name.upper()) | _default_level = logging.getLevelName(_default_level_name.upper()) | ||||
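With the default flipped from ERROR to INFO, the old quiet behavior is still one environment variable away; note the variable is read once, at import time:

```python
import os
os.environ["MEGENGINE_LOGGING_LEVEL"] = "ERROR"  # must be set before the import

import megengine  # noqa: E402  -- picks up the level when the module loads
```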
| @@ -8,6 +8,7 @@ | |||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| from .activation import LeakyReLU, PReLU, ReLU, Sigmoid, Softmax | from .activation import LeakyReLU, PReLU, ReLU, Sigmoid, Softmax | ||||
| from .adaptive_pooling import AdaptiveAvgPool2d, AdaptiveMaxPool2d | |||||
| from .batchnorm import BatchNorm1d, BatchNorm2d, SyncBatchNorm | from .batchnorm import BatchNorm1d, BatchNorm2d, SyncBatchNorm | ||||
| from .concat import Concat | from .concat import Concat | ||||
| from .conv import Conv2d, ConvRelu2d, ConvTranspose2d, LocalConv2d | from .conv import Conv2d, ConvRelu2d, ConvTranspose2d, LocalConv2d | ||||
| @@ -20,10 +20,10 @@ class Softmax(Module): | |||||
| .. math:: | .. math:: | ||||
| \text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)} | \text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)} | ||||
| It is applied to an n-dimensional input Tensor and rescaling them so that the elements of the | |||||
| n-dimensional output Tensor lie in the range of `[0, 1]` and sum to 1. | |||||
| It is applied to all elements along the given axis, and rescales them so that | |||||
| they lie in the range `[0, 1]` and sum to 1. | |||||
| :param axis: An axis along which softmax will be applied. By default, | |||||
| :param axis: the axis along which softmax will be applied. By default, | |||||
| softmax will apply along the highest ranked axis. | softmax will apply along the highest ranked axis. | ||||
| Examples: | Examples: | ||||
| @@ -55,6 +55,9 @@ class Softmax(Module): | |||||
| def forward(self, inputs): | def forward(self, inputs): | ||||
| return softmax(inputs, self.axis) | return softmax(inputs, self.axis) | ||||
| def _module_info_string(self) -> str: | |||||
| return "axis={axis}".format(axis=self.axis) | |||||
| class Sigmoid(Module): | class Sigmoid(Module): | ||||
| r""" | r""" | ||||
| @@ -138,8 +141,7 @@ class PReLU(Module): | |||||
| \end{cases} | \end{cases} | ||||
| Here :math:`a` is a learnable parameter. When called without arguments, `PReLU()` uses | Here :math:`a` is a learnable parameter. When called without arguments, `PReLU()` uses | ||||
| a single paramter :math:`a` across all input channel. If called with `PReLU(num_of_channels)`, | |||||
| a seperate :math:`a` is used for each input channle. | |||||
| a single parameter :math:`a` across all input channels. If called with `PReLU(num_of_channels)`, each input channel will have its own :math:`a`. | |||||
| :param num_parameters: number of :math:`a` to learn; only two values are | :param num_parameters: number of :math:`a` to learn; only two values are | ||||
| legitimate: 1, or the number of channels of the input. Default: 1 | legitimate: 1, or the number of channels of the input. Default: 1 | ||||
| @@ -0,0 +1,114 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| # | |||||
| # Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, | |||||
| # software distributed under the License is distributed on an | |||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| from abc import abstractmethod | |||||
| from typing import Tuple, Union | |||||
| from ..functional import adaptive_avg_pool2d, adaptive_max_pool2d | |||||
| from ..tensor import Parameter, Tensor | |||||
| from .module import Module | |||||
| class _AdaptivePoolNd(Module): | |||||
| def __init__( | |||||
| self, oshp: Union[Tuple[int, int], int, Tensor], | |||||
| ): | |||||
| super(_AdaptivePoolNd, self).__init__() | |||||
| self.oshp = oshp | |||||
| @abstractmethod | |||||
| def forward(self, inp): | |||||
| pass | |||||
| class AdaptiveMaxPool2d(_AdaptivePoolNd): | |||||
| r"""Applies a 2D max adaptive pooling over an input. | |||||
| For instance, given an input of the size :math:`(N, C, H, W)` and | |||||
| an output shape :math:`(OH, OW)`, this layer generates the output of | |||||
| the size :math:`(N, C, OH, OW)` through a process described as: | |||||
| .. math:: | |||||
| \begin{aligned} | |||||
| out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} | |||||
| \text{input}(N_i, C_j, \text{stride[0]} \times h + m, | |||||
| \text{stride[1]} \times w + n) | |||||
| \end{aligned} | |||||
| ``kernel_size`` and ``stride`` can be inferred from the input and output shapes (see the worked example below): | |||||
| padding: (0, 0) | |||||
| stride: (floor(IH / OH), floor(IW / OW)) | |||||
| kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w) | |||||
| Examples: | |||||
| .. testcode:: | |||||
| import numpy as np | |||||
| import megengine as mge | |||||
| import megengine.module as M | |||||
| m = M.AdaptiveMaxPool2d((2, 2)) | |||||
| inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||||
| oup = m(inp) | |||||
| print(oup.numpy()) | |||||
| Outputs: | |||||
| .. testoutput:: | |||||
| [[[[5. 7.] | |||||
| [13. 15.]]]] | |||||
| """ | |||||
| def forward(self, inp): | |||||
| return adaptive_max_pool2d(inp, self.oshp) | |||||
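A worked instance of the inference rule in the docstring, for the 4x4 input and (2, 2) output used in the example above:

```python
IH, OH = 4, 2                     # input and output heights (widths identical)
stride = IH // OH                 # floor(4 / 2) = 2
kernel = IH - (OH - 1) * stride   # 4 - 1 * 2 = 2
# An ordinary 2x2, stride-2 max pool with zero padding: its window maxima
# over arange(16).reshape(4, 4) are exactly 5, 7, 13 and 15.
assert (stride, kernel) == (2, 2)
```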
| class AdaptiveAvgPool2d(_AdaptivePoolNd): | |||||
| r"""Applies a 2D average pooling over an input. | |||||
| For instance, given an input of the size :math:`(N, C, H, W)` and | |||||
| an output shape :math:`(OH, OW)`, this layer generates the output of | |||||
| the size :math:`(N, C, OH, OW)` through a process described as: | |||||
| .. math:: | |||||
| out(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1} | |||||
| input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) | |||||
| ``kernel_size`` and ``stride`` can be inferred from the input and output shapes (see the worked example below): | |||||
| padding: (0, 0) | |||||
| stride: (floor(IH / OH), floor(IW / OW)) | |||||
| kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w) | |||||
| Examples: | |||||
| .. testcode:: | |||||
| import numpy as np | |||||
| import megengine as mge | |||||
| import megengine.module as M | |||||
| m = M.AdaptiveAvgPool2d((2, 2)) | |||||
| inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4)) | |||||
| oup = m(inp) | |||||
| print(oup.numpy()) | |||||
| Outputs: | |||||
| .. testoutput:: | |||||
| [[[[2.5 4.5] | |||||
| [10.5 12.5]]]] | |||||
| """ | |||||
| def forward(self, inp): | |||||
| return adaptive_avg_pool2d(inp, self.oshp) | |||||
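The figures in the average-pool docstring check out with plain numpy, since the inferred kernel and stride make each output element the mean of a disjoint 2x2 window:

```python
import numpy as np

x = np.arange(16, dtype="float32").reshape(4, 4)
out = x.reshape(2, 2, 2, 2).mean(axis=(1, 3))   # mean of each 2x2 window
assert out.tolist() == [[2.5, 4.5], [10.5, 12.5]]
```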
| @@ -11,7 +11,7 @@ from typing import Optional | |||||
| import numpy as np | import numpy as np | ||||
| from ..distributed.group import WORLD, Group | from ..distributed.group import WORLD, Group | ||||
| from ..functional import batch_norm2d, sync_batch_norm | |||||
| from ..functional.nn import batch_norm, sync_batch_norm | |||||
| from ..tensor import Parameter, Tensor | from ..tensor import Parameter, Tensor | ||||
| from . import init | from . import init | ||||
| from .module import Module | from .module import Module | ||||
| @@ -96,7 +96,7 @@ class _BatchNorm(Module): | |||||
| else: | else: | ||||
| exponential_average_factor = 0.0 # useless | exponential_average_factor = 0.0 # useless | ||||
| output = batch_norm2d( | |||||
| output = batch_norm( | |||||
| inp, | inp, | ||||
| self.running_mean if self.track_running_stats else None, | self.running_mean if self.track_running_stats else None, | ||||
| self.running_var if self.track_running_stats else None, | self.running_var if self.track_running_stats else None, | ||||
| @@ -113,6 +113,13 @@ class _BatchNorm(Module): | |||||
| return output | return output | ||||
| def _module_info_string(self) -> str: | |||||
| s = ( | |||||
| "{num_features}, eps={eps}, momentum={momentum}, affine={affine}, " | |||||
| "track_running_stats={track_running_stats}" | |||||
| ) | |||||
| return s.format(**self.__dict__) | |||||
| class SyncBatchNorm(_BatchNorm): | class SyncBatchNorm(_BatchNorm): | ||||
| r""" | r""" | ||||
| @@ -213,8 +220,8 @@ class BatchNorm2d(_BatchNorm): | |||||
| of 0.9. | of 0.9. | ||||
| If :attr:`track_running_stats` is set to ``False``, this layer will not | If :attr:`track_running_stats` is set to ``False``, this layer will not | ||||
| keep running estimates, and batch statistics are instead used during | |||||
| evaluation time. | |||||
| keep running estimates; batch statistics are used during | |||||
| evaluation instead. | |||||
| .. note:: | .. note:: | ||||
| This :attr:`momentum` argument is different from one used in optimizer | This :attr:`momentum` argument is different from one used in optimizer | ||||
| @@ -229,15 +236,14 @@ class BatchNorm2d(_BatchNorm): | |||||
| Spatial Batch Normalization. | Spatial Batch Normalization. | ||||
| :type num_features: int | :type num_features: int | ||||
| :param num_features: usually the :math:`C` from an input of size | |||||
| :math:`(N, C, H, W)` or the highest ranked dimension of an input with | |||||
| :param num_features: usually :math:`C` from an input of shape | |||||
| :math:`(N, C, H, W)` or the highest ranked dimension of an input | |||||
| less than 4D. | less than 4D. | ||||
| :type eps: float | :type eps: float | ||||
| :param eps: a value added to the denominator for numerical stability. | :param eps: a value added to the denominator for numerical stability. | ||||
| Default: 1e-5 | Default: 1e-5 | ||||
| :type momentum: float | :type momentum: float | ||||
| :param momentum: the value used for the `running_mean` and `running_var` | |||||
| computation. | |||||
| :param momentum: the value used for the ``running_mean`` and ``running_var`` computation. | |||||
| Default: 0.9 | Default: 0.9 | ||||
| :type affine: bool | :type affine: bool | ||||
| :param affine: a boolean value that when set to True, this module has | :param affine: a boolean value that when set to True, this module has | ||||
| @@ -70,6 +70,21 @@ class _ConvNd(Module): | |||||
| def _infer_bias_shape(self): | def _infer_bias_shape(self): | ||||
| pass | pass | ||||
| def _module_info_string(self): | |||||
| s = "{in_channels}, {out_channels}, kernel_size={kernel_size}" | |||||
| if self.stride != (1,) * len(self.stride): | |||||
| s += ", stride={stride}" | |||||
| if self.padding != (0,) * len(self.padding): | |||||
| s += ", padding={padding}" | |||||
| if self.dilation != (1,) * len(self.dilation): | |||||
| s += ", dilation={dilation}" | |||||
| if self.groups != 1: | |||||
| s += ", groups={groups}" | |||||
| if self.bias is None: | |||||
| s += ", bias=False" | |||||
| return s.format(**self.__dict__) | |||||
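The `_module_info_string` hooks added throughout this changeset (Softmax, BatchNorm, Conv, Dropout, Linear) feed the module repr. Output below is indicative only, assuming the base `Module.__repr__` interpolates this string:

```python
import megengine.module as M

print(M.Conv2d(3, 16, 3, stride=2, bias=False))
# e.g. Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), bias=False)
print(M.Dropout(0.5))     # e.g. Dropout(drop_prob=0.5)
print(M.Softmax(axis=1))  # e.g. Softmax(axis=1)
```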
| class Conv2d(_ConvNd): | class Conv2d(_ConvNd): | ||||
| r"""Applies a 2D convolution over an input tensor. | r"""Applies a 2D convolution over an input tensor. | ||||
| @@ -84,8 +99,8 @@ class Conv2d(_ConvNd): | |||||
| \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k) | \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k) | ||||
| where :math:`\star` is the valid 2D cross-correlation operator, | where :math:`\star` is the valid 2D cross-correlation operator, | ||||
| :math:`N` is a batch size, :math:`C` denotes a number of channels, | |||||
| :math:`H` is a height of input planes in pixels, and :math:`W` is | |||||
| :math:`N` is batch size, :math:`C` denotes number of channels, | |||||
| :math:`H` is height of input planes in pixels, and :math:`W` is | |||||
| width in pixels. | width in pixels. | ||||
| When `groups == in_channels` and `out_channels == K * in_channels`, | When `groups == in_channels` and `out_channels == K * in_channels`, | ||||
| @@ -105,9 +120,8 @@ class Conv2d(_ConvNd): | |||||
| :param padding: size of the paddings added to the input on both sides of its | :param padding: size of the paddings added to the input on both sides of its | ||||
| spatial dimensions. Only zero-padding is supported. Default: 0 | spatial dimensions. Only zero-padding is supported. Default: 0 | ||||
| :param dilation: dilation of the 2D convolution operation. Default: 1 | :param dilation: dilation of the 2D convolution operation. Default: 1 | ||||
| :param groups: number of groups to divide input and output channels into, | |||||
| so as to perform a "grouped convolution". When groups is not 1, | |||||
| in_channels and out_channels must be divisible by groups, | |||||
| :param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1, | |||||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | |||||
| and there would be an extra dimension at the beginning of the weight's | and there would be an extra dimension at the beginning of the weight's | ||||
| shape. Specifically, the shape of weight would be `(groups, | shape. Specifically, the shape of weight would be `(groups, | ||||
| out_channel // groups, in_channels // groups, *kernel_size)`. | out_channel // groups, in_channels // groups, *kernel_size)`. | ||||
| @@ -115,9 +129,9 @@ class Conv2d(_ConvNd): | |||||
| True | True | ||||
| :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default: | :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default: | ||||
| `CROSS_CORRELATION` | `CROSS_CORRELATION` | ||||
| :param compute_mode: When set to `DEFAULT`, no special requirements will be | |||||
| placed on the precision of intermediate results. When set to `FLOAT32`, | |||||
| float32 would be used for accumulator and intermediate result, but only | |||||
| :param compute_mode: When set to "DEFAULT", no special requirements will be | |||||
| placed on the precision of intermediate results. When set to "FLOAT32", | |||||
| "Float32" would be used for accumulator and intermediate result, but only | |||||
| effective when input and output are of float16 dtype. | effective when input and output are of float16 dtype. | ||||
| Examples: | Examples: | ||||
| @@ -221,7 +235,7 @@ class ConvTranspose2d(_ConvNd): | |||||
| r"""Applies a 2D transposed convolution over an input tensor. | r"""Applies a 2D transposed convolution over an input tensor. | ||||
| This module is also known as a deconvolution or a fractionally-strided convolution. | This module is also known as a deconvolution or a fractionally-strided convolution. | ||||
| :class:`ConvTranspose2d` can ben seen as the gradient of :class:`Conv2d` operation | |||||
| :class:`ConvTranspose2d` can be seen as the gradient of :class:`Conv2d` operation | |||||
| with respect to its input. | with respect to its input. | ||||
| Convolution usually reduces the size of input, while transposed convolution works | Convolution usually reduces the size of input, while transposed convolution works | ||||
| @@ -237,8 +251,7 @@ class ConvTranspose2d(_ConvNd): | |||||
| :param padding: size of the paddings added to the input on both sides of its | :param padding: size of the paddings added to the input on both sides of its | ||||
| spatial dimensions. Only zero-padding is supported. Default: 0 | spatial dimensions. Only zero-padding is supported. Default: 0 | ||||
| :param dilation: dilation of the 2D convolution operation. Default: 1 | :param dilation: dilation of the 2D convolution operation. Default: 1 | ||||
| :param groups: number of groups to divide input and output channels into, | |||||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||||
| :param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1, | |||||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | ``in_channels`` and ``out_channels`` must be divisible by ``groups``, | ||||
| and there would be an extra dimension at the beginning of the weight's | and there would be an extra dimension at the beginning of the weight's | ||||
| shape. Specifically, the shape of weight would be ``(groups, | shape. Specifically, the shape of weight would be ``(groups, | ||||
| @@ -247,9 +260,9 @@ class ConvTranspose2d(_ConvNd): | |||||
| True | True | ||||
| :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default: | :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default: | ||||
| `CROSS_CORRELATION` | `CROSS_CORRELATION` | ||||
| :param compute_mode: When set to `DEFAULT`, no special requirements will be | |||||
| placed on the precision of intermediate results. When set to `FLOAT32`, | |||||
| float32 would be used for accumulator and intermediate result, but only | |||||
| :param compute_mode: When set to "DEFAULT", no special requirements will be | |||||
| placed on the precision of intermediate results. When set to "FLOAT32", | |||||
| "Float32" would be used for accumulator and intermediate result, but only | |||||
| effective when input and output are of float16 dtype. | effective when input and output are of float16 dtype. | ||||
| """ | """ | ||||
| @@ -327,7 +340,7 @@ class ConvTranspose2d(_ConvNd): | |||||
| class LocalConv2d(Conv2d): | class LocalConv2d(Conv2d): | ||||
| r"""Applies a spatial convolution with untied kernels over an input 4D tensor. | |||||
| r"""Applies a spatial convolution with untied kernels over an groupped channeled input 4D tensor. | |||||
| It is also known as the locally connected layer. | It is also known as the locally connected layer. | ||||
| :param in_channels: number of input channels. | :param in_channels: number of input channels. | ||||
| @@ -340,9 +353,9 @@ class LocalConv2d(Conv2d): | |||||
| :param stride: stride of the 2D convolution operation. Default: 1 | :param stride: stride of the 2D convolution operation. Default: 1 | ||||
| :param padding: size of the paddings added to the input on both sides of its | :param padding: size of the paddings added to the input on both sides of its | ||||
| spatial dimensions. Only zero-padding is supported. Default: 0 | spatial dimensions. Only zero-padding is supported. Default: 0 | ||||
| :param groups: number of groups to divide input and output channels into, | |||||
| so as to perform a "grouped convolution". When groups is not 1, | |||||
| in_channels and out_channels must be divisible by groups. | |||||
| :param groups: number of groups into which the input and output channels are divided, | |||||
| so as to perform a "grouped convolution". When ``groups`` is not 1, | |||||
| ``in_channels`` and ``out_channels`` must be divisible by ``groups``. | |||||
| The shape of weight is `(groups, output_height, output_width, | The shape of weight is `(groups, output_height, output_width, | ||||
| in_channels // groups, *kernel_size, out_channels // groups)`. | in_channels // groups, *kernel_size, out_channels // groups)`. | ||||
| """ | """ | ||||
| @@ -11,7 +11,7 @@ from .module import Module | |||||
| class Dropout(Module): | class Dropout(Module): | ||||
| r"""Randomly set input elements to zeros with the probability :math:`drop\_prob` during training. | |||||
| r"""Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training. | |||||
| Commonly used in large networks to prevent overfitting. | Commonly used in large networks to prevent overfitting. | ||||
| Note that we perform dropout only during training; we also rescale (multiply) the output tensor | Note that we perform dropout only during training; we also rescale (multiply) the output tensor | ||||
| by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`. | by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`. | ||||
| @@ -28,3 +28,6 @@ class Dropout(Module): | |||||
| return dropout(inputs, self.drop_prob, training=True) | return dropout(inputs, self.drop_prob, training=True) | ||||
| else: | else: | ||||
| return inputs | return inputs | ||||
| def _module_info_string(self) -> str: | |||||
| return "drop_prob={drop_prob}".format(drop_prob=self.drop_prob) | |||||
| @@ -34,7 +34,7 @@ class Elemwise(Module): | |||||
| * "EXP": exp(x) | * "EXP": exp(x) | ||||
| * "TANH": tanh(x) | * "TANH": tanh(x) | ||||
| * "FUSE_MUL_ADD3": x * y + z | * "FUSE_MUL_ADD3": x * y + z | ||||
| * "FAST_TANH": fast_tanh(x) | |||||
| * "FAST_TANH": x * (27. + x * x) / (27. + 9. * x * x) | |||||
| * "NEGATE": -x | * "NEGATE": -x | ||||
| * "ACOS": acos(x) | * "ACOS": acos(x) | ||||
| * "ASIN": asin(x) | * "ASIN": asin(x) | ||||
| @@ -56,9 +56,9 @@ class Elemwise(Module): | |||||
| * "SIGMOID_GRAD": sigmoid_grad | * "SIGMOID_GRAD": sigmoid_grad | ||||
| * "SWITCH_GT0": switch_gt0 | * "SWITCH_GT0": switch_gt0 | ||||
| * "TANH_GRAD": tanh_grad | * "TANH_GRAD": tanh_grad | ||||
| * "LT": lt | |||||
| * "LT": less | |||||
| * "LEQ": leq | * "LEQ": leq | ||||
| * "EQ": eq | |||||
| * "EQ": equal | |||||
| * "POW": pow | * "POW": pow | ||||
| * "LOG_SUM_EXP": log_sum_exp | * "LOG_SUM_EXP": log_sum_exp | ||||
| * "FAST_TANH_GRAD": fast_tanh_grad | * "FAST_TANH_GRAD": fast_tanh_grad | ||||
| @@ -10,7 +10,7 @@ from typing import Optional | |||||
| import numpy as np | import numpy as np | ||||
| from ..functional import embedding as embedding_func | |||||
| from ..functional.nn import embedding as embedding_func | |||||
| from ..tensor import Parameter | from ..tensor import Parameter | ||||
| from . import init | from . import init | ||||
| from .module import Module | from .module import Module | ||||
| @@ -26,9 +26,9 @@ class Embedding(Module): | |||||
| :param num_embeddings: size of embedding dictionary. | :param num_embeddings: size of embedding dictionary. | ||||
| :param embedding_dim: size of each embedding vector. | :param embedding_dim: size of each embedding vector. | ||||
| :param padding_idx: should be set to None, not support now. | |||||
| :param max_norm: should be set to None, not support now. | |||||
| :param norm_type: should be set to None, not support now. | |||||
| :param padding_idx: should be set to None, not supported now. | |||||
| :param max_norm: should be set to None, not supported now. | |||||
| :param norm_type: should be set to None, not supported now. | |||||
| :param initial_weight: the learnable weights of the module of shape (num_embeddings, embedding_dim). | :param initial_weight: the learnable weights of the module of shape (num_embeddings, embedding_dim). | ||||
| Examples: | Examples: | ||||
| @@ -121,8 +121,8 @@ class Embedding(Module): | |||||
| r""" | r""" | ||||
| Creates Embedding instance from given 2-dimensional FloatTensor. | Creates Embedding instance from given 2-dimensional FloatTensor. | ||||
| :param embeddings: Tensor contained weight for the embedding. | |||||
| :param freeze: If ``True``, the weight does not get updated during the learning process. Default: ``True``. | |||||
| :param embeddings: tensor containing weights for the embedding. | |||||
| :param freeze: if ``True``, the weight does not get updated during the learning process. Default: True. | |||||
| :param padding_idx: should be set to None, not supported now. | :param padding_idx: should be set to None, not supported now. | ||||
| :param max_norm: should be set to None, not supported now. | :param max_norm: should be set to None, not supported now. | ||||
| :param norm_type: should be set to None, not supported now. | :param norm_type: should be set to None, not supported now. | ||||
| @@ -6,7 +6,7 @@ | |||||
| # Unless required by applicable law or agreed to in writing, | # Unless required by applicable law or agreed to in writing, | ||||
| # software distributed under the License is distributed on an | # software distributed under the License is distributed on an | ||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| from ..functional import identity | |||||
| from ..functional import copy | |||||
| from .module import Module | from .module import Module | ||||
| @@ -14,4 +14,4 @@ class Identity(Module): | |||||
| r"""A placeholder identity operator that will ignore any argument.""" | r"""A placeholder identity operator that will ignore any argument.""" | ||||
| def forward(self, x): | def forward(self, x): | ||||
| return identity(x) | |||||
| return copy(x) | |||||
| @@ -18,48 +18,48 @@ from ..tensor import Tensor | |||||
| def fill_(tensor: Tensor, val: Union[float, int]) -> None: | def fill_(tensor: Tensor, val: Union[float, int]) -> None: | ||||
| """Fill the given ``tensor`` with value ``val``. | |||||
| """Fills the given ``tensor`` with value ``val``. | |||||
| :param tensor: An n-dimentional tensor to be initialized | |||||
| :param val: The value to be filled throughout the tensor | |||||
| :param tensor: tensor to be initialized. | |||||
| :param val: value to be filled throughout the tensor. | |||||
| """ | """ | ||||
| tensor._reset(full(shape=tensor.shape, value=val, dtype=tensor.dtype)) | tensor._reset(full(shape=tensor.shape, value=val, dtype=tensor.dtype)) | ||||
| def zeros_(tensor: Tensor) -> None: | def zeros_(tensor: Tensor) -> None: | ||||
| """Fill the given ``tensor`` with scalar value `0`. | |||||
| """Fills the given ``tensor`` with scalar value `0`. | |||||
| :param tensor: An n-dimentional tensor to be initialized | |||||
| :param tensor: tensor to be initialized. | |||||
| """ | """ | ||||
| fill_(tensor, 0) | fill_(tensor, 0) | ||||
| def ones_(tensor: Tensor) -> None: | def ones_(tensor: Tensor) -> None: | ||||
| """Fill the given ``tensor`` with the scalar value `1`. | |||||
| """Fills the given ``tensor`` with the scalar value `1`. | |||||
| :param tensor: An n-dimentional tensor to be initialized | |||||
| :param tensor: tensor to be initialized. | |||||
| """ | """ | ||||
| fill_(tensor, 1) | fill_(tensor, 1) | ||||
| def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: | ||||
| r"""Fill the given ``tensor`` with random value sampled from uniform distribution | |||||
| r"""Fills the given ``tensor`` with random value sampled from uniform distribution | |||||
| :math:`\mathcal{U}(\text{a}, \text{b})`. | :math:`\mathcal{U}(\text{a}, \text{b})`. | ||||
| :param tensor: An n-dimentional tensor to be initialized | |||||
| :param a: Lower bound of the sampling interval | |||||
| :param b: Upper bound of the sampling interval | |||||
| :param tensor: tensor to be initialized. | |||||
| :param a: lower bound of the sampling interval. | |||||
| :param b: upper bound of the sampling interval. | |||||
| """ | """ | ||||
| tensor._reset(uniform(size=tensor.shape, low=a, high=b).astype(tensor.dtype)) | tensor._reset(uniform(size=tensor.shape, low=a, high=b).astype(tensor.dtype)) | ||||
| def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | ||||
| r"""Fill the given ``tensor`` with random value sampled from normal distribution | |||||
| r"""Fills the given ``tensor`` with random value sampled from normal distribution | |||||
| :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. | :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. | ||||
| :param tensor: An n-dimentional tensor to be initialized | |||||
| :param mean: The mean of the normal distribution | |||||
| :param std: The standard deviation of the normal distribution | |||||
| :param tensor: tensor to be initialized. | |||||
| :param mean: mean of the normal distribution. | |||||
| :param std: standard deviation of the normal distribution. | |||||
| """ | """ | ||||
| tensor._reset(normal(size=tensor.shape, mean=mean, std=std).astype(tensor.dtype)) | tensor._reset(normal(size=tensor.shape, mean=mean, std=std).astype(tensor.dtype)) | ||||
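A short usage sketch of the in-place initializers above; the `megengine.module.init` import path follows the `from . import init` usage elsewhere in this changeset:

```python
import numpy as np
import megengine as mge
from megengine.module import init

w = mge.Parameter(np.empty((16, 8), dtype="float32"))
init.fill_(w, 0.1)                   # every element becomes 0.1
init.uniform_(w, a=-0.05, b=0.05)    # resample from U(-0.05, 0.05)
init.normal_(w, mean=0.0, std=0.02)  # resample from N(0, 0.02^2)
```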
| @@ -67,7 +67,7 @@ def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: | |||||
| def calculate_gain( | def calculate_gain( | ||||
| nonlinearity: str, param: Optional[Union[int, float]] = None | nonlinearity: str, param: Optional[Union[int, float]] = None | ||||
| ) -> float: | ) -> float: | ||||
| r"""Return a recommended gain value (see the table below) for the given nonlinearity | |||||
| r"""Returns a recommended gain value (see the table below) for the given nonlinearity | |||||
| function. | function. | ||||
| ================= ==================================================== | ================= ==================================================== | ||||
| @@ -81,8 +81,8 @@ def calculate_gain( | |||||
| Leaky Relu :math:`\sqrt{\frac{2}{1 + {\text{negative}_\text{slope}}^2}}` | Leaky Relu :math:`\sqrt{\frac{2}{1 + {\text{negative}_\text{slope}}^2}}` | ||||
| ================= ==================================================== | ================= ==================================================== | ||||
| :param nonlinearity: Name of the non-linear function | |||||
| :param param: Optional parameter for leaky_relu. Only effective when | |||||
| :param nonlinearity: name of the non-linear function. | |||||
| :param param: optional parameter for leaky_relu. Only effective when | |||||
| ``nonlinearity`` is "leaky_relu". | ``nonlinearity`` is "leaky_relu". | ||||
| """ | """ | ||||
| @@ -119,10 +119,10 @@ def calculate_gain( | |||||
| def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: | def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: | ||||
| """ | """ | ||||
| Calculate fan_in / fan_out value for given weight tensor. This function assumes | |||||
| input tensor is stored in NCHW format. | |||||
| Calculates fan_in / fan_out values for the given weight tensor. This function assumes | |||||
| the input tensor is stored in ``NCHW`` format. | |||||
| :param tensor: Weight tensor in NCHW format | |||||
| :param tensor: weight tensor in ``NCHW`` format. | |||||
| """ | """ | ||||
| shape = tensor.shape | shape = tensor.shape | ||||
| ndim = len(shape) | ndim = len(shape) | ||||
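A worked instance of the `NCHW` assumption: for a convolution weight of shape `(out_channels, in_channels, kh, kw)`, each fan value multiplies a channel count by the receptive-field size:

```python
out_c, in_c, kh, kw = 16, 8, 3, 3          # weight shape (16, 8, 3, 3)
receptive_field = kh * kw                  # 9
fan_in = in_c * receptive_field            # 72
fan_out = out_c * receptive_field          # 144
assert (fan_in, fan_out) == (72, 144)
```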
| @@ -148,13 +148,13 @@ def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: | |||||
| def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | ||||
| """ | """ | ||||
| Calculate fan_in or fan_out value for given weight tensor, depending on given | |||||
| Calculates fan_in or fan_out value for the given weight tensor, depending on the given | |||||
| ``mode``. | ``mode``. | ||||
| See :func:`calculate_fan_in_and_fan_out` for details. | See :func:`calculate_fan_in_and_fan_out` for details. | ||||
| :param tensor: Weight tensor in NCHW format | |||||
| :param mode: ``'fan_in'`` or ``'fan_out'`` | |||||
| :param tensor: weight tensor in ``NCHW`` format. | |||||
| :param mode: "fan_in" or "fan_out". | |||||
| """ | """ | ||||
| mode = mode.lower() | mode = mode.lower() | ||||
| valid_modes = ["fan_in", "fan_out"] | valid_modes = ["fan_in", "fan_out"] | ||||
| @@ -168,7 +168,7 @@ def calculate_correct_fan(tensor: Tensor, mode: str) -> float: | |||||
| def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | ||||
| r"""Fill ``tensor`` with random values sampled from :math:`\mathcal{U}(-a, a)` | |||||
| r"""Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)` | |||||
| where | where | ||||
| .. math:: | .. math:: | ||||
| @@ -178,8 +178,8 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||||
| `Understanding the difficulty of training deep feedforward neural networks` - | `Understanding the difficulty of training deep feedforward neural networks` - | ||||
| Glorot, X. & Bengio, Y. (2010). | Glorot, X. & Bengio, Y. (2010). | ||||
| :param tensor: An n-dimentional tensor to be initialized | |||||
| :param gain: Scaling factor for :math:`a`. | |||||
| :param tensor: tensor to be initialized. | |||||
| :param gain: scaling factor for :math:`a`. | |||||
| """ | """ | ||||
| fan_in, fan_out = calculate_fan_in_and_fan_out(tensor) | fan_in, fan_out = calculate_fan_in_and_fan_out(tensor) | ||||
| std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) | std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) | ||||
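Spelling out the uniform bound the computed `std` implies under the usual Glorot rule :math:`a = \sqrt{3}\,\text{std}`:

```python
import math

gain, fan_in, fan_out = 1.0, 72, 144
std = gain * math.sqrt(2.0 / (fan_in + fan_out))
a = math.sqrt(3.0) * std      # equals gain * sqrt(6 / (fan_in + fan_out)) ~= 0.167
assert math.isclose(a, gain * math.sqrt(6.0 / (fan_in + fan_out)))
```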
| @@ -188,7 +188,7 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: | |||||
| def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | ||||
| r"""Fill ``tensor`` with random values sampled from | |||||
| r"""Fills tensor with random values sampled from | |||||
| :math:`\mathcal{N}(0, \text{std}^2)` where | :math:`\mathcal{N}(0, \text{std}^2)` where | ||||
| .. math:: | .. math:: | ||||
| @@ -198,8 +198,8 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||||
| `Understanding the difficulty of training deep feedforward neural networks` - | `Understanding the difficulty of training deep feedforward neural networks` - | ||||
| Glorot, X. & Bengio, Y. (2010). | Glorot, X. & Bengio, Y. (2010). | ||||
| :param tensor: An n-dimentional tensor to be initialized | |||||
| :param gain: Scaling factor for :math:`std`. | |||||
| :param tensor: tensor to be initialized. | |||||
| :param gain: scaling factor for :math:`std`. | |||||
| """ | """ | ||||
| fan_in, fan_out = calculate_fan_in_and_fan_out(tensor) | fan_in, fan_out = calculate_fan_in_and_fan_out(tensor) | ||||
| std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) | std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) | ||||
| @@ -209,7 +209,7 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: | |||||
| def msra_uniform_( | def msra_uniform_( | ||||
| tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | ||||
| ) -> None: | ) -> None: | ||||
| r"""Fill ``tensor`` wilth random values sampled from | |||||
| r"""Fills tensor wilth random values sampled from | |||||
| :math:`\mathcal{U}(-\text{bound}, \text{bound})` where | :math:`\mathcal{U}(-\text{bound}, \text{bound})` where | ||||
| .. math:: | .. math:: | ||||
| @@ -219,13 +219,13 @@ def msra_uniform_( | |||||
| `Delving deep into rectifiers: Surpassing human-level performance on ImageNet | `Delving deep into rectifiers: Surpassing human-level performance on ImageNet | ||||
| classification` | classification` | ||||
| :param tensor: An n-dimentional tensor to be initialized | |||||
| :param a: Optional parameter for calculating gain for leaky_relu. See | |||||
| :param tensor: tensor to be initialized. | |||||
| :param a: optional parameter for calculating gain for leaky_relu. See | |||||
| :func:`calculate_gain` for details. | :func:`calculate_gain` for details. | ||||
| :param mode: ``'fan_in'`` or ``'fan_out'``, used to calculate :math:`gain`, the | |||||
| :param mode: "fan_in" or "fan_out", used to calculate :math:`gain`, the | |||||
| scaling factor for :math:`bound`. See :func:`calculate_fan_in_and_fan_out` for | scaling factor for :math:`bound`. See :func:`calculate_fan_in_and_fan_out` for | ||||
| details. | details. | ||||
| :param nonlinearity: Name of the non-linear function used to calculate :math:`gain`. | |||||
| :param nonlinearity: name of the non-linear function used to calculate :math:`gain`. | |||||
| See :func:`calculate_gain` for details. | See :func:`calculate_gain` for details. | ||||
| """ | """ | ||||
| fan = calculate_correct_fan(tensor, mode) | fan = calculate_correct_fan(tensor, mode) | ||||
| @@ -238,7 +238,7 @@ def msra_uniform_( | |||||
| def msra_normal_( | def msra_normal_( | ||||
| tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" | ||||
| ) -> None: | ) -> None: | ||||
| r"""Fill ``tensor`` wilth random values sampled from | |||||
| r"""Fills tensor wilth random values sampled from | |||||
| :math:`\mathcal{N}(0, \text{std}^2)` where | :math:`\mathcal{N}(0, \text{std}^2)` where | ||||
| .. math:: | .. math:: | ||||
| @@ -248,13 +248,13 @@ def msra_normal_( | |||||
| `Delving deep into rectifiers: Surpassing human-level performance on ImageNet | `Delving deep into rectifiers: Surpassing human-level performance on ImageNet | ||||
| classification` | classification` | ||||
| :param tensor: An n-dimentional tensor to be initialized | |||||
| :param a: Optional parameter for calculating gain for leaky_relu. See | |||||
| :param tensor: tensor to be initialized. | |||||
| :param a: optional parameter for calculating gain for leaky_relu. See | |||||
| :func:`calculate_gain` for details. | :func:`calculate_gain` for details. | ||||
| :param mode: ``'fan_in'`` or ``'fan_out'``, used to calculate :math:`gain`, the | |||||
| :param mode: "fan_in" or "fan_out", used to calculate :math:`gain`, the | |||||
| scaling factor for :math:`gain`. See :func:`calculate_fan_in_and_fan_out` for | scaling factor for :math:`gain`. See :func:`calculate_fan_in_and_fan_out` for | ||||
| details. | details. | ||||
| :param nonlinearity: Name of the non-linear function used to calculate :math:`gain`. | |||||
| :param nonlinearity: name of the non-linear function used to calculate :math:`gain`. | |||||
| See :func:`calculate_gain` for details. | See :func:`calculate_gain` for details. | ||||
| """ | """ | ||||
| fan = calculate_correct_fan(tensor, mode) | fan = calculate_correct_fan(tensor, mode) | ||||
| @@ -7,7 +7,7 @@ | |||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| import numpy as np | import numpy as np | ||||
| from ..functional import linear | |||||
| from ..functional.nn import linear | |||||
| from ..tensor import Parameter | from ..tensor import Parameter | ||||
| from . import init | from . import init | ||||
| from .module import Module | from .module import Module | ||||
| @@ -25,7 +25,7 @@ class Linear(Module): | |||||
| :param in_features: size of each input sample. | :param in_features: size of each input sample. | ||||
| :param out_features: size of each output sample. | :param out_features: size of each output sample. | ||||
| :param bias: If set to ``False``, the layer will not learn an additive bias. | |||||
| :param bias: if set to ``False``, the layer will not learn an additive bias. | |||||
| Default: ``True`` | Default: ``True`` | ||||
| Examples: | Examples: | ||||
| @@ -78,3 +78,8 @@ class Linear(Module): | |||||
| def forward(self, x): | def forward(self, x): | ||||
| return self._calc_linear(x, self.weight, self.bias) | return self._calc_linear(x, self.weight, self.bias) | ||||
| def _module_info_string(self) -> str: | |||||
| return "in_features={}, out_features={}, bias={}".format( | |||||
| self.in_features, self.out_features, self.bias is not None | |||||
| ) | |||||
| @@ -69,14 +69,14 @@ class Module(metaclass=ABCMeta): | |||||
| self._forward_pre_hooks = OrderedDict() | self._forward_pre_hooks = OrderedDict() | ||||
| self._forward_hooks = OrderedDict() | self._forward_hooks = OrderedDict() | ||||
| self._modules = [] | |||||
| @abstractmethod | @abstractmethod | ||||
| def forward(self, inputs): | def forward(self, inputs): | ||||
| pass | pass | ||||
| def register_forward_pre_hook(self, hook: Callable) -> HookHandler: | def register_forward_pre_hook(self, hook: Callable) -> HookHandler: | ||||
| """Register a hook to handle forward inputs. `hook` should be a function | |||||
| Note that `inputs` keyword inputs | |||||
| """Registers a hook to handle forward inputs. `hook` should be a function. | |||||
| :param hook: a function that receives `module` and `inputs`, then returns | :param hook: a function that receives `module` and `inputs`, then returns | ||||
| a modified `inputs` or `None`. | a modified `inputs` or `None`. | ||||
| @@ -85,7 +85,7 @@ class Module(metaclass=ABCMeta): | |||||
| return HookHandler(self._forward_pre_hooks, hook) | return HookHandler(self._forward_pre_hooks, hook) | ||||
| def register_forward_hook(self, hook: Callable) -> HookHandler: | def register_forward_hook(self, hook: Callable) -> HookHandler: | ||||
| """Register a hook to handle forward results. `hook` should be a function that | |||||
| """Registers a hook to handle forward results. `hook` should be a function that | |||||
| receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`. | receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`. | ||||
| This method returns a handler with :meth:`~.HookHandler.remove` interface to delete the hook. | This method returns a handler with :meth:`~.HookHandler.remove` interface to delete the hook. | ||||
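Hedged usage of the two hook registries described above; both return a handler whose `remove()` detaches the hook:

```python
import megengine.module as M

net = M.Linear(4, 2)

def pre_hook(module, inputs):
    return inputs              # or None to leave the inputs untouched

def post_hook(module, inputs, outputs):
    return outputs             # or None to leave the outputs untouched

h1 = net.register_forward_pre_hook(pre_hook)
h2 = net.register_forward_hook(post_hook)
h1.remove()                    # detach when no longer needed
h2.remove()
```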
| @@ -124,12 +124,12 @@ class Module(metaclass=ABCMeta): | |||||
| returned iterable is guaranteed to be identical, as long as all the involved | returned iterable is guaranteed to be identical, as long as all the involved | ||||
| module objects' ``__dict__`` does not change throughout those calls. | module objects' ``__dict__`` does not change throughout those calls. | ||||
| :param recursive: Whether to recursively scan all the submodules. | |||||
| :param with_key: Whether to yield keys along with yielded objects. | |||||
| :param with_parent: Whether to yield ``self`` along with yielded objects. | |||||
| :param prefix: The prefix appended to the yielded keys. | |||||
| :param predicate: The predicate function applied to scanned objects. | |||||
| :param seen: A dict that records whether a module has been traversed yet. | |||||
| :param recursive: whether to recursively scan all the submodules. | |||||
| :param with_key: whether to yield keys along with yielded objects. | |||||
| :param with_parent: whether to yield ``self`` along with yielded objects. | |||||
| :param prefix: prefix appended to the yielded keys. | |||||
| :param predicate: the predicate function applied to scanned objects. | |||||
| :param seen: a dict that records whether a module has been traversed yet. | |||||
| """ | """ | ||||
| if seen is None: | if seen is None: | ||||
| seen = set([id(self)]) | seen = set([id(self)]) | ||||
| @@ -191,10 +191,10 @@ class Module(metaclass=ABCMeta): | |||||
| self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | self, prefix: Optional[str] = None, recursive: bool = True, **kwargs | ||||
| ) -> Iterable[Tuple[str, Parameter]]: | ) -> Iterable[Tuple[str, Parameter]]: | ||||
| """Returns an iterable for key :class:`~.Parameter` pairs of the module, where | """Returns an iterable for key :class:`~.Parameter` pairs of the module, where | ||||
| ``key`` is the dotted path from this module to the :class:`~.Parameter` . | |||||
| ``key`` is the dotted path from this module to the :class:`~.Parameter`. | |||||
| :param prefix: The prefix prepended to the keys. | |||||
| :param recursive: If ``True``, returns all :class:`~.Parameter` within this | |||||
| :param prefix: prefix prepended to the keys. | |||||
| :param recursive: if ``True``, returns all :class:`~.Parameter` within this | |||||
| module, else only returns :class:`~.Parameter` that are direct attributes | module, else only returns :class:`~.Parameter` that are direct attributes | ||||
| of this module. | of this module. | ||||
| """ | """ | ||||
| @@ -223,7 +223,7 @@ class Module(metaclass=ABCMeta): | |||||
| Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | ||||
| :param recursive: If ``True``, returns all buffers within this | |||||
| :param recursive: if ``True``, returns all buffers within this | |||||
| module, else only returns buffers that are direct attributes | module, else only returns buffers that are direct attributes | ||||
| of this module. | of this module. | ||||
| """ | """ | ||||
| @@ -239,8 +239,8 @@ class Module(metaclass=ABCMeta): | |||||
| Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. | ||||
| :param prefix: The prefix prepended to the keys. | |||||
| :param recursive: If ``True``, returns all buffers within this | |||||
| :param prefix: prefix prepended to the keys. | |||||
| :param recursive: if ``True``, returns all buffers within this | |||||
| module, else only returns buffers that are direct attributes | module, else only returns buffers that are direct attributes | ||||
| of this module. | of this module. | ||||
| """ | """ | ||||
| @@ -285,7 +285,7 @@ class Module(metaclass=ABCMeta): | |||||
| module, including itself, where 'key' is the dotted path from this module to the | module, including itself, where 'key' is the dotted path from this module to the | ||||
| submodules. | submodules. | ||||
| :param prefix: The prefix prepended to the path. | |||||
| :param prefix: prefix prepended to the path. | |||||
| """ | """ | ||||
| if "with_parent" in kwargs and kwargs["with_parent"]: | if "with_parent" in kwargs and kwargs["with_parent"]: | ||||
| yield ("" if prefix is None else prefix), self, None | yield ("" if prefix is None else prefix), self, None | ||||
| @@ -296,24 +296,24 @@ class Module(metaclass=ABCMeta): | |||||
| ) | ) | ||||
| def apply(self, fn: "Callable[[Module], Any]") -> None: | def apply(self, fn: "Callable[[Module], Any]") -> None: | ||||
| """Apply function ``fn`` to all the modules within this module, including | |||||
| """Applies function ``fn`` to all the modules within this module, including | |||||
| itself. | itself. | ||||
| :param fn: The function to be applied on modules. | |||||
| :param fn: the function to be applied to the modules. | |||||
| """ | """ | ||||
| for it in self.modules(): | for it in self.modules(): | ||||
| fn(it) | fn(it) | ||||
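A short sketch of ``apply``; the freezing callback is an illustrative choice, not from the diff:

    import megengine.module as M

    def freeze_bn(m):
        # flip only BatchNorm layers to inference behavior
        if isinstance(m, M.BatchNorm2d):
            m.training = False

    net = M.Sequential(M.Conv2d(3, 8, 3), M.BatchNorm2d(8))
    net.apply(freeze_bn)  # visits every module, including net itself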
| @deprecated(version="1.0") | @deprecated(version="1.0") | ||||
| def zero_grad(self) -> None: | def zero_grad(self) -> None: | ||||
| """Set all parameters' grads to zero | |||||
| """Sets all parameters' grads to zero | |||||
| """ | """ | ||||
| for param in self.parameters(): | for param in self.parameters(): | ||||
| if param.grad is not None: | if param.grad is not None: | ||||
| param.grad.reset_zero() | param.grad.reset_zero() | ||||
| def train(self, mode: bool = True, recursive: bool = True) -> None: | def train(self, mode: bool = True, recursive: bool = True) -> None: | ||||
| """Set training mode of all the modules within this module (including itself) to | |||||
| """Sets training mode of all the modules within this module (including itself) to | |||||
| ``mode``. This effectively sets the ``training`` attributes of those modules | ``mode``. This effectively sets the ``training`` attributes of those modules | ||||
| to ``mode``, but only has effect on certain modules (e.g. | to ``mode``, but only has effect on certain modules (e.g. | ||||
| :class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`) | :class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`) | ||||
| @@ -331,14 +331,14 @@ class Module(metaclass=ABCMeta): | |||||
| self.apply(fn) | self.apply(fn) | ||||
| def eval(self) -> None: | def eval(self) -> None: | ||||
| """Set training mode of all the modules within this module (including itself) to | |||||
| """Sets training mode of all the modules within this module (including itself) to | |||||
| ``False``. See :meth:`~.Module.train` for details. | ``False``. See :meth:`~.Module.train` for details. | ||||
| """ | """ | ||||
| self.train(False) | self.train(False) | ||||
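How the two modes relate, as a sketch:

    import megengine.module as M

    net = M.Sequential(M.Linear(4, 4), M.Dropout(0.5))
    net.eval()  # shorthand for net.train(False)
    assert all(not m.training for m in net.modules())
    net.train()  # restores training behavior for Dropout/BatchNorm-style modules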
| def disable_quantize(self, value=True): | def disable_quantize(self, value=True): | ||||
| r""" | r""" | ||||
| Set ``module``'s ``quantize_disabled`` attribute and return ``module``. | |||||
| Sets ``module``'s ``quantize_disabled`` attribute and returns ``module``. | |||||
| Could be used as a decorator. | Could be used as a decorator. | ||||
| """ | """ | ||||
| @@ -351,7 +351,7 @@ class Module(metaclass=ABCMeta): | |||||
| def replace_param( | def replace_param( | ||||
| self, params: dict, start_pos: int, seen: Optional[Set[int]] = None | self, params: dict, start_pos: int, seen: Optional[Set[int]] = None | ||||
| ): | ): | ||||
| """Replace module's parameters with `params`, used by :class:`~.ParamPack` to | |||||
| """Replaces module's parameters with `params`, used by :class:`~.ParamPack` to | |||||
| speedup multimachine training. | speedup multimachine training. | ||||
| """ | """ | ||||
| offset = 0 | offset = 0 | ||||
| @@ -407,7 +407,7 @@ class Module(metaclass=ABCMeta): | |||||
| state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]], | state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]], | ||||
| strict=True, | strict=True, | ||||
| ): | ): | ||||
| r"""Load a given dictionary created by :func:`state_dict` into this module. | |||||
| r"""Loads a given dictionary created by :func:`state_dict` into this module. | |||||
| If ``strict`` is ``True``, the keys of :func:`state_dict` must exactly match the keys | If ``strict`` is ``True``, the keys of :func:`state_dict` must exactly match the keys | ||||
| returned by :func:`state_dict`. | returned by :func:`state_dict`. | ||||
| @@ -518,3 +518,57 @@ class Module(metaclass=ABCMeta): | |||||
| loaded.append(k) | loaded.append(k) | ||||
| return set(loaded), set(skipped) | return set(loaded), set(skipped) | ||||
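A save/restore round trip under ``strict`` checking (the checkpoint path is illustrative):

    import megengine as mge
    import megengine.module as M

    net = M.Linear(4, 2)
    mge.save(net.state_dict(), "checkpoint.pkl")
    # strict=True (the default) requires the key sets to match exactly
    net.load_state_dict(mge.load("checkpoint.pkl"), strict=True)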
| def __setattr__(self, name: str, value): | |||||
| if _is_module(value): | |||||
| modules = self.__dict__.get("_modules") | |||||
| if modules is None: | |||||
| raise AttributeError( | |||||
| "cannot assign module before Module.__init__() call" | |||||
| ) | |||||
| if name not in self.__dict__: | |||||
| modules.append(name) | |||||
| super().__setattr__(name, value) | |||||
| def __delattr__(self, name: str): | |||||
| if name in self.__dict__ and _is_module(self.__dict__[name]): | |||||
| modules = self.__dict__.get("_modules") | |||||
| modules.remove(name) | |||||
| super().__delattr__(name) | |||||
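What the new hooks buy, sketched; note that ``super().__init__()`` must run first so the ``_modules`` registry exists:

    import megengine.module as M

    class Net(M.Module):
        def __init__(self):
            super().__init__()        # creates _modules; assigning a module
            self.fc = M.Linear(4, 2)  # before this would raise AttributeError
        def forward(self, x):
            return self.fc(x)

    net = Net()
    del net.fc  # __delattr__ removes "fc" from _modules as well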
| def _module_info_string(self) -> str: | |||||
| r"""Set the extra representation of the module. | |||||
| """ | |||||
| return "" | |||||
| def __repr__(self): | |||||
| def add_indent(repr_str, num_spaces): | |||||
| s = repr_str.split("\n") | |||||
| # don't do anything for single-line stuff | |||||
| if len(s) == 1: | |||||
| return repr_str | |||||
| first = s.pop(0) | |||||
| s = [(num_spaces * " ") + line for line in s] | |||||
| s = "\n".join(s) | |||||
| s = first + "\n" + s | |||||
| return s | |||||
| extra_lines = [] | |||||
| extra_repr = self._module_info_string() | |||||
| if extra_repr: | |||||
| extra_lines = extra_repr.split("\n") | |||||
| child_lines = [ | |||||
| "(" + name + "): " + add_indent(repr(self.__dict__[name]), 2) | |||||
| for name in self._modules | |||||
| ] | |||||
| lines = extra_lines + child_lines | |||||
| main_str = self.__class__.__name__ + "(" | |||||
| if lines: | |||||
| # simple one-liner info, which most builtin Modules will use | |||||
| if len(extra_lines) == 1 and not child_lines: | |||||
| main_str += extra_lines[0] | |||||
| else: | |||||
| main_str += "\n " + "\n ".join(lines) + "\n" | |||||
| main_str += ")" | |||||
| return main_str | |||||
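A sketch of the resulting ``repr`` for a module that overrides ``_module_info_string`` (``Scale`` is illustrative):

    import megengine.module as M

    class Scale(M.Module):
        def __init__(self, factor):
            super().__init__()
            self.factor = factor
        def _module_info_string(self):
            return "factor={}".format(self.factor)
        def forward(self, x):
            return x * self.factor

    # one extra line and no children hits the one-liner branch: Scale(factor=2.0)
    print(repr(Scale(2.0)))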
| @@ -29,6 +29,11 @@ class _PoolNd(Module): | |||||
| def forward(self, inp): | def forward(self, inp): | ||||
| pass | pass | ||||
| def _module_info_string(self) -> str: | |||||
| return "kernel_size={kernel_size}, stride={stride}, padding={padding}".format( | |||||
| **self.__dict__ | |||||
| ) | |||||
| class MaxPool2d(_PoolNd): | class MaxPool2d(_PoolNd): | ||||
| r"""Applies a 2D max pooling over an input. | r"""Applies a 2D max pooling over an input. | ||||
| @@ -5,7 +5,7 @@ | |||||
| # Unless required by applicable law or agreed to in writing, | # Unless required by applicable law or agreed to in writing, | ||||
| # software distributed under the License is distributed on an | # software distributed under the License is distributed on an | ||||
| # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| from ...functional import add_update, ones, relu, sqrt, sum, zeros | |||||
| from ...functional import ones, relu, sqrt, sum, zeros | |||||
| from ...quantization.utils import fake_quant_bias | from ...quantization.utils import fake_quant_bias | ||||
| from .. import conv_bn as Float | from .. import conv_bn as Float | ||||
| from .module import QATModule | from .module import QATModule | ||||
| @@ -76,18 +76,10 @@ class _ConvBnActivation2d(Float._ConvBnActivation2d, QATModule): | |||||
| bn_var.detach() * num_elements_per_channel / (num_elements_per_channel - 1) | bn_var.detach() * num_elements_per_channel / (num_elements_per_channel - 1) | ||||
| ) | ) | ||||
| exponential_average_factor = 1 - self.bn.momentum | exponential_average_factor = 1 - self.bn.momentum | ||||
| add_update( | |||||
| self.bn.running_mean, | |||||
| delta=bn_mean, | |||||
| alpha=1 - exponential_average_factor, | |||||
| beta=exponential_average_factor, | |||||
| ) | |||||
| add_update( | |||||
| self.bn.running_var, | |||||
| delta=bn_var, | |||||
| alpha=1 - exponential_average_factor, | |||||
| beta=exponential_average_factor, | |||||
| ) | |||||
| self.bn.running_mean *= self.bn.momentum | |||||
| self.bn.running_mean += exponential_average_factor * bn_mean | |||||
| self.bn.running_var *= self.bn.momentum | |||||
| self.bn.running_var += exponential_average_factor * bn_var | |||||
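The rewrite keeps the old semantics: ``add_update(dest, delta, alpha, beta)`` computed ``dest = alpha * dest + beta * delta``, and with ``alpha = momentum`` and ``beta = 1 - momentum`` this is the usual exponential moving average. A numeric sketch:

    # in-place pair: running *= momentum; running += (1 - momentum) * batch_stat
    momentum = 0.9
    running, batch_stat = 1.0, 3.0
    running = momentum * running + (1 - momentum) * batch_stat
    assert abs(running - 1.2) < 1e-12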
| def calc_conv_bn_qat(self, inp, approx=True): | def calc_conv_bn_qat(self, inp, approx=True): | ||||
| if self.training and not approx: | if self.training and not approx: | ||||
| @@ -18,7 +18,7 @@ class Linear(Float.Linear, QATModule): | |||||
| :param in_features: size of each input sample. | :param in_features: size of each input sample. | ||||
| :param out_features: size of each output sample. | :param out_features: size of each output sample. | ||||
| :param bias: If set to ``False``, the layer will not learn an additive bias. | :param bias: If set to ``False``, the layer will not learn an additive bias. | ||||
| Default: ``True`` | |||||
| Default: True | |||||
| """ | """ | ||||
| @@ -52,7 +52,7 @@ class QATModule(Module): | |||||
| self.weight_fake_quant = safe_call(qconfig.weight_fake_quant) | self.weight_fake_quant = safe_call(qconfig.weight_fake_quant) | ||||
| def _enable_exec(self, with_module, func, enable): | def _enable_exec(self, with_module, func, enable): | ||||
| if not with_module: | |||||
| if not with_module or not func: | |||||
| return | return | ||||
| if enable: | if enable: | ||||
| func.enable() | func.enable() | ||||
| @@ -15,7 +15,7 @@ from .module import QuantizedModule | |||||
| class Concat(QuantizedModule): | class Concat(QuantizedModule): | ||||
| r""" | r""" | ||||
| A :class:`~.QuantizedModule` to do quantized concat, inference only. | |||||
| A :class:`~.QuantizedModule` to do quantized concat, used for inference only. | |||||
| """ | """ | ||||
| def __init__(self, dtype=None): | def __init__(self, dtype=None): | ||||
| @@ -29,7 +29,7 @@ class Concat(QuantizedModule): | |||||
| @classmethod | @classmethod | ||||
| def from_qat_module(cls, qat_module: QAT.Concat): | def from_qat_module(cls, qat_module: QAT.Concat): | ||||
| r""" | r""" | ||||
| return a :class:`~.QuantizedModule` instance converted from a | |||||
| Returns a :class:`~.QuantizedModule` instance converted from a | |||||
| :class:`~.QATModule` instance. | :class:`~.QATModule` instance. | ||||
| """ | """ | ||||
| return cls(qat_module.get_activation_dtype()) | return cls(qat_module.get_activation_dtype()) | ||||
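Conversion is normally driven by the quantization tooling over a whole QAT network, but the classmethod can also be called directly; a sketch assuming an already-calibrated QAT instance:

    from megengine.module import qat as QAT
    from megengine.module import quantized as Q

    qat_concat = QAT.Concat()  # assumed set up and calibrated elsewhere
    q_concat = Q.Concat.from_qat_module(qat_concat)  # inference-only module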
| @@ -11,17 +11,17 @@ import numpy as np | |||||
| from ... import module as Float | from ... import module as Float | ||||
| from ...core.tensor import dtype | from ...core.tensor import dtype | ||||
| from ...functional import conv_bias_activation | |||||
| from ...functional.nn import conv_bias_activation | |||||
| from ...tensor import Parameter | from ...tensor import Parameter | ||||
| from ..qat import conv as QAT | from ..qat import conv as QAT | ||||
| from .module import QuantizedModule | from .module import QuantizedModule | ||||
| class Conv2d(Float.Conv2d, QuantizedModule): | class Conv2d(Float.Conv2d, QuantizedModule): | ||||
| r"""quantized version of :class:`~.qat.conv.Conv2d`.""" | |||||
| r"""Applies a 2D convolution over an quantized input tensor, inference only. | |||||
| r"""Quantized version of :class:`~.qat.conv.Conv2d`.""" | |||||
| r"""Applies a 2D convolution over a quantized input tensor, used for inference only. | |||||
| The parameter is same with :class: `~.Conv2d` | |||||
| The parameters are the same as :class:`~.Conv2d`. | |||||
| """ | """ | ||||
| def __init__( | def __init__( | ||||
| @@ -101,7 +101,7 @@ class Conv2d(Float.Conv2d, QuantizedModule): | |||||
| class ConvRelu2d(Conv2d): | class ConvRelu2d(Conv2d): | ||||
| r"""quantized version of :class:`~.qat.conv.ConvRelu2d`.""" | |||||
| r"""Quantized version of :class:`~.qat.conv.ConvRelu2d`.""" | |||||
| def forward(self, inp): | def forward(self, inp): | ||||
| return self.calc_conv_quantized(inp, nonlinear_mode="RELU") | return self.calc_conv_quantized(inp, nonlinear_mode="RELU") | ||||
| @@ -11,15 +11,15 @@ from .conv import Conv2d | |||||
| class _ConvBnActivation2d(Conv2d): | class _ConvBnActivation2d(Conv2d): | ||||
| r"""Applies a 2D convolution over an quantized input tensor, inference only. | |||||
| r"""Applies a 2D convolution over a quantized input tensor, used for inference only. | |||||
| The parameter is same with :class: `~.Conv2d` | |||||
| The parameters are the same as :class:`~.Conv2d`. | |||||
| """ | """ | ||||
| @classmethod | @classmethod | ||||
| def from_qat_module(cls, qat_module: QAT._ConvBnActivation2d): | def from_qat_module(cls, qat_module: QAT._ConvBnActivation2d): | ||||
| r""" | r""" | ||||
| return a :class:`~.QuantizedModule` instance converted from a | |||||
| Returns a :class:`~.QuantizedModule` instance converted from a | |||||
| :class:`~.QATModule` instance. | :class:`~.QATModule` instance. | ||||
| """ | """ | ||||
| output_dtype = qat_module.get_activation_dtype() | output_dtype = qat_module.get_activation_dtype() | ||||
| @@ -43,14 +43,14 @@ class _ConvBnActivation2d(Conv2d): | |||||
| class ConvBn2d(_ConvBnActivation2d): | class ConvBn2d(_ConvBnActivation2d): | ||||
| r"""quantized version of :class:`~.qat.conv_bn.ConvBn2d`.""" | |||||
| r"""Quantized version of :class:`~.qat.conv_bn.ConvBn2d`.""" | |||||
| def forward(self, inp): | def forward(self, inp): | ||||
| return self.calc_conv_quantized(inp, nonlinear_mode="IDENTITY") | return self.calc_conv_quantized(inp, nonlinear_mode="IDENTITY") | ||||
| class ConvBnRelu2d(_ConvBnActivation2d): | class ConvBnRelu2d(_ConvBnActivation2d): | ||||
| r"""quantized version of :class:`~.qat.conv_bn.ConvBnRelu2d`.""" | |||||
| r"""Quantized version of :class:`~.qat.conv_bn.ConvBnRelu2d`.""" | |||||
| def forward(self, inp): | def forward(self, inp): | ||||
| return self.calc_conv_quantized(inp, nonlinear_mode="RELU") | return self.calc_conv_quantized(inp, nonlinear_mode="RELU") | ||||
| @@ -13,7 +13,7 @@ from .module import QuantizedModule | |||||
| class Elemwise(QuantizedModule): | class Elemwise(QuantizedModule): | ||||
| r"""quantized version of :class:`~.qat.elemwise.Elemwise`.""" | |||||
| r"""Quantized version of :class:`~.qat.elemwise.Elemwise`.""" | |||||
| _elemwise_multi_type_mode = P.ElemwiseMultiType.Mode | _elemwise_multi_type_mode = P.ElemwiseMultiType.Mode | ||||
| @@ -30,7 +30,7 @@ class Elemwise(QuantizedModule): | |||||
| @classmethod | @classmethod | ||||
| def from_qat_module(cls, qat_module: QAT.Elemwise): | def from_qat_module(cls, qat_module: QAT.Elemwise): | ||||
| r""" | r""" | ||||
| return a :class:`~.QuantizedModule` instance converted from a | |||||
| Returns a :class:`~.QuantizedModule` instance converted from a | |||||
| :class:`~.QATModule` instance. | :class:`~.QATModule` instance. | ||||
| """ | """ | ||||
| return cls(qat_module.method.name, qat_module.get_activation_dtype()) | return cls(qat_module.method.name, qat_module.get_activation_dtype()) | ||||
| @@ -15,7 +15,7 @@ from .module import QuantizedModule | |||||
| class Linear(QuantizedModule): | class Linear(QuantizedModule): | ||||
| r"""quantized version of :class:`~.qat.linear.Linear`.""" | |||||
| r"""Quantized version of :class:`~.qat.linear.Linear`.""" | |||||
| def __init__( | def __init__( | ||||
| self, dtype: np.dtype = None, | self, dtype: np.dtype = None, | ||||
| @@ -31,7 +31,7 @@ class Linear(QuantizedModule): | |||||
| inp_scale = dtype.get_scale(inp.dtype) | inp_scale = dtype.get_scale(inp.dtype) | ||||
| w_scale = dtype.get_scale(self.weight.dtype) | w_scale = dtype.get_scale(self.weight.dtype) | ||||
| bias_dtype = dtype.qint32(inp_scale * w_scale) | bias_dtype = dtype.qint32(inp_scale * w_scale) | ||||
| return F.linear( | |||||
| return F.nn.linear( | |||||
| inp, | inp, | ||||
| self.weight, | self.weight, | ||||
| None if self.bias is None else self.bias.astype(bias_dtype), | None if self.bias is None else self.bias.astype(bias_dtype), | ||||
| @@ -40,7 +40,7 @@ class Linear(QuantizedModule): | |||||
| @classmethod | @classmethod | ||||
| def from_qat_module(cls, qat_module: QAT.Linear): | def from_qat_module(cls, qat_module: QAT.Linear): | ||||
| r""" | r""" | ||||
| return a :class:`~.QuantizedModule` instance converted from a | |||||
| Returns a :class:`~.QuantizedModule` instance converted from a | |||||
| :class:`~.QATModule` instance. | :class:`~.QATModule` instance. | ||||
| """ | """ | ||||
| output_dtype = qat_module.get_activation_dtype() | output_dtype = qat_module.get_activation_dtype() | ||||
| @@ -26,6 +26,6 @@ class QuantizedModule(Module): | |||||
| @abstractmethod | @abstractmethod | ||||
| def from_qat_module(cls, qat_module: QATModule): | def from_qat_module(cls, qat_module: QATModule): | ||||
| r""" | r""" | ||||
| return a :class:`~.QuantizedModule` instance converted from a | |||||
| Returns a :class:`~.QuantizedModule` instance converted from a | |||||
| :class:`~.QATModule` instance. | :class:`~.QATModule` instance. | ||||
| """ | """ | ||||
| @@ -11,7 +11,7 @@ from .module import QuantizedModule | |||||
| class QuantStub(QuantizedModule): | class QuantStub(QuantizedModule): | ||||
| r""" | r""" | ||||
| quantized version of :class:`~.qat.quant_dequant.QuantStub`, | |||||
| Quantized version of :class:`~.qat.quant_dequant.QuantStub`, | |||||
| which converts the input to a quantized dtype. | which converts the input to a quantized dtype. | ||||
| """ | """ | ||||
| @@ -25,7 +25,7 @@ class QuantStub(QuantizedModule): | |||||
| @classmethod | @classmethod | ||||
| def from_qat_module(cls, qat_module: QAT.QuantStub): | def from_qat_module(cls, qat_module: QAT.QuantStub): | ||||
| r""" | r""" | ||||
| return a :class:`~.QuantizedModule` instance converted from a | |||||
| Returns a :class:`~.QuantizedModule` instance converted from a | |||||
| :class:`~.QATModule` instance. | :class:`~.QATModule` instance. | ||||
| """ | """ | ||||
| return cls(qat_module.get_activation_dtype()) | return cls(qat_module.get_activation_dtype()) | ||||
| @@ -33,7 +33,7 @@ class QuantStub(QuantizedModule): | |||||
| class DequantStub(QuantizedModule): | class DequantStub(QuantizedModule): | ||||
| r""" | r""" | ||||
| quantized version of :class:`~.qat.quant_dequant.DequantStub`, | |||||
| Quantized version of :class:`~.qat.quant_dequant.DequantStub`, | |||||
| which restores the quantized input to float32 dtype. | which restores the quantized input to float32 dtype. | ||||
| """ | """ | ||||
| @@ -43,7 +43,7 @@ class DequantStub(QuantizedModule): | |||||
| @classmethod | @classmethod | ||||
| def from_qat_module(cls, qat_module: QAT.DequantStub): | def from_qat_module(cls, qat_module: QAT.DequantStub): | ||||
| r""" | r""" | ||||
| return a :class:`~.QuantizedModule` instance converted from a | |||||
| Returns a :class:`~.QuantizedModule` instance converted from a | |||||
| :class:`~.QATModule` instance. | :class:`~.QATModule` instance. | ||||
| """ | """ | ||||
| return cls() | return cls() | ||||
| @@ -26,40 +26,40 @@ class Sequential(Module): | |||||
| import megengine as mge | import megengine as mge | ||||
| import megengine.module as M | import megengine.module as M | ||||
| import megengine.functional as F | import megengine.functional as F | ||||
| import numpy as np | |||||
| from collections import OrderedDict | |||||
| batch_size = 64 | batch_size = 64 | ||||
| data = mge.tensor(np.zeros((batch_size, 1, 28, 28)), dtype=np.float32) | data = mge.tensor(np.zeros((batch_size, 1, 28, 28)), dtype=np.float32) | ||||
| label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) | label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) | ||||
| data = data.reshape(batch_size, -1) | data = data.reshape(batch_size, -1) | ||||
| net = M.Sequential( | |||||
| net0 = M.Sequential( | |||||
| M.Linear(28 * 28, 320), | M.Linear(28 * 28, 320), | ||||
| M.Linear(320, 500), | |||||
| M.Linear(500, 320), | |||||
| M.Linear(320, 10) | M.Linear(320, 10) | ||||
| ) | ) | ||||
| pred = net(data) | |||||
| pred0 = net0(data) | |||||
| loss = F.cross_entropy_with_softmax(pred, label) | |||||
| modules = OrderedDict() | |||||
| modules["fc0"] = nn.Linear(28 * 28, 320) | |||||
| modules["fc1"] = nn.Linear(320, 10) | |||||
| net1 = nn.Sequential(modules) | |||||
| pred1 = net1(data) | |||||
| """ | """ | ||||
| def __init__(self, *args): | def __init__(self, *args): | ||||
| super().__init__() | super().__init__() | ||||
| self.layer_keys = [] | self.layer_keys = [] | ||||
| self.layer_values = [] | |||||
| if len(args) == 1 and isinstance(args[0], OrderedDict): | if len(args) == 1 and isinstance(args[0], OrderedDict): | ||||
| for key, module in args[0].items(): | for key, module in args[0].items(): | ||||
| # self.add_module(key, module) | # self.add_module(key, module) | ||||
| setattr(self, key, module) | setattr(self, key, module) | ||||
| self.layer_keys.append(key) | self.layer_keys.append(key) | ||||
| self.layer_values.append(module) | |||||
| else: | else: | ||||
| for idx, module in enumerate(args): | for idx, module in enumerate(args): | ||||
| # self.add_module(str(idx), module) | # self.add_module(str(idx), module) | ||||
| setattr(self, str(idx), module) | setattr(self, str(idx), module) | ||||
| self.layer_keys.append(str(idx)) | self.layer_keys.append(str(idx)) | ||||
| self.layer_values.append(module) | |||||
| def __getitem__(self, idx): | def __getitem__(self, idx): | ||||
| if isinstance(idx, slice): | if isinstance(idx, slice): | ||||
| @@ -67,11 +67,10 @@ class Sequential(Module): | |||||
| OrderedDict(zip(self.layer_keys[idx], self.layer_values[idx])) | OrderedDict(zip(self.layer_keys[idx], self.layer_values[idx])) | ||||
| ) | ) | ||||
| else: | else: | ||||
| return self.layer_values[idx] | |||||
| return getattr(self, self.layer_keys[idx]) | |||||
| def __setitem__(self, idx, module): | def __setitem__(self, idx, module): | ||||
| key = self.layer_keys[idx] | key = self.layer_keys[idx] | ||||
| self.layer_values[idx] = module | |||||
| return setattr(self, key, module) | return setattr(self, key, module) | ||||
| def __delitem__(self, idx): | def __delitem__(self, idx): | ||||
| @@ -79,11 +78,9 @@ class Sequential(Module): | |||||
| for key in self.layer_keys[idx]: | for key in self.layer_keys[idx]: | ||||
| delattr(self, key) | delattr(self, key) | ||||
| del self.layer_keys[idx] | del self.layer_keys[idx] | ||||
| del self.layer_values[idx] | |||||
| else: | else: | ||||
| delattr(self, self.layer_keys[idx]) | delattr(self, self.layer_keys[idx]) | ||||
| del self.layer_keys[idx] | del self.layer_keys[idx] | ||||
| del self.layer_values[idx] | |||||
| def __len__(self): | def __len__(self): | ||||
| return len(self.layer_keys) | return len(self.layer_keys) | ||||
| @@ -91,6 +88,10 @@ class Sequential(Module): | |||||
| def __iter__(self): | def __iter__(self): | ||||
| return iter(self.layer_values) | return iter(self.layer_values) | ||||
| @property | |||||
| def layer_values(self): | |||||
| return [getattr(self, key) for key in self.layer_keys] | |||||
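With ``layer_values`` now computed from the registered attributes, item assignment cannot leave a stale copy behind; a sketch:

    import megengine.module as M

    net = M.Sequential(M.Linear(4, 8), M.Linear(8, 2))
    net[1] = M.Linear(8, 3)               # __setitem__ rebinds attribute "1"
    assert net[1] is net.layer_values[1]  # the property re-reads via getattr
    head = net[0:1]                       # slicing returns a new Sequential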
| def forward(self, inp): | def forward(self, inp): | ||||
| for layer in self.layer_values: | for layer in self.layer_values: | ||||
| inp = layer(inp) | inp = layer(inp) | ||||
| @@ -22,13 +22,13 @@ class Adadelta(Optimizer): | |||||
| :param params: iterable of parameters to optimize or dicts defining | :param params: iterable of parameters to optimize or dicts defining | ||||
| parameter groups. | parameter groups. | ||||
| :param lr: coefficient that scale delta before it is applied | |||||
| to the parameters (default: 1.0). | |||||
| :param lr: coefficient that scales delta before it is applied | |||||
| to the parameters. Default: 1.0 | |||||
| :param rho: coefficient used for computing a running average | :param rho: coefficient used for computing a running average | ||||
| of squared gradients (default: 0.9). | |||||
| of squared gradients. Default: 0.9 | |||||
| :param eps: term added to the denominator to improve | :param eps: term added to the denominator to improve | ||||
| numerical stability (default: 1e-6). | |||||
| :param weight_decay: weight decay (L2 penalty) (default: 0). | |||||
| numerical stability. Default: 1e-6 | |||||
| :param weight_decay: weight decay (L2 penalty). Default: 0 | |||||
| """ | """ | ||||
| def __init__( | def __init__( | ||||
| @@ -23,12 +23,12 @@ class Adagrad(Optimizer): | |||||
| :param params: iterable of parameters to optimize or dicts defining | :param params: iterable of parameters to optimize or dicts defining | ||||
| parameter groups. | parameter groups. | ||||
| :param lr: coefficient that scale delta before it is applied | |||||
| to the parameters (default: 1e-2). | |||||
| :param lr_decay: learning rate decay (default: 0) | |||||
| :param lr: coefficient that scales delta before it is applied | |||||
| to the parameters. Default: 1e-2 | |||||
| :param lr_decay: learning rate decay. Default: 0 | |||||
| :param eps: term added to the denominator to improve | :param eps: term added to the denominator to improve | ||||
| numerical stability (default: 1e-10). | |||||
| :param weight_decay: weight decay (L2 penalty) (default: 0). | |||||
| numerical stability. Default: 1e-10 | |||||
| :param weight_decay: weight decay (L2 penalty). Default: 0 | |||||
| """ | """ | ||||
| def __init__( | def __init__( | ||||