
Merge branch 'master' into release-1.0

tags/v1.0.0
Megvii Engine Team 5 years ago
parent commit 0a0e4b60dd
100 changed files with 2190 additions and 1388 deletions
1. +17 -2 CMakeLists.txt
2. +47 -0 dnn/include/megdnn/oprs/nn.h
3. +5 -0 dnn/scripts/opr_param_defs.py
4. +8 -0 dnn/src/atlas/megcore/computing_context.cpp
5. +37 -0 dnn/src/common/adaptive_pooling.cpp
6. +14 -7 dnn/src/common/basic_types.cpp
7. +2 -0 dnn/src/common/handle_impl.h
8. +53 -0 dnn/src/cuda/adaptive_pooling/opr_impl.cpp
9. +44 -0 dnn/src/cuda/adaptive_pooling/opr_impl.h
10. +1 -0 dnn/src/cuda/handle_create.cpp
11. +1 -0 dnn/src/cuda/indexing_multi_axis_vec/kern_apply_opr_impl.cuinl
12. +5 -0 dnn/src/cuda/indexing_multi_axis_vec/kern_apply_opr_incr.cu
13. +1 -0 dnn/src/cuda/indexing_multi_axis_vec/opr_impl.cpp
14. +52 -0 dnn/src/naive/adaptive_pooling/opr_impl.cpp
15. +43 -0 dnn/src/naive/adaptive_pooling/opr_impl.h
16. +1 -0 dnn/src/naive/handle.cpp
17. +1 -0 dnn/src/naive/indexing_multi_axis_vec/opr_impl.cpp
18. +55 -0 dnn/test/common/adaptive_pooling.h
19. +2 -0 dnn/test/common/opr_trait.h
20. +97 -0 dnn/test/cuda/adaptive_pooling.cpp
21. +6 -10 dnn/test/cuda/conv_bias_int8.cpp
22. +15 -3 imperative/CMakeLists.txt
23. +1 -1 imperative/python/megengine/__init__.py
24. +7 -5 imperative/python/megengine/autodiff/grad_manager.py
25. +0 -1 imperative/python/megengine/core/__init__.py
26. +3 -1 imperative/python/megengine/core/_wrap.py
27. +2 -2 imperative/python/megengine/core/autodiff/builtin_op_utils.py
28. +6 -6 imperative/python/megengine/core/tensor/function.py
29. +2 -2 imperative/python/megengine/core/tensor/indexing.py
30. +35 -6 imperative/python/megengine/core/tensor/megbrain_graph.py
31. +3 -1 imperative/python/megengine/core/tensor/multipledispatch/conflict.py
32. +2 -0 imperative/python/megengine/core/tensor/raw_tensor/__init__.py
33. +65 -3 imperative/python/megengine/core/tensor/tensor_wrapper.py
34. +68 -33 imperative/python/megengine/core/tensor/utils.py
35. +0 -9 imperative/python/megengine/core/utils/__init__.py
36. +6 -1 imperative/python/megengine/data/_queue.py
37. +4 -4 imperative/python/megengine/data/collator.py
38. +6 -6 imperative/python/megengine/data/dataloader.py
39. +5 -5 imperative/python/megengine/data/dataset/meta_dataset.py
40. +1 -1 imperative/python/megengine/data/dataset/vision/cifar.py
41. +1 -1 imperative/python/megengine/data/dataset/vision/coco.py
42. +6 -7 imperative/python/megengine/data/dataset/vision/folder.py
43. +9 -9 imperative/python/megengine/data/dataset/vision/imagenet.py
44. +7 -7 imperative/python/megengine/data/dataset/vision/mnist.py
45. +1 -1 imperative/python/megengine/data/dataset/vision/objects365.py
46. +15 -25 imperative/python/megengine/data/dataset/vision/voc.py
47. +18 -18 imperative/python/megengine/data/sampler.py
48. +1 -1 imperative/python/megengine/data/transform/meta_transform.py
49. +31 -27 imperative/python/megengine/data/transform/vision/functional.py
50. +68 -68 imperative/python/megengine/data/transform/vision/transform.py
51. +8 -9 imperative/python/megengine/device.py
52. +49 -49 imperative/python/megengine/distributed/functional.py
53. +15 -15 imperative/python/megengine/distributed/group.py
54. +132 -13 imperative/python/megengine/distributed/helper.py
55. +2 -2 imperative/python/megengine/distributed/launcher.py
56. +94 -38 imperative/python/megengine/distributed/server.py
57. +1 -4 imperative/python/megengine/functional/__init__.py
58. +4 -4 imperative/python/megengine/functional/debug_param.py
59. +73 -89 imperative/python/megengine/functional/elemwise.py
60. +0 -44 imperative/python/megengine/functional/external.py
61. +0 -41 imperative/python/megengine/functional/graph.py
62. +50 -24 imperative/python/megengine/functional/loss.py
63. +81 -56 imperative/python/megengine/functional/math.py
64. +184 -180 imperative/python/megengine/functional/nn.py
65. +0 -34 imperative/python/megengine/functional/param_pack.py
66. +7 -10 imperative/python/megengine/functional/quantized.py
67. +62 -199 imperative/python/megengine/functional/tensor.py
68. +19 -24 imperative/python/megengine/functional/utils.py
69. +3 -3 imperative/python/megengine/hub/exceptions.py
70. +14 -14 imperative/python/megengine/hub/fetcher.py
71. +23 -23 imperative/python/megengine/hub/hub.py
72. +7 -7 imperative/python/megengine/hub/tools.py
73. +116 -63 imperative/python/megengine/jit/tracing.py
74. +1 -1 imperative/python/megengine/logger.py
75. +1 -0 imperative/python/megengine/module/__init__.py
76. +7 -5 imperative/python/megengine/module/activation.py
77. +114 -0 imperative/python/megengine/module/adaptive_pooling.py
78. +14 -8 imperative/python/megengine/module/batchnorm.py
79. +31 -18 imperative/python/megengine/module/conv.py
80. +4 -1 imperative/python/megengine/module/dropout.py
81. +3 -3 imperative/python/megengine/module/elemwise.py
82. +6 -6 imperative/python/megengine/module/embedding.py
83. +2 -2 imperative/python/megengine/module/identity.py
84. +40 -40 imperative/python/megengine/module/init.py
85. +7 -2 imperative/python/megengine/module/linear.py
86. +79 -25 imperative/python/megengine/module/module.py
87. +5 -0 imperative/python/megengine/module/pooling.py
88. +5 -13 imperative/python/megengine/module/qat/conv_bn.py
89. +1 -1 imperative/python/megengine/module/qat/linear.py
90. +1 -1 imperative/python/megengine/module/qat/module.py
91. +2 -2 imperative/python/megengine/module/quantized/concat.py
92. +5 -5 imperative/python/megengine/module/quantized/conv.py
93. +5 -5 imperative/python/megengine/module/quantized/conv_bn.py
94. +2 -2 imperative/python/megengine/module/quantized/elemwise.py
95. +3 -3 imperative/python/megengine/module/quantized/linear.py
96. +1 -1 imperative/python/megengine/module/quantized/module.py
97. +4 -4 imperative/python/megengine/module/quantized/quant_dequant.py
98. +13 -12 imperative/python/megengine/module/sequential.py
99. +5 -5 imperative/python/megengine/optimizer/adadelta.py
100. +5 -5 imperative/python/megengine/optimizer/adagrad.py

+17 -2 CMakeLists.txt

@@ -53,9 +53,11 @@ option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
option(MGE_BUILD_SDK "Build load_and_run" ON)
option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
option(MGE_WITH_PYTHON_MODULE "Build MegEngine legacy Python Module." OFF)
option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
option(MGE_WITH_ROCM "Enable ROCM support" OFF)

if(NOT ${MGE_BIN_REDUCE} STREQUAL "")
message("build with BIN REDUCE")
if(MGE_WITH_MINIMUM_SIZE)
@@ -152,6 +154,14 @@ if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} S
endif()

if(MSVC OR WIN32)
# for cmake after 3.15.2
cmake_policy(SET CMP0091 NEW)
if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
else()
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
endif()

add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
message("-- into windows build...")
message("-- CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
@@ -285,7 +295,6 @@ if(MGE_WITH_TEST)
endif()

if(MGE_BUILD_IMPERATIVE_RT)
add_compile_definitions(MGB_ENABLE_IMPERATIVE_RUNTIME)
set(CMAKE_CXX_STANDARD 17)
endif()

@@ -701,7 +710,8 @@ endif()

set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}")

set(MGB_ENABLE_IMPERATIVE ${MGE_BUILD_IMPERATIVE_RT})
set(MGE_VERSION_SCRIPT ${PROJECT_SOURCE_DIR}/src/version.ld CACHE INTERNAL "Path to linker version script")

# Write out megbrain_build_config.h
# It defines macros needed by both megbrain and dnn
configure_file(src/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
@@ -831,3 +841,8 @@ if(MSVC OR WIN32)
endif()
endforeach()
endif()

if(MGE_WITH_JIT_MLIR)
add_subdirectory(tools/mlir/mgb-opt)
add_subdirectory(tools/mlir/mgb-file-check)
endif()

+47 -0 dnn/include/megdnn/oprs/nn.h

@@ -682,6 +682,53 @@ protected:
size_t workspace_in_bytes);
};


/**
* \brief base class for AdaptivePooling
*/
class AdaptivePoolingBase : public OperatorBase {
DEF_OPR_IMPL_CTOR(AdaptivePoolingBase, OperatorBase);
DEF_OPR_PARAM(AdaptivePooling);

protected:
param::Pooling deduce_pooling_param(const TensorLayout& src,
const TensorLayout& dst);
};

class AdaptivePoolingForward : public AdaptivePoolingBase {
DEF_OPR_IMPL(AdaptivePoolingForward, AdaptivePoolingBase, 1, 1);

public:
/**
* \param[in] src input tensor
* \param[out] dst output tensor
*/
virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace workspace) = 0;
virtual size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst) = 0;
};

using AdaptivePooling = AdaptivePoolingForward;

class AdaptivePoolingBackward : public AdaptivePoolingBase {
DEF_OPR_IMPL(AdaptivePoolingBackward, AdaptivePoolingBase, 3, 1);

public:
/**
* \param[in] src the `src' parameter in AdaptivePoolingForward::exec
* \param[in] dst the `dst' parameter in AdaptivePoolingForward::exec
* \param[in] diff the backpropagated gradient wrt. dst
* \param[out] grad the backpropagated gradient wrt. src
*/
virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
_megdnn_tensor_in diff, _megdnn_tensor_out grad,
_megdnn_workspace workspace) = 0;
virtual size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst,
const TensorLayout& diff,
const TensorLayout& grad) = 0;
};

/**
* \brief base class for Local
*/


+5 -0 dnn/scripts/opr_param_defs.py

@@ -179,6 +179,11 @@ pdef('Axis').add_fields('int32', 'axis', 0)
add_enum_alias('Format', 'ConvolutionV0')
)


(pdef('AdaptivePooling').
add_enum_alias('Mode', 'Pooling').
add_enum_alias('Format', 'ConvolutionV0')
)

(pdef('LRN',
'see ImageNet Classification with Deep Convolutional Neural Networks for'
' meaning of the fields').


+8 -0 dnn/src/atlas/megcore/computing_context.cpp

@@ -55,8 +55,12 @@ void AtlasComputingContext::memcpy(void* dst, const void* src,
default:
megdnn_throw("bad atlas memcpy kind");
}
#if MGB_USE_ATLAS_ASYNC_API
acl_check(aclrtMemcpyAsync(dst, size_in_bytes, src, size_in_bytes,
atlas_kind, m_ctx.stream));
#else
acl_check(aclrtMemcpy(dst, size_in_bytes, src, size_in_bytes, atlas_kind));
#endif
}

void AtlasComputingContext::memset(void* dst, int value, size_t size_in_bytes) {
@@ -65,7 +69,11 @@ void AtlasComputingContext::memset(void* dst, int value, size_t size_in_bytes) {
}

void AtlasComputingContext::synchronize() {
#if MGB_USE_ATLAS_ASYNC_API
acl_check(aclrtSynchronizeStream(m_ctx.stream));
#else
return;
#endif
}

// vim: syntax=cpp.doxygen

+37 -0 dnn/src/common/adaptive_pooling.cpp

@@ -0,0 +1,37 @@
/**
* \file dnn/src/common/adaptive_pooling.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megdnn/opr_param_defs.h"
#include "megdnn/oprs.h"

#include "src/common/utils.h"
namespace megdnn {

param::Pooling AdaptivePoolingBase::deduce_pooling_param(
const TensorLayout& src, const TensorLayout& dst) {
megdnn_assert(param().format == param::AdaptivePooling::Format::NCHW);
size_t IH = src.shape[2], IW = src.shape[3], OH = dst.shape[2],
OW = dst.shape[3];

param::Pooling ret;
ret.mode = param().mode;
ret.format = param().format;
ret.pad_h = ret.pad_w = 0;
ret.stride_h = floor(IH / OH);
ret.stride_w = floor(IW / OW);
ret.window_h = IH - (OH - 1) * ret.stride_h;
ret.window_w = IW - (OW - 1) * ret.stride_w;

return ret;
}
} // namespace megdnn

// vim: syntax=cpp.doxygen
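For intuition, the deduction above turns an adaptive pooling call into an ordinary pooling call with zero padding. A standalone Python sketch of the same arithmetic (illustrative only, NCHW layout assumed, not code from this commit):

# Mirrors AdaptivePoolingBase::deduce_pooling_param for the H/W axes.
def deduce_pooling_param(ih, iw, oh, ow):
    stride_h = ih // oh              # floor(IH / OH)
    stride_w = iw // ow              # floor(IW / OW)
    window_h = ih - (oh - 1) * stride_h
    window_w = iw - (ow - 1) * stride_w
    # padding is always zero for the deduced pooling
    return {"stride": (stride_h, stride_w), "window": (window_h, window_w), "pad": (0, 0)}

# Example: a 7x9 feature map adaptively pooled to a 3x4 output
print(deduce_pooling_param(7, 9, 3, 4))
# {'stride': (2, 2), 'window': (3, 3), 'pad': (0, 0)}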

+14 -7 dnn/src/common/basic_types.cpp

@@ -392,8 +392,6 @@ TensorLayout TensorLayout::broadcast(const TensorShape& tshape) const {
TensorLayout result{dtype, format};
result.ndim = tshape.ndim;
for (size_t i = 0; i < tshape.ndim; i++) {
megdnn_throw_if(!tshape.shape[i], tensor_reshape_error,
megdnn_mangle("target shape is 0"));
result.shape[i] = tshape.shape[i];
result.stride[i] = (tshape.shape[i] == 1);
}
@@ -409,8 +407,6 @@ TensorLayout TensorLayout::broadcast(const TensorShape& tshape) const {
for (size_t i = 0; i < tshape.ndim; ++i) {
int target_idx = tshape.ndim - i - 1;
int cur_idx = ndim - i - 1;
megdnn_throw_if(!tshape.shape[target_idx], tensor_reshape_error,
megdnn_mangle("target shape is 0"));
size_t cur_shape = (cur_idx >= 0 ? shape[cur_idx] : 1),
cur_stride = (cur_idx >= 0 ? stride[cur_idx] : 0);
if (tshape.shape[target_idx] != cur_shape) {
@@ -434,10 +430,16 @@ TensorLayout TensorLayout::broadcast(const TensorShape& tshape) const {
bool TensorLayout::try_reshape(TensorLayout& result,
const TensorShape& tshp) const {
megdnn_assert(tshp.ndim);

bool is_empty_shape = false;
for (size_t i = 0; i < tshp.ndim; ++i) {
megdnn_throw_if(!tshp.shape[i], tensor_reshape_error,
megdnn_mangle(ssprintf("bad target tshp: %s",
tshp.to_string().c_str())));
if (!tshp.shape[i]) {
megdnn_throw_if(!format.is_default(), tensor_reshape_error,
megdnn_mangle(ssprintf("bad target tshp: %s",
tshp.to_string().c_str())));
is_empty_shape = true;
break;
}
}

megdnn_throw_if(
@@ -454,6 +456,11 @@ bool TensorLayout::try_reshape(TensorLayout& result,
result.format = this->format;
result.TensorShape::operator=(tshp);

if (is_empty_shape) {
result.init_contiguous_stride();
return true;
}

size_t sdim = 0, prod = 1, cont_sdim = 0;
for (size_t i = 0; i < tshp.ndim; ++i) {
megdnn_assert(cont_sdim < cont.ndim);


+2 -0 dnn/src/common/handle_impl.h

@@ -199,6 +199,8 @@ private:
cb(Remap) \
cb(RemapBackwardData) \
cb(RemapBackwardMat) \
cb(AdaptivePoolingForward) \
cb(AdaptivePoolingBackward) \

/*!
* \brief specialize HandleImpl::create_operator for a single opr type;


+53 -0 dnn/src/cuda/adaptive_pooling/opr_impl.cpp

@@ -0,0 +1,53 @@
/**
* \file dnn/src/cuda/adaptive_pooling/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/cuda/adaptive_pooling/opr_impl.h"
#include "src/cuda/utils.h"

namespace megdnn {
namespace cuda {

void AdaptivePoolingForwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_out dst,
_megdnn_workspace workspace) {
auto opr = handle()->create_operator<PoolingForward>();
opr->param() = deduce_pooling_param(src.layout, dst.layout);
opr->exec(src, dst, workspace);
}

size_t AdaptivePoolingForwardImpl::get_workspace_in_bytes(
const TensorLayout& src, const TensorLayout& dst) {
auto opr = handle()->create_operator<PoolingForward>();
opr->param() = deduce_pooling_param(src, dst);
return opr->get_workspace_in_bytes(src, dst);
}

void AdaptivePoolingBackwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_in dst,
_megdnn_tensor_in diff,
_megdnn_tensor_out grad,
_megdnn_workspace workspace) {
auto opr = handle()->create_operator<PoolingBackward>();
opr->param() = deduce_pooling_param(src.layout, dst.layout);
opr->exec(src, dst, diff, grad, workspace);
}

size_t AdaptivePoolingBackwardImpl::get_workspace_in_bytes(
const TensorLayout& src, const TensorLayout& dst,
const TensorLayout& diff, const TensorLayout& grad) {
auto opr = handle()->create_operator<PoolingBackward>();
opr->param() = deduce_pooling_param(src, dst);
return opr->get_workspace_in_bytes(src, dst, diff, grad);
}
} // namespace cuda
} // namespace megdnn

// vim: syntax=cpp.doxygen

+44 -0 dnn/src/cuda/adaptive_pooling/opr_impl.h

@@ -0,0 +1,44 @@
/**
* \file dnn/src/cuda/adaptive_pooling/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/oprs.h"

#include "src/cuda/cudnn_wrapper.h"
#include "src/cuda/utils.h"

namespace megdnn {
namespace cuda {

class AdaptivePoolingForwardImpl final : public AdaptivePoolingForward {
public:
using AdaptivePoolingForward::AdaptivePoolingForward;
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst) override;
};

class AdaptivePoolingBackwardImpl final : public AdaptivePoolingBackward {
public:
using AdaptivePoolingBackward::AdaptivePoolingBackward;
void exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
_megdnn_tensor_in diff, _megdnn_tensor_out grad,
_megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst,
const TensorLayout& diff,
const TensorLayout& grad) override;
};
} // namespace cuda
} // namespace megdnn

// vim: syntax=cpp.doxygen

+1 -0 dnn/src/cuda/handle_create.cpp

@@ -11,6 +11,7 @@

#include "src/common/handle_impl.h"

#include "src/cuda/adaptive_pooling/opr_impl.h"
#include "src/cuda/add_update/opr_impl.h"
#include "src/cuda/argmxx/opr_impl.h"
#include "src/cuda/argsort/opr_impl.h"


+1 -0 dnn/src/cuda/indexing_multi_axis_vec/kern_apply_opr_impl.cuinl

@@ -72,6 +72,7 @@ namespace indexing_multi_axis_vec {
#define cb0(_dtype) \
MEGDNN_FOREACH_TENSOR_NDIM(INST, DTypeTrait<_dtype>::ctype)
MEGDNN_FOREACH_COMPUTING_DTYPE(cb0)
cb0(::megdnn::dtype::Bool)
#undef cb0
#undef INST




+5 -0 dnn/src/cuda/indexing_multi_axis_vec/kern_apply_opr_incr.cu

@@ -39,6 +39,11 @@ __device__ void atomicAdd(megdnn::dt_int16 *, megdnn::dt_int16) {
((int*)0)[0] = 1;
}

__device__ void atomicAdd(megdnn::dt_bool *, megdnn::dt_bool) {
__trap();
((int*)0)[0] = 1;
}

#define KERN_APPLY_OPR_OPR \
::megdnn::cuda::indexing_multi_axis_vec::OprAtomicIncr
#include "./kern_apply_opr_impl.cuinl"


+1 -0 dnn/src/cuda/indexing_multi_axis_vec/opr_impl.cpp

@@ -120,6 +120,7 @@ void ExecImpl<Opr>::dispatch_exec() {
case DTypeTrait<_dtype>::enumv: \
return dispatch_exec_ctype<DTypeTrait<_dtype>::ctype>();
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
cb(::megdnn::dtype::Bool)
#undef cb
default:
megdnn_throw("bad dtype");


+52 -0 dnn/src/naive/adaptive_pooling/opr_impl.cpp

@@ -0,0 +1,52 @@
/**
* \file dnn/src/naive/adaptive_pooling/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/naive/adaptive_pooling/opr_impl.h"

#include "src/common/opr_delegate.h"
#include "src/common/utils.h"
#include "src/naive/handle.h"

namespace megdnn {
namespace naive {

void AdaptivePoolingForwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_out dst,
_megdnn_workspace workspace) {
MEGDNN_DISPATCH_CPU_KERN(static_cast<naive::HandleImpl*>(handle()), {
auto opr = inplace_cpu_handle()->create_operator<PoolingForward>();
opr->param() = deduce_pooling_param(src.layout, dst.layout);
opr->exec(src, dst, workspace);
});
}

void AdaptivePoolingBackwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_in dst,
_megdnn_tensor_in diff,
_megdnn_tensor_out grad,
_megdnn_workspace workspace) {
MEGDNN_DISPATCH_CPU_KERN(static_cast<naive::HandleImpl*>(handle()), {
auto opr = inplace_cpu_handle()->create_operator<PoolingBackward>();
opr->param() = deduce_pooling_param(src.layout, dst.layout);
opr->exec(src, dst, diff, grad, workspace);
});
}

size_t AdaptivePoolingBackwardImpl::get_workspace_in_bytes(
const TensorLayout& src, const TensorLayout& dst,
const TensorLayout& diff, const TensorLayout& grad) {
auto opr = inplace_cpu_handle()->create_operator<PoolingBackward>();
opr->param() = deduce_pooling_param(src, dst);
return opr->get_workspace_in_bytes(src, dst, diff, grad);
}
} // namespace naive
} // namespace megdnn
// vim: syntax=cpp.doxygen

+43 -0 dnn/src/naive/adaptive_pooling/opr_impl.h

@@ -0,0 +1,43 @@
/**
* \file dnn/src/naive/adaptive_pooling/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/oprs.h"
#include "src/common/utils.h"

namespace megdnn {
namespace naive {

class AdaptivePoolingForwardImpl : public AdaptivePoolingForward {
public:
using AdaptivePoolingForward::AdaptivePoolingForward;
void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout&,
const TensorLayout&) override {
return 0;
}
};

class AdaptivePoolingBackwardImpl : public AdaptivePoolingBackward {
public:
using AdaptivePoolingBackward::AdaptivePoolingBackward;
void exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
_megdnn_tensor_in diff, _megdnn_tensor_out grad,
_megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& dst,
const TensorLayout& diff,
const TensorLayout& grad) override;
};
} // namespace naive
} // namespace megdnn
// vim: syntax=cpp.doxygen

+1 -0 dnn/src/naive/handle.cpp

@@ -13,6 +13,7 @@

#include "src/common/handle_impl.h"

#include "src/naive/adaptive_pooling/opr_impl.h"
#include "src/naive/add_update/opr_impl.h"
#include "src/naive/argmxx/opr_impl.h"
#include "src/naive/argsort/opr_impl.h"


+1 -0 dnn/src/naive/indexing_multi_axis_vec/opr_impl.cpp

@@ -88,6 +88,7 @@ void dispatch_exec(HandleImpl *handle,
}
switch (data.layout.dtype.enumv()) {
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
cb(::megdnn::dtype::Bool)
default:
megdnn_throw(megdnn_mangle("bad dtype"));
}


+55 -0 dnn/test/common/adaptive_pooling.h

@@ -0,0 +1,55 @@
/**
* \file dnn/test/common/adaptive_pooling.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include <cstddef>
#include "megdnn/basic_types.h"
#include "megdnn/opr_param_defs.h"

namespace megdnn {
namespace test {
namespace adaptive_pooling {

struct TestArg {
param::AdaptivePooling param;
TensorShape ishape;
TensorShape oshape;
TestArg(param::AdaptivePooling param, TensorShape ishape,
TensorShape oshape)
: param(param), ishape(ishape), oshape(oshape) {}
};

inline std::vector<TestArg> get_args() {
std::vector<TestArg> args;
using Param = param::AdaptivePooling;
using Mode = param::AdaptivePooling::Mode;

for (size_t i = 36; i < 40; ++i) {
args.emplace_back(Param{Mode::AVERAGE}, TensorShape{2, 3, i, i + 1},
TensorShape{2, 3, i - 4, i - 2});
args.emplace_back(Param{Mode::MAX}, TensorShape{2, 3, i, i + 1},
TensorShape{2, 3, i - 4, i - 2});
}

for (size_t i = 5; i < 10; ++i) {
args.emplace_back(Param{Mode::AVERAGE}, TensorShape{2, 3, i, i + 1},
TensorShape{2, 3, i - 3, i - 2});
args.emplace_back(Param{Mode::MAX}, TensorShape{2, 3, i, i + 1},
TensorShape{2, 3, i - 3, i - 2});
}
return args;
}

} // namespace adaptive_pooling
} // namespace test
} // namespace megdnn

// vim: syntax=cpp.doxygen

+2 -0 dnn/test/common/opr_trait.h

@@ -41,6 +41,8 @@ DEF(Images2NeibsForward, 2, true, true);
DEF(Images2NeibsBackward, 2, true, false);
DEF(PoolingForward, 2, true, true);
DEF(PoolingBackward, 4, true, false);
DEF(AdaptivePoolingForward, 2, true, false);
DEF(AdaptivePoolingBackward, 4, true, false);
DEF(LocalForward, 3, true, true);
DEF(LocalBackwardData, 3, true, false);
DEF(LocalBackwardFilter, 3, true, false);


+97 -0 dnn/test/cuda/adaptive_pooling.cpp

@@ -0,0 +1,97 @@
/**
* \file dnn/test/cuda/adaptive_pooling.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "test/cuda/fixture.h"

#include "megdnn/tensor_iter.h"
#include "test/common/adaptive_pooling.h"
#include "test/common/checker.h"

#include "src/common/utils.h"
#include "test/cuda/utils.h"

#include <cudnn.h>
#include "test/cuda/benchmark.h"

namespace megdnn {
namespace test {

TEST_F(CUDA, ADAPTIVE_POOLING_FORWARD) {
auto args = adaptive_pooling::get_args();
using Format = param::AdaptivePooling::Format;
DType dtype = dtype::Float32();
for (auto&& arg : args) {
auto param = arg.param;
auto src = arg.ishape;
auto dst = arg.oshape;
param.format = Format::NCHW;
Checker<AdaptivePooling> checker(handle_cuda());
checker.set_epsilon(1e-2);
checker.set_param(param).set_dtype(0, dtype).set_dtype(1, dtype).exec(
TensorShapeArray{src, dst, {}});
}
}

TEST_F(CUDA, ADAPTIVE_POOLING_BACKWARD) {
auto args = adaptive_pooling::get_args();
for (auto&& arg : args) {
Checker<AdaptivePoolingBackward> checker(handle_cuda());
TensorLayout ilayout = TensorLayout(arg.ishape, dtype::Float32());
TensorLayout olayout = TensorLayout(arg.oshape, dtype::Float32());

auto constraint = [this,
arg](CheckerHelper::TensorValueArray& tensors_orig) {
megdnn_assert(tensors_orig.size() == 4);
auto opr = handle_cuda()->create_operator<AdaptivePoolingForward>();
opr->param() = arg.param;

auto tensors_cuda_storage = CheckerHelper::alloc_tensors(
handle_cuda(),
{tensors_orig[0].layout, tensors_orig[1].layout}, 0);
auto&& tensors_cuda = *tensors_cuda_storage;

auto span = tensors_cuda[0].layout.span();
auto dst = static_cast<dt_byte*>(tensors_cuda[0].raw_ptr) +
span.low_byte;
auto src = static_cast<const dt_byte*>(tensors_orig[0].raw_ptr) +
span.low_byte;
megdnn_memcpy_H2D(handle_cuda(), dst, src, span.dist_byte());

auto workspace_size = opr->get_workspace_in_bytes(
tensors_cuda[0].layout, tensors_cuda[1].layout);
auto workspace_cuda = megdnn_malloc(handle_cuda(), workspace_size);
Workspace workspace{static_cast<dt_byte*>(workspace_cuda),
workspace_size};
opr->exec(tensors_cuda[0], tensors_cuda[1], workspace);
megdnn_free(handle_cuda(), workspace_cuda);

span = tensors_cuda[1].layout.span();
dst = static_cast<dt_byte*>(tensors_orig[1].raw_ptr) +
span.low_byte;
src = static_cast<const dt_byte*>(tensors_cuda[1].raw_ptr) +
span.low_byte;
megdnn_memcpy_D2H(handle_cuda(), dst, src, span.dist_byte());
};

DType dtype = dtype::Float32();
checker.set_tensors_constraint(constraint)
.set_dtype(0, dtype)
.set_dtype(1, dtype)
.set_dtype(2, dtype)
.set_dtype(3, dtype)
.set_param(arg.param)
.exec(TensorShapeArray{ilayout, olayout, olayout, ilayout});
}
}
} // namespace test
} // namespace megdnn

// vim: syntax=cpp.doxygen

+6 -10 dnn/test/cuda/conv_bias_int8.cpp

@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megdnn/oprs/nn.h"

@@ -37,7 +38,7 @@ std::vector<BenchArgs> get_resnet50_bench_args(size_t batch = 64) {
args.emplace_back(BenchArgs{batch, 256, 56, 56, 32, 3, 1});
args.emplace_back(BenchArgs{batch, 256, 56, 56, 32, 3, 2});
args.emplace_back(BenchArgs{batch, 4, 256, 256, 32, 7, 2});
args.emplace_back(BenchArgs{batch, 256, 56, 56, 64, 1, 1});
args.emplace_back(BenchArgs{batch, 64, 56, 56, 64, 1, 1});
args.emplace_back(BenchArgs{batch, 64, 56, 56, 64, 3, 1});
@@ -614,11 +615,8 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_HSWISH) {
param.stride_h = param.stride_w = 1;
param.format = param::ConvBias::Format::CHWN4;
param.nonlineMode = param::ConvBias::NonlineMode::H_SWISH;
checker.set_param(param).execs({{4, 12, 12, 32, 4},
{4, 3, 3, 16, 4},
{4, 1, 1, 1, 4},
{},
{}});
checker.set_param(param).execs(
{{4, 12, 12, 32, 4}, {4, 3, 3, 16, 4}, {4, 1, 1, 1, 4}, {}, {}});
}

TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_CHECK_BOUNDS) {
@@ -1076,7 +1074,6 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_UNROLL_WIDTH_TENSORCORE_1x1_ALGO_2) {
}

#if CUDA_VERSION >= 10020
/// \note: we only check several cases and block sizes in megdnn_test, the full
/// testcases are written in cutlass repository
@@ -1234,8 +1231,7 @@ TEST_F(CUDA, BENCHMARK_CUTLASS_CONV_BIAS_INT8_NCHW4) {
handle_cuda(), get_resnet50_bench_args(64),
dtype::QuantizedS8{1.2f}, dtype::QuantizedS8{1.3f},
dtype::QuantizedS32{1.2f * 1.3f}, dtype::QuantizedS8{1.0f},
"INT8_NCHW4_DOTPROD_IMPLICIT_GEMM",
param::ConvBias::Format::NCHW4);
"INT8_NCHW4_DOTPROD_IMPLICIT_GEMM", param::ConvBias::Format::NCHW4);
}
#endif
} // namespace test


+15 -3 imperative/CMakeLists.txt

@@ -47,8 +47,7 @@ add_custom_target(gen_opr_py DEPENDS ${GEN_OPS_FILE})

##################### end of opdef generation #########################

set(VERSION_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/src/version.ld)
add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT})
add_custom_target(_version_ld SOURCES ${MGE_VERSION_SCRIPT})

add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11)
pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS})
@@ -57,8 +56,21 @@ if (APPLE)
elseif (MSVC OR WIN32)
# Windows does not support implicitly importing data members from DLL.
target_link_libraries(${MODULE_NAME} PRIVATE megbrain megdnn)
message("-- CMAKE_MSVC_RUNTIME_LIBRARY: ${CMAKE_MSVC_RUNTIME_LIBRARY}")
set_target_properties(${MODULE_NAME} PROPERTIES MSVC_RUNTIME_LIBRARY "${CMAKE_MSVC_RUNTIME_LIBRARY}")
else()
target_link_libraries(${MODULE_NAME} PRIVATE megengine_export -Wl,--version-script=${VERSION_SCRIPT})
if (MGE_WITH_PYTHON_MODULE)
# use to fix runtime crash when build both mgb(MGE_WITH_PYTHON_MODULE) and imperative(MGE_BUILD_IMPERATIVE_RT)
target_link_libraries(${MODULE_NAME} PRIVATE megengine_export -Wl,--version-script=${MGE_VERSION_SCRIPT})
else()
# use to reduce whl size by depend on megbrain/dnn directly, caused by cmake create two cuda fatbin
# elf section on both megengine_export and target which depend on megengine_export
target_link_libraries(${MODULE_NAME} PRIVATE megbrain megdnn -Wl,--version-script=${MGE_VERSION_SCRIPT})
if (MGE_WITH_DISTRIBUTED)
message("-- Imperative configured to link megray")
target_link_libraries(${MODULE_NAME} PRIVATE megray)
endif()
endif()
endif()

target_include_directories(${MODULE_NAME} PUBLIC src/include PRIVATE ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR})


+1 -1 imperative/python/megengine/__init__.py

@@ -76,7 +76,7 @@ from .logger import enable_debug_log, get_logger, set_log_file, set_log_level
from .serialization import load, save
from .tensor import Parameter, Tensor, tensor
from .version import __version__
from .core import cgtools
from .utils import comp_graph_tools as cgtools

_set_fork_exec_path_for_timed_func(
sys.executable,


+7 -5 imperative/python/megengine/autodiff/grad_manager.py

@@ -20,7 +20,7 @@ class GradManager:
the forward operations start and when all resources should be released. A typical usage of
GradManager is as follows:

.. codeblock::
.. code-block::

gm = GradManager()
gm.attach(model.parameters())
@@ -32,7 +32,7 @@ class GradManager:

You can also use `record()` and `release()` method instead of `with` context:

.. codeblock::
.. code-block::

gm = GradManager()
gm.attach(model.parameters())
@@ -50,7 +50,7 @@ class GradManager:
processes. Users will finally get the averaged gradients if an "AllReduce"
callback is registered as follows:

.. codeblock::
.. code-block::

import megengine.distributed as dist

@@ -71,7 +71,7 @@ class GradManager:
r"""Registers parameters that gradients should be calculated with respect to.
Callback Functions should have a signature like this:

.. codeblock::
.. code-block::

def cb(param: Tensor, grad: Tensor) -> Tensor:
# do something
@@ -100,6 +100,8 @@ class GradManager:
:param ys: outputs of forward operators, e.g., the loss tensor
:param dys: derivatives of ys
"""
from ..functional import ones_like

global backwarding_grad_manager
cache = backwarding_grad_manager
backwarding_grad_manager = self
@@ -113,7 +115,7 @@ class GradManager:
if not isinstance(ys, (tuple, list)):
ys = [ys]
if dys is None:
dys = [tensor(1.0).broadcast(y.shape) for y in ys]
dys = [ones_like(y) for y in ys]
if not isinstance(dys, (tuple, list)):
dys = [dys]
try:


+0 -1 imperative/python/megengine/core/__init__.py

@@ -11,4 +11,3 @@ import sys

from .tensor import Tensor
from .tensor.megbrain_graph import Graph
from .utils import comp_graph_tools as cgtools

+3 -1 imperative/python/megengine/core/_wrap.py

@@ -22,11 +22,13 @@ class Device:
else:
self._cn = CompNode(device)

self.logical_name = self._cn.logical_name

def to_c(self):
return self._cn

def __repr__(self):
return "{}({})".format(type(self).__qualname__, self)
return "{}({})".format(type(self).__qualname__, repr(self._cn))

def __str__(self):
return str(self._cn)


+2 -2 imperative/python/megengine/core/autodiff/builtin_op_utils.py

@@ -160,7 +160,7 @@ def subtensor_grad_fn(op, inputs, outputs, input_requires_grad):
def make_grad(grad_op, dy):
grad = (
TensorWrapper(0, dtype=dy.dtype, device=dy.device)
.broadcast(TensorWrapper(input_shape))
._broadcast(TensorWrapper(input_shape))
.__wrapped__
)
(dx,) = apply(grad_op, grad, dy, *params)
@@ -186,7 +186,7 @@ def indexingMultiAxisVec_grad_fn(op, inputs, outputs, input_requires_grad):
def make_grad(grad_op, dy):
grad = (
TensorWrapper(0, dtype=dy.dtype, device=dy.device)
.broadcast(TensorWrapper(input_shape))
._broadcast(TensorWrapper(input_shape))
.__wrapped__
)
(dx,) = apply(grad_op, grad, dy, *params)


+6 -6 imperative/python/megengine/core/tensor/function.py

@@ -50,8 +50,8 @@ class Function:
"""
Applies operations to ``inputs`` and returns results. It must be overriden by all subclasses.

:param input: Input tensors.
:return: A tuple of Tensor or a single Tensor.
:param input: input tensors.
:return: a tuple of Tensor or a single Tensor.

.. note::

@@ -64,12 +64,12 @@ class Function:
"""
Compute the gradient of the forward function. It must be overriden by all subclasses.

:param output_grads: gradients of outputs that are returned by :meth:`~.function.Function.forward`
:param output_grads: gradients of outputs that are returned by :meth:`~.function.Function.forward`.

.. note::
.. note::

In case when some tensors of outputs are not related to loss function, the corresponding
values in ``output_grads`` would be ``None``.
In case when some tensors of outputs are not related to loss function, the corresponding
values in ``output_grads`` would be ``None``.

.. note::




+2 -2 imperative/python/megengine/core/tensor/indexing.py

@@ -173,7 +173,7 @@ def unpack_getitem(inp, tuple_val, *, allow_newaxis=True):
item.append(True)
v = get_index(v)
assert np.issubdtype(v.dtype, np.integer) or np.issubdtype(
v.dtype, np.bool
v.dtype, np.bool_
), "var type in the subscript must be int or bool"
tensors.append(v)

@@ -267,7 +267,7 @@ def setitem(tensor, index, value):
value.shape, tmp_result.shape
)
)
value = value.broadcast(tmp_result.shape)
value = value._broadcast(tmp_result.shape)
if use_subtensor:
op = builtin.SetSubtensor(items=items)
else:


+35 -6 imperative/python/megengine/core/tensor/megbrain_graph.py

@@ -8,6 +8,7 @@
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import collections
import json
import os
import threading
import weakref
from concurrent.futures import Future, ThreadPoolExecutor
@@ -49,7 +50,16 @@ class Graph(_imperative_rt.ComputingGraph):

def execute(self, *args):
assert self._future is None
self._future = self._executor.submit(self._function.execute, *args)

def wrapped(*args):
try:
self._function.execute(*args)
except Exception as exc:
for i in self._function._all_rendezvous:
i.set_exception(str(exc))
raise exc

self._future = self._executor.submit(wrapped, *args)

def wait(self):
assert self._future is not None
@@ -275,6 +285,7 @@ def dump_graph(
keep_param_name: bool = False,
keep_opr_priority: bool = False,
strip_info_file=None,
append_json=False
):
"""serialize the computing graph of `output_vars` and get byte result.

@@ -295,6 +306,9 @@ def dump_graph(
:param keep_opr_priority: whether to keep priority setting for operators
:param strip_info_file: a string for path or a file handler. if is not None,
then the dump information for code strip would be written to ``strip_info_file``
:param append_json: will be check when `strip_info_file` is not None. if set
true, the information for code strip will be append to strip_info_file.
if set false, will rewrite strip_info_file
:return: dump result as byte string, and an instance of namedtuple
:class:`CompGraphDumpResult`, whose fields are:

@@ -342,10 +356,25 @@ def dump_graph(

if strip_info_file is not None:
if isinstance(strip_info_file, str):
strip_info_file = open(strip_info_file, "w")
strip_info = json.loads(_imperative_rt.get_info_for_strip(ov))
strip_info["hash"] = dump_info.content_hash
json.dump(strip_info, strip_info_file)
if not os.path.exists(strip_info_file):
os.mknod(strip_info_file)
strip_info_file = open(strip_info_file, "r+")
new_strip_dict = json.loads(_imperative_rt.get_info_for_strip(ov))
ori_strip_dict = new_strip_dict
json_content = strip_info_file.read()
if append_json and len(json_content) != 0:
# if there are contents in json file. Read them first and then append new information
ori_strip_dict = json.loads(json_content)
for k in ori_strip_dict:
new_strip_dict_v = new_strip_dict.get(k)
if new_strip_dict_v is not None:
for value in new_strip_dict_v:
if not value in ori_strip_dict[k]:
ori_strip_dict[k].append(value)
ori_strip_dict["hash"] = dump_info.content_hash
strip_info_file.seek(0)
strip_info_file.truncate()
json.dump(ori_strip_dict, strip_info_file)

return dump_content, dump_info

@@ -358,7 +387,7 @@ CompGraphLoadResult = collections.namedtuple(
def load_graph(fpath):
"""Load a serialized computing graph from file.

:parma fpath: Path or Handle for the output file
:param fpath: Path or Handle of the input file
:return: An instance of namedtuple :class:`CompGraphLoadResult`,
whose fields are:




+3 -1 imperative/python/megengine/core/tensor/multipledispatch/conflict.py

@@ -40,6 +40,8 @@
# All Megvii Modifications are Copyright (C) 2014-2020 Megvii Inc. All rights reserved.
# --------------------------------------------------------------------------------------

from collections import OrderedDict

from .utils import _toposort, groupby
from .variadic import isvariadic

@@ -159,5 +161,5 @@ def ordering(signatures):
for s in signatures:
if s not in edges:
edges[s] = []
edges = dict((k, [b for a, b in v]) for k, v in edges.items())
edges = OrderedDict((k, [b for a, b in v]) for k, v in edges.items())
return _toposort(edges)

+2 -0 imperative/python/megengine/core/tensor/raw_tensor/__init__.py

@@ -100,6 +100,8 @@ def _(data: DeviceTensorND):
@as_raw_tensor.register(np.ndarray)
def _(array: np.ndarray, dtype=None, device=None):
device = None if device is None else as_device(device).to_c()
if 0 in array.strides:
array = array.squeeze().reshape(array.shape)
return RawTensor(put(array, dtype=dtype, device=device))






+65 -3 imperative/python/megengine/core/tensor/tensor_wrapper.py

@@ -57,7 +57,29 @@ def _transpose(data, axes):

def _broadcast(inp, shape):
def valid_broadcast(src, tar):
def failed():
raise ValueError(
"the input shape {} can not be broadcasted to target shape {}".format(
src, tar
)
)

if isinstance(src, (TensorBase, TensorWrapperBase)):
src = src.numpy()

if isinstance(tar, (TensorBase, TensorWrapperBase)):
tar = tar.numpy()

if len(src) > len(tar):
failed()

for i in range(min(len(src), len(tar))):
if src[-i - 1] != 1 and src[-i - 1] != tar[-i - 1]:
failed()

shape = utils.astensor1d(shape, inp, dtype="int32", device=inp.device)
valid_broadcast(inp.shape, shape)
(result,) = apply(builtin.Broadcast(), inp, shape)
return result

@@ -158,6 +180,10 @@ def _reduce(mode):
def f(self, axis=None, keepdims: bool = False):
data = self
(data,) = utils.convert_inputs(data)
if mode == "MEAN":
data = data.astype("float32")
elif self.dtype == np.bool_:
data = data.astype("int32")
if axis is None:
data = data.reshape(-1)
assert not keepdims, "can not set axis=None and keepdims=True"
@@ -180,6 +206,9 @@ def _reduce(mode):

if not keepdims:
result = _remove_axis(result, axis)
if self.dtype == np.bool_:
if mode in ["MIN", "MAX"]:
result = result.astype("bool")
return result

return f
@@ -203,7 +232,8 @@ def _todo(*_):
def _expand_args(args):
if len(args) == 1:
if isinstance(
args[0], (collections.abc.Sequence, TensorBase, TensorWrapperBase)
args[0],
(collections.abc.Sequence, TensorBase, TensorWrapperBase, np.ndarray),
):
args = args[0]
return args
@@ -366,7 +396,8 @@ class ArrayMethodMixin(abc.ABC):
def reshape(self, *args):
return _reshape(self, _expand_args(args))

def broadcast(self, *args):
# FIXME: remove this method
def _broadcast(self, *args):
return _broadcast(self, _expand_args(args))

def transpose(self, *args):
@@ -377,7 +408,38 @@ class ArrayMethodMixin(abc.ABC):
def flatten(self):
return self.reshape(-1)

sum = _reduce("SUM")
def sum(self, axis=None, keepdims: bool = False):
r"""Returns the sum of each row of the input tensor in the given dimension ``axis``.
If ``axis`` is a list of axises, reduce over all of them.

If ``keepdims`` is ``True``, the shape of output tensor is the same as the input tensor, except in the dimension(s) ``axis`` where it is of size 1. Otherwise, ``axis`` is squeezed(see :meth:`~.functional.tensor.squeeze`).

Same for prod/mean/max/min.

:param axis: the dimension or dimensions to reduce.
:param keepdim: whether the output tensor has ndim retained or not.
:return: output tensor.

Examples:

.. testcode::

from megengine import tensor
a = tensor([False, True, True, False])
b = tensor([1.0, 2.0, 3.0, 4.0])
print(a.sum().numpy())
print(b.sum().numpy())

Outputs:

.. testoutput::

[2]
[10.]

"""
return _reduce("SUM")(self, axis, keepdims)

prod = _reduce("PRODUCT")
min = _reduce("MIN")
max = _reduce("MAX")
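The `valid_broadcast` check added to `_broadcast` above enforces the usual right-aligned broadcasting rule: the source may not have more dimensions than the target, and every trailing source dimension must either equal the target dimension or be 1. A minimal pure-Python sketch of that rule (illustrative only, not MegEngine API):

# Sketch of the shape rule enforced by valid_broadcast().
def can_broadcast(src, tar):
    if len(src) > len(tar):
        return False
    for s, t in zip(reversed(src), reversed(tar)):  # compare trailing dims
        if s != 1 and s != t:
            return False
    return True

assert can_broadcast((3, 1), (2, 3, 4))      # a size-1 axis expands
assert not can_broadcast((3, 2), (2, 3, 4))  # 2 cannot expand to 4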


+68 -33 imperative/python/megengine/core/tensor/utils.py

@@ -16,39 +16,74 @@ from ..ops.special import Const
from ..tensor.core import OpBase, TensorBase, TensorWrapperBase, apply




def dtype_promotion(raw_inputs):
def add_dtype(i):
if type(i) == int:
return np.array(i, dtype=np.int32)
if type(i) == float:
return np.array(i, dtype=np.float32)
if type(i) == bool:
return np.array(i, dtype=np.bool_)
return None

scalar_inputs = [
add_dtype(i) for i in raw_inputs if not hasattr(i, "dtype") and add_dtype(i)
]
inputs = [i for i in raw_inputs if hasattr(i, "dtype")]
assert len(scalar_inputs + inputs) > 0
dtype = None
if len(inputs) > 0:
dtype = np.result_type(*inputs)
dtype_all = np.result_type(*(inputs + scalar_inputs))
assert (
dtype != np.float64 and dtype != np.int64
), "unsupport dtype {} by dtype_promotion, please use explict type convert".format(
dtype
)
if dtype_all == np.bool_:
for i in raw_inputs:
if not hasattr(i, "dtype") or i.dtype != np.bool_:
raise TypeError(
"bool dtype can not be operated with an element without bool dtype"
)
if dtype_all == np.float64:
dtype_all = np.float32
return dtype_all
def dtype_promotion(inputs):
"""
Returns the dtype that would result from performing an arithmetic
operation on the provided input tensors and scalars.
"""
# map numpy.dtype.kind to priority
category_priority = {
"f": 3, # floating-point
"i": 2, # signed integer
"u": 2, # unsigned integer
"b": 1, # boolean
}

def scalar2dtype(x):
"""
For scalar `x`, returns its corresponding type. A floating point scalar
has dtype 'float32'. An integral non-boolean scalar has dtype 'int32'.
A boolean scalar has dtype 'bool'.
"""
if isinstance(x, bool):
return np.bool_
if isinstance(x, int):
return np.int32
if isinstance(x, float):
return np.float32

def promote_types(types, cat):
"""
Returns the data type with sufficient size to hold all types of
category `cat` in the list `types`.
"""
used_types = [
i for i in types if category_priority.get(np.dtype(i).kind, 0) == cat
]
assert len(used_types) > 0
res = used_types[0]
for i in used_types:
res = np.promote_types(res, i)
return res

def max_priority(types):
"""
Returns the maximum value of the priority of each type in the list
`types`.
"""
if not types:
return 0
else:
return max([category_priority.get(np.dtype(i).kind, 0) for i in types])

scalars = []
tensors = []

for data in inputs:
if hasattr(data, "dtype"):
tensors.append(data.dtype)
elif isinstance(data, (float, int, bool)):
scalars.append(scalar2dtype(data))

max_pri_scalars = max_priority(scalars)
max_pri_tensors = max_priority(tensors)

assert max_pri_scalars > 0 or max_pri_tensors > 0

if max_pri_scalars > max_pri_tensors:
return promote_types(scalars, max_pri_scalars)
else:
return promote_types(tensors, max_pri_tensors)




def get_device(inputs):
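The rewritten `dtype_promotion` ranks dtype categories (bool < integer < float) and lets Python scalars decide the result only when their category strictly outranks every tensor dtype. A standalone sketch of that rule using numpy only (the helper below is illustrative and not part of the module):

import numpy as np

_PRI = {"f": 3, "i": 2, "u": 2, "b": 1}  # category priority, as in the code above

def sketch_promotion(tensor_dtypes, scalar_dtypes):
    pri = lambda ds: max((_PRI.get(np.dtype(d).kind, 0) for d in ds), default=0)
    # scalars win only when their category strictly outranks the tensors'
    pool = scalar_dtypes if pri(scalar_dtypes) > pri(tensor_dtypes) else tensor_dtypes
    cat = pri(pool)
    pool = [d for d in pool if _PRI.get(np.dtype(d).kind, 0) == cat]
    out = pool[0]
    for d in pool[1:]:
        out = np.promote_types(out, d)
    return np.dtype(out)

print(sketch_promotion([np.float32], [np.int32]))  # float32: tensor category beats an int scalar
print(sketch_promotion([np.int32], [np.float32]))  # float32: a float scalar outranks int tensors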


+0 -9 imperative/python/megengine/core/utils/__init__.py

@@ -1,9 +0,0 @@
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .comp_graph_tools import *

+ 6
- 1
imperative/python/megengine/data/_queue.py

@@ -26,7 +26,7 @@ def _clear_plasma_store():
# `_PlasmaStoreManager.__del__` will not be called automatically in subprocess,
# so this function should be called explicitly
global MGE_PLASMA_STORE_MANAGER
if MGE_PLASMA_STORE_MANAGER is not None:
if MGE_PLASMA_STORE_MANAGER is not None and MGE_PLASMA_STORE_MANAGER.refcount == 0:
del MGE_PLASMA_STORE_MANAGER
MGE_PLASMA_STORE_MANAGER = None


@@ -50,6 +50,7 @@ class _PlasmaStoreManager:
stderr=None if debug_flag else subprocess.DEVNULL,
)
self.__initialized = True
self.refcount = 1


def __del__(self):
if self.__initialized and self.plasma_store.returncode is None:
@@ -83,6 +84,8 @@ class PlasmaShmQueue:
"Exception happened in starting plasma_store: {}\n" "Exception happened in starting plasma_store: {}\n"
"Tips: {}".format(str(e), err_info) "Tips: {}".format(str(e), err_info)
) )
else:
MGE_PLASMA_STORE_MANAGER.refcount += 1


self.socket_name = MGE_PLASMA_STORE_MANAGER.socket_name


@@ -133,6 +136,8 @@ class PlasmaShmQueue:
def close(self):
self.queue.close()
self.disconnect_client()
global MGE_PLASMA_STORE_MANAGER
MGE_PLASMA_STORE_MANAGER.refcount -= 1
_clear_plasma_store()


def cancel_join_thread(self):
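
The refcounting added above follows a common module-level singleton pattern: create the object on first use, bump a counter for every extra consumer, and only tear it down once the counter drops back to zero. A minimal, self-contained sketch of that pattern (with made-up names, not the actual plasma-store code) looks like this:

.. code-block:: python

    _MANAGER = None

    class _Manager:
        def __init__(self):
            self.refcount = 1          # the creator holds the first reference

    def _acquire():
        global _MANAGER
        if _MANAGER is None:
            _MANAGER = _Manager()
        else:
            _MANAGER.refcount += 1
        return _MANAGER

    def _release():
        global _MANAGER
        _MANAGER.refcount -= 1
        if _MANAGER.refcount == 0:     # mirrors the check added to _clear_plasma_store
            _MANAGER = None

    a = _acquire()
    b = _acquire()
    _release()                         # one consumer left: the manager survives
    assert _MANAGER is not None
    _release()                         # last consumer gone: the manager is cleared
    assert _MANAGER is None
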


+ 4
- 4
imperative/python/megengine/data/collator.py

@@ -34,14 +34,14 @@ default_collate_err_msg_format = (


class Collator: class Collator:
r""" r"""
Used for merge a list of samples to form a mini-batch of Tenor(s). Used when using batched loading from a dataset.
modified from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py
Used for merging a list of samples to form a mini-batch of Tensor(s). Used when using batched loading from a dataset.
Modified from https://github.com/pytorch/pytorch/blob/master/torch/utils/data/_utils/collate.py
""" """


def apply(self, inputs): def apply(self, inputs):
""" """
input : sequence_N(tuple(CHW, C, CK))
output : tuple(NCHW, NC, NCK)
:param input: sequence_N(tuple(CHW, C, CK)).
:return: tuple(NCHW, NC, NCK).
""" """
elem = inputs[0] elem = inputs[0]
elem_type = type(elem) elem_type = type(elem)
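
The documented contract (per-sample ``tuple(CHW, C, CK)`` in, batched ``tuple(NCHW, NC, NCK)`` out) can be sketched with plain NumPy, independently of the ``Collator`` class itself; the shapes below are made up for illustration:

.. code-block:: python

    import numpy as np

    # Eight hypothetical samples, each a tuple of (CHW image, C vector, CK matrix).
    samples = [(np.zeros((3, 4, 4)), np.zeros(3), np.zeros((3, 5))) for _ in range(8)]

    # Stacking each field across the sample dimension yields (NCHW, NC, NCK).
    batch = tuple(np.stack(field) for field in zip(*samples))
    print([b.shape for b in batch])    # [(8, 3, 4, 4), (8, 3), (8, 3, 5)]
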


+ 6
- 6
imperative/python/megengine/data/dataloader.py

@@ -43,7 +43,7 @@ class DataLoader:
): ):
r"""Provides a convenient way to iterate on a given dataset. r"""Provides a convenient way to iterate on a given dataset.


`DataLoader` combines a dataset with sampler, transform and collator,
`DataLoader` combines a dataset with `sampler`, `transform` and `collator`,
make it flexible to get minibatch continually from a dataset. make it flexible to get minibatch continually from a dataset.


:type dataset: Dataset :type dataset: Dataset
@@ -53,21 +53,21 @@ class DataLoader:
If specified, :attr:`shuffle` must be ``False``. If specified, :attr:`shuffle` must be ``False``.
:type transform: Transform :type transform: Transform
:param transform: defined the transforming strategy for a sampled batch. :param transform: defined the transforming strategy for a sampled batch.
(default: ``None``)
Default: None
:type collator: Collator :type collator: Collator
:param collator: defined the merging strategy for a transformed batch. :param collator: defined the merging strategy for a transformed batch.
(default: ``None``)
Default: None
:type num_workers: int :type num_workers: int
:param num_workers: the number of sub-process to load, transform and collate :param num_workers: the number of sub-process to load, transform and collate
the batch. ``0`` means using single-process. (default: ``0``)
the batch. ``0`` means using single-process. Default: 0
:type timeout: int :type timeout: int
:param timeout: if positive, means the timeout value(second) for collecting a :param timeout: if positive, means the timeout value(second) for collecting a
batch from workers. (default: 0)
batch from workers. Default: 0
:type divide: bool :type divide: bool
:param divide: define the paralleling strategy in multi-processing mode.
``True`` means one batch is divided into :attr:`num_workers` pieces, and
the workers will process these pieces in parallel. ``False`` means
different sub-process will process different batch. (default: ``False``)
different sub-process will process different batch. Default: False


""" """




+ 5
- 5
imperative/python/megengine/data/dataset/meta_dataset.py

@@ -12,7 +12,7 @@ from typing import Tuple


class Dataset(ABC): class Dataset(ABC):
r""" r"""
An abstract class for all Datasets
An abstract class for all Datasets.
""" """


@abstractmethod @abstractmethod
@@ -22,8 +22,8 @@ class Dataset(ABC):


class MapDataset(Dataset): class MapDataset(Dataset):
r""" r"""
An abstract class for map data
__getitem__ and __len__ method are aditionally needed
An abstract class for map data.
__getitem__ and __len__ methods are additionally needed.
""" """


@abstractmethod @abstractmethod
@@ -41,8 +41,8 @@ class MapDataset(Dataset):


class StreamDataset(Dataset): class StreamDataset(Dataset):
r""" r"""
An abstract class for stream data
__iter__ method is aditionally needed
An abstract class for stream data.
__iter__ method is additionally needed.
""" """


@abstractmethod @abstractmethod


+ 1
- 1
imperative/python/megengine/data/dataset/vision/cifar.py

@@ -21,7 +21,7 @@ logger = get_logger(__name__)




class CIFAR10(VisionDataset): class CIFAR10(VisionDataset):
r""" ``Dataset`` for CIFAR10 meta data
r""" ``Dataset`` for CIFAR10 meta data.
""" """


url_path = "http://www.cs.utoronto.ca/~kriz/" url_path = "http://www.cs.utoronto.ca/~kriz/"


+ 1
- 1
imperative/python/megengine/data/dataset/vision/coco.py

@@ -118,7 +118,7 @@ class COCO(VisionDataset):
self.ids = ids self.ids = ids


self.json_category_id_to_contiguous_id = { self.json_category_id_to_contiguous_id = {
v: i + 1 for i, v in enumerate(self.cats.keys())
v: i + 1 for i, v in enumerate(sorted(self.cats.keys()))
} }


self.contiguous_category_id_to_json_id = { self.contiguous_category_id_to_json_id = {
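
The effect of the ``sorted()`` change can be seen with a toy category dictionary (the ids below are made up): without sorting, the contiguous ids depend on the order in which categories were inserted, while sorting makes the mapping deterministic.

.. code-block:: python

    cats = {3: "car", 1: "person", 2: "bicycle"}   # hypothetical annotation order

    unsorted_map = {v: i + 1 for i, v in enumerate(cats.keys())}
    sorted_map = {v: i + 1 for i, v in enumerate(sorted(cats.keys()))}

    print(unsorted_map)   # {3: 1, 1: 2, 2: 3} -- depends on insertion order
    print(sorted_map)     # {1: 1, 2: 2, 3: 3} -- stable across runs and annotation files
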


+ 6
- 7
imperative/python/megengine/data/dataset/vision/folder.py

@@ -30,19 +30,18 @@ class ImageFolder(VisionDataset):
r""" r"""
ImageFolder is a class for loading image data and labels from an organized folder.


the folder is expected to be organized as followed
root/cls/xxx.img_ext
The folder is expected to be organized as follows: root/cls/xxx.img_ext


labels are indices of sorted classes in the root directory
Labels are indices of sorted classes in the root directory.


:param root: root directory of an image folder
:param root: root directory of an image folder.
:param loader: a function used to load image from path, :param loader: a function used to load image from path,
if ``None``, default function that loads if ``None``, default function that loads
images with PILwill be called
images with PIL will be called.
:param check_valid_func: a function used to check if files in folder are :param check_valid_func: a function used to check if files in folder are
expected image files, if ``None``, default function expected image files, if ``None``, default function
that checks file extensions will be called
:param class_name: if ``True``, return class name instead of class index
that checks file extensions will be called.
:param class_name: if ``True``, return class name instead of class index.


""" """
super().__init__(root, order=("image", "image_category")) super().__init__(root, order=("image", "image_category"))


+ 9
- 9
imperative/python/megengine/data/dataset/vision/imagenet.py

@@ -31,7 +31,7 @@ logger = get_logger(__name__)


class ImageNet(ImageFolder): class ImageNet(ImageFolder):
r""" r"""
Load ImageNet from raw files or folder, expected folder looks like
Load ImageNet from raw files or folder. Expected folder looks like:


.. code-block:: bash .. code-block:: bash


@@ -60,25 +60,25 @@ class ImageNet(ImageFolder):


def __init__(self, root: str = None, train: bool = True, **kwargs): def __init__(self, root: str = None, train: bool = True, **kwargs):
r""" r"""
initialization:
Initialization:


* if ``root`` contains ``self.target_folder`` depent on ``train``:
* if ``root`` contains ``self.target_folder`` depending on ``train``:


* initialize ImageFolder with target_folder
* initialize ImageFolder with target_folder.


* else: * else:


* if all raw files are in ``root``: * if all raw files are in ``root``:


* parse ``self.target_folder`` from raw files
* initialize ImageFolder with ``self.target_folder``
* parse ``self.target_folder`` from raw files.
* initialize ImageFolder with ``self.target_folder``.


* else: * else:


* raise error
* raise error.


:param root: root directory of imagenet data, if root is ``None``, used default_dataset_root
:param train: if ``True``, load the train split, otherwise load the validation split
:param root: root directory of imagenet data, if root is ``None``, use default_dataset_root.
:param train: if ``True``, load the train split, otherwise load the validation split.
""" """


# process the root path # process the root path


+ 7
- 7
imperative/python/megengine/data/dataset/vision/mnist.py

@@ -22,12 +22,12 @@ logger = get_logger(__name__)




class MNIST(VisionDataset): class MNIST(VisionDataset):
r""" ``Dataset`` for MNIST meta data
r""" ``Dataset`` for MNIST meta data.
""" """


url_path = "http://yann.lecun.com/exdb/mnist/" url_path = "http://yann.lecun.com/exdb/mnist/"
""" """
url prefix for downloading raw file
URL prefix for downloading raw file.
""" """
raw_file_name = [ raw_file_name = [
"train-images-idx3-ubyte.gz", "train-images-idx3-ubyte.gz",
@@ -36,7 +36,7 @@ class MNIST(VisionDataset):
"t10k-labels-idx1-ubyte.gz", "t10k-labels-idx1-ubyte.gz",
] ]
""" """
raw file names of both training set and test set (10k)
Raw file names of both training set and test set (10k).
""" """
raw_file_md5 = [ raw_file_md5 = [
"f68b3c2dcbeaaa9fbdd348bbdeb94873", "f68b3c2dcbeaaa9fbdd348bbdeb94873",
@@ -45,7 +45,7 @@ class MNIST(VisionDataset):
"ec29112dd5afa0611ce80d1b7f02629c", "ec29112dd5afa0611ce80d1b7f02629c",
] ]
""" """
md5 for checking raw files
MD5 for checking raw files.
""" """


def __init__( def __init__(
@@ -57,10 +57,10 @@ class MNIST(VisionDataset):
): ):
r""" r"""
:param root: path for mnist dataset downloading or loading, if ``None``, :param root: path for mnist dataset downloading or loading, if ``None``,
set ``root`` to the ``_default_root``
:param train: if ``True``, loading trainingset, else loading test set
set ``root`` to the ``_default_root``.
:param train: if ``True``, loading training set, else loading test set.
:param download: if raw files do not exist and download is set to ``True``,
download raw files and process, otherwise raise ValueError, default is True
download raw files and process, otherwise raise ValueError, default is True.


""" """
super().__init__(root, order=("image", "image_category")) super().__init__(root, order=("image", "image_category"))


+ 1
- 1
imperative/python/megengine/data/dataset/vision/objects365.py

@@ -81,7 +81,7 @@ class Objects365(VisionDataset):
self.ids = ids self.ids = ids


self.json_category_id_to_contiguous_id = { self.json_category_id_to_contiguous_id = {
v: i + 1 for i, v in enumerate(self.cats.keys())
v: i + 1 for i, v in enumerate(sorted(self.cats.keys()))
} }


self.contiguous_category_id_to_json_id = { self.contiguous_category_id_to_json_id = {


+ 15
- 25
imperative/python/megengine/data/dataset/vision/voc.py

@@ -75,6 +75,8 @@ class PascalVOC(VisionDataset):
else: else:
raise NotImplementedError raise NotImplementedError


self.img_infos = dict()

def __getitem__(self, index): def __getitem__(self, index):
target = [] target = []
for k in self.order: for k in self.order:
@@ -107,9 +109,8 @@ class PascalVOC(VisionDataset):
mask = mask[:, :, np.newaxis] mask = mask[:, :, np.newaxis]
target.append(mask) target.append(mask)
elif k == "info": elif k == "info":
if image is None:
image = cv2.imread(self.images[index], cv2.IMREAD_COLOR)
info = [image.shape[0], image.shape[1], self.file_names[index]]
info = self.get_img_info(index, image)
info = [info["height"], info["width"], info["file_name"]]
target.append(info) target.append(info)
else: else:
raise NotImplementedError raise NotImplementedError
@@ -119,6 +120,17 @@ class PascalVOC(VisionDataset):
def __len__(self): def __len__(self):
return len(self.images) return len(self.images)


def get_img_info(self, index, image=None):
if index not in self.img_infos:
if image is None:
image = cv2.imread(self.images[index], cv2.IMREAD_COLOR)
self.img_infos[index] = dict(
height=image.shape[0],
width=image.shape[1],
file_name=self.file_names[index],
)
return self.img_infos[index]
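
``get_img_info`` is a lazy cache: the image is decoded at most once per index, and later calls reuse the stored record. The same idea in isolation, with made-up names and a fake loader, is sketched below:

.. code-block:: python

    class InfoCache:
        """Hypothetical stand-alone version of the per-index caching above."""

        def __init__(self, loader):
            self.loader = loader       # e.g. a function that decodes an image
            self.infos = {}

        def get(self, index):
            if index not in self.infos:
                height, width, file_name = self.loader(index)
                self.infos[index] = dict(height=height, width=width, file_name=file_name)
            return self.infos[index]

    cache = InfoCache(lambda i: (480, 640, "img_{}.jpg".format(i)))
    print(cache.get(0))   # loader runs once for index 0
    print(cache.get(0))   # served from the cache on the second call
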

def _trans_mask(self, mask): def _trans_mask(self, mask):
label = np.ones(mask.shape[:2]) * 255 label = np.ones(mask.shape[:2]) * 255
for i in range(len(self.class_colors)): for i in range(len(self.class_colors)):
@@ -171,25 +183,3 @@ class PascalVOC(VisionDataset):
"train", "train",
"tvmonitor", "tvmonitor",
) )
class_colors = [
[0, 0, 128],
[0, 128, 0],
[0, 128, 128],
[128, 0, 0],
[128, 0, 128],
[128, 128, 0],
[128, 128, 128],
[0, 0, 64],
[0, 0, 192],
[0, 128, 64],
[0, 128, 192],
[128, 0, 64],
[128, 0, 192],
[128, 128, 64],
[128, 128, 192],
[0, 64, 0],
[0, 64, 128],
[0, 192, 0],
[0, 192, 128],
[128, 64, 0],
]

+ 18
- 18
imperative/python/megengine/data/sampler.py

@@ -28,25 +28,25 @@ class Sampler(ABC):
seed=None, seed=None,
): ):
r""" r"""
An abstract class for all sampler
An abstract class for all sampler.


:type dataset: `dataset` :type dataset: `dataset`
:param dataset: dataset to sample from
:param dataset: dataset to sample from.
:type batch_size: positive integer :type batch_size: positive integer
:param batch_size: batch size for batch method
:param batch_size: batch size for batch method.
:type drop_last: bool :type drop_last: bool
:param drop_last: set ``True`` to drop the last incomplete batch, :param drop_last: set ``True`` to drop the last incomplete batch,
if the dataset size is not divisible by the batch size. If ``False`` and if the dataset size is not divisible by the batch size. If ``False`` and
the size of dataset is not divisible by the batch_size, then the last batch will the size of dataset is not divisible by the batch_size, then the last batch will
be smaller. (default: ``False``)
be smaller. Default: False
:type num_samples: positive integer :type num_samples: positive integer
:param num_samples: number of samples assigned to one rank
:param num_samples: number of samples assigned to one rank.
:type world_size: positive integer :type world_size: positive integer
:param world_size: number of ranks
:param world_size: number of ranks.
:type rank: non-negative integer within 0 and world_size :type rank: non-negative integer within 0 and world_size
:param rank: rank id, non-negative interger within 0 and ``world_size``
:param rank: rank id, non-negative interger within 0 and ``world_size``.
:type seed: non-negative integer :type seed: non-negative integer
:param seed: seed for random operators
:param seed: seed for random operators.
""" """
if ( if (
not isinstance(batch_size, int) not isinstance(batch_size, int)
@@ -103,15 +103,15 @@ class Sampler(ABC):


def sample(self): def sample(self):
""" """
return a list contains all sample indices
Return a list containing all sample indices.
""" """
raise NotImplementedError raise NotImplementedError


def scatter(self, indices) -> List: def scatter(self, indices) -> List:
r""" r"""
scatter method is used for splitting indices into subset, each subset
Scatter method is used for splitting indices into subsets; each subset
will be assigned to a rank. Indices are evenly split by default.
If customized indices assignment method is needed, please rewrite this method
If customized indices assignment method is needed, please rewrite this method.
""" """
total_size = self.num_samples * self.world_size total_size = self.num_samples * self.world_size


@@ -127,7 +127,7 @@ class Sampler(ABC):


def batch(self) -> Iterator[List[Any]]: def batch(self) -> Iterator[List[Any]]:
r""" r"""
batch method provides a batch indices generator
Batch method provides a batch indices generator.
""" """
indices = list(self.sample()) indices = list(self.sample())


@@ -156,7 +156,7 @@ class SequentialSampler(Sampler):
rank=None, rank=None,
): ):
r""" r"""
Sample elements sequentially
Sample elements sequentially.
""" """
super().__init__(dataset, batch_size, drop_last, None, world_size, rank) super().__init__(dataset, batch_size, drop_last, None, world_size, rank)
if indices is not None and not isinstance(indices, collections.abc.Sequence): if indices is not None and not isinstance(indices, collections.abc.Sequence):
@@ -168,7 +168,7 @@ class SequentialSampler(Sampler):


def sample(self) -> Iterator[Any]: def sample(self) -> Iterator[Any]:
r""" r"""
return a generator
Return a generator.
""" """
if self.indices is None: if self.indices is None:
return iter(range(len(self.dataset))) return iter(range(len(self.dataset)))
@@ -188,7 +188,7 @@ class RandomSampler(Sampler):
seed=None, seed=None,
): ):
r""" r"""
Sample elements randomly without replacement
Sample elements randomly without replacement.
""" """
super().__init__(dataset, batch_size, drop_last, None, world_size, rank, seed) super().__init__(dataset, batch_size, drop_last, None, world_size, rank, seed)
if indices is not None and not isinstance(indices, collections.abc.Sequence): if indices is not None and not isinstance(indices, collections.abc.Sequence):
@@ -218,10 +218,10 @@ class ReplacementSampler(Sampler):
seed=None, seed=None,
): ):
r""" r"""
Sample elements randomly with replacement
Sample elements randomly with replacement.


:type weights: List :type weights: List
:param weights: weights for sampling indices, it could be unnormalized weights
:param weights: weights for sampling indices, it could be unnormalized weights.
""" """
super().__init__( super().__init__(
dataset, batch_size, drop_last, num_samples, world_size, rank, seed dataset, batch_size, drop_last, num_samples, world_size, rank, seed
@@ -250,7 +250,7 @@ class ReplacementSampler(Sampler):




class Infinite(Sampler): class Infinite(Sampler):
r"""Infinite Sampler warper for basic sampler"""
r"""Infinite Sampler warper for basic sampler."""


def sample(self): def sample(self):
raise NotImplementedError("sample method not supported in Infinite") raise NotImplementedError("sample method not supported in Infinite")


+ 1
- 1
imperative/python/megengine/data/transform/meta_transform.py

@@ -12,7 +12,7 @@ from typing import Sequence, Tuple


class Transform(ABC): class Transform(ABC):
""" """
rewrite apply method in subclass
Rewrite apply method in subclass.
""" """


def apply_batch(self, inputs: Sequence[Tuple]): def apply_batch(self, inputs: Sequence[Tuple]):


+ 31
- 27
imperative/python/megengine/data/transform/vision/functional.py

@@ -15,7 +15,7 @@ import numpy as np




def wrap_keepdims(func): def wrap_keepdims(func):
"""Wraper to keep the dimension of input images unchanged"""
"""Wraper to keep the dimension of input images unchanged."""


@functools.wraps(func) @functools.wraps(func)
def wrapper(image, *args, **kwargs): def wrapper(image, *args, **kwargs):
@@ -34,10 +34,10 @@ def wrap_keepdims(func):
@wrap_keepdims @wrap_keepdims
def to_gray(image): def to_gray(image):
r""" r"""
Change BGR format image's color space to gray
Change BGR format image's color space to gray.


:param image: Input BGR format image, with (H, W, C) shape
:return: Gray format image, with (H, W, C) shape
:param image: input BGR format image, with `(H, W, C)` shape.
:return: gray format image, with `(H, W, C)` shape.
""" """
return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


@@ -45,10 +45,10 @@ def to_gray(image):
@wrap_keepdims @wrap_keepdims
def to_bgr(image): def to_bgr(image):
r""" r"""
Change gray format image's color space to BGR
Change gray format image's color space to BGR.


:param image: input Gray format image, with (H, W, C) shape
:return: BGR format image, with (H, W, C) shape
:param image: input Gray format image, with `(H, W, C)` shape.
:return: BGR format image, with `(H, W, C)` shape.
""" """
return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)


@@ -56,18 +56,18 @@ def to_bgr(image):
@wrap_keepdims @wrap_keepdims
def pad(input, size, value): def pad(input, size, value):
r""" r"""
Pad input data with *value* and given *size*
Pad input data with *value* and given *size*.


:param input: Input data, with (H, W, C) shape
:param size: Padding size of input data, it could be integer or sequence.
If it's an integer, the input data will be padded in four directions.
If it's a sequence contains two integer, the bottom and right side
:param input: input data, with `(H, W, C)` shape.
:param size: padding size of input data, it could be integer or sequence.
If it is an integer, the input data will be padded in four directions.
If it is a sequence containing two integers, the bottom and right side
of input data will be padded.
If it's a sequence contains four integer, the top, bottom, left, right
If it is a sequence containing four integers, the top, bottom, left, right
side of input data will be padded with given size.
:param value: Padding value of data, could be a sequence of int or float.
if it's float value, the dtype of image will be casted to float32 also.
:return: Padded image
:param value: padding value of data, could be a sequence of int or float.
If it is a float value, the dtype of the image will also be cast to float32.
:return: padded image.
""" """
if isinstance(size, int): if isinstance(size, int):
size = (size, size, size, size) size = (size, size, size, size)
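
The three accepted forms of ``size`` can be sketched with ``np.pad`` instead of the library's own implementation (the helper name below is hypothetical): an int pads all four sides, a 2-tuple pads bottom and right, and a 4-tuple is (top, bottom, left, right).

.. code-block:: python

    import numpy as np

    def pad_hwc(img, size, value=0):
        if isinstance(size, int):
            top, bottom, left, right = size, size, size, size
        elif len(size) == 2:
            top, bottom, left, right = 0, size[0], 0, size[1]
        else:
            top, bottom, left, right = size
        return np.pad(img, ((top, bottom), (left, right), (0, 0)), constant_values=value)

    img = np.zeros((4, 4, 3), dtype=np.uint8)
    print(pad_hwc(img, 2).shape)             # (8, 8, 3)
    print(pad_hwc(img, (1, 2)).shape)        # (5, 6, 3)
    print(pad_hwc(img, (1, 1, 2, 2)).shape)  # (6, 8, 3)
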
@@ -81,14 +81,18 @@ def pad(input, size, value):
@wrap_keepdims @wrap_keepdims
def flip(image, flipCode): def flip(image, flipCode):
r""" r"""
Accordding to the flipCode (the type of flip), flip the input image
According to the flipCode (the type of flip), flip the input image.


:param image: Input image, with (H, W, C) shape
:param image: input image, with `(H, W, C)` shape.
:param flipCode: code that indicates the type of flip. :param flipCode: code that indicates the type of flip.
1 : Flip horizontally
0 : Flip vertically
-1 : Flip horizontally and vertically
:return: BGR format image, with (H, W, C) shape

* 1 : Flip horizontally

* 0 : Flip vertically

* -1: Flip horizontally and vertically

:return: BGR format image, with `(H, W, C)` shape.
""" """
return cv2.flip(image, flipCode=flipCode) return cv2.flip(image, flipCode=flipCode)
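
The three flip codes can be checked on a tiny array; this is just a sanity sketch of OpenCV's behaviour, not part of the library:

.. code-block:: python

    import cv2
    import numpy as np

    img = np.array([[1, 2], [3, 4]], dtype=np.uint8)
    print(cv2.flip(img, 1))    # horizontal:              [[2 1] [4 3]]
    print(cv2.flip(img, 0))    # vertical:                [[3 4] [1 2]]
    print(cv2.flip(img, -1))   # horizontal and vertical: [[4 3] [2 1]]
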


@@ -96,12 +100,12 @@ def flip(image, flipCode):
@wrap_keepdims @wrap_keepdims
def resize(input, size, interpolation=cv2.INTER_LINEAR): def resize(input, size, interpolation=cv2.INTER_LINEAR):
r""" r"""
resize the input data to given size
Resize the input data to given size.


:param input: Input data, could be image or masks, with (H, W, C) shape
:param size: Target size of input data, with (height, width) shape.
:param interpolation: Interpolation method.
:return: Resized data, with (H, W, C) shape
:param input: input data, could be image or masks, with `(H, W, C)` shape.
:param size: target size of input data, with (height, width) shape.
:param interpolation: interpolation method.
:return: resized data, with `(H, W, C)` shape.
""" """
if len(size) != 2: if len(size) != 2:
raise ValueError("resize needs (h, w), but got {}".format(size)) raise ValueError("resize needs (h, w), but got {}".format(size))


+ 68
- 68
imperative/python/megengine/data/transform/vision/transform.py

@@ -44,26 +44,26 @@ __all__ = [
class VisionTransform(Transform): class VisionTransform(Transform):
r""" r"""
Base class of all transforms used in computer vision. Base class of all transforms used in computer vision.
calling logic: apply_batch() -> apply() -> _apply_image() and other _apply_*()
Calling logic: apply_batch() -> apply() -> _apply_image() and other _apply_*()
method. If you want to implement a self-defined transform method for image, method. If you want to implement a self-defined transform method for image,
rewrite _apply_image method in subclass. rewrite _apply_image method in subclass.


:param order: Input type order. Input is a tuple contains different structures,
:param order: input type order. Input is a tuple containing different structures,
order is used to specify the order of structures. For example, if your input order is used to specify the order of structures. For example, if your input
is (image, boxes) type, then the order should be ("image", "boxes").
Current available strings & data type are describe below:
is (image, boxes) type, then the ``order`` should be ("image", "boxes").
Currently available strings and data types are described below:


* "image": input image, with shape of (H, W, C)
* "coords": coordinates, with shape of (N, 2)
* "boxes": bounding boxes, with shape of (N, 4), "xyxy" format,
* "image": input image, with shape of `(H, W, C)`.
* "coords": coordinates, with shape of `(N, 2)`.
* "boxes": bounding boxes, with shape of `(N, 4)`, "xyxy" format,
the 1st "xy" represents top left point of a box, the 1st "xy" represents top left point of a box,
the 2nd "xy" represents right bottom point. the 2nd "xy" represents right bottom point.
* "mask": map used for segmentation, with shape of (H, W, 1)
* "keypoints": keypoints with shape of (N, K, 3), N for number of instances,
* "mask": map used for segmentation, with shape of `(H, W, 1)`.
* "keypoints": keypoints with shape of `(N, K, 3)`, N for number of instances,
and K for number of keypoints in one instance. The first two dimensions
of the last axis are the coordinates of keypoints and the 3rd dimension is
the label of keypoints.
* "polygons": A sequence contains numpy array, its length is number of instances.
* "polygons": a sequence containing numpy arrays, its length is the number of instances.
Each numpy array represents polygon coordinate of one instance. Each numpy array represents polygon coordinate of one instance.
* "category": categories for some data type. For example, "image_category" * "category": categories for some data type. For example, "image_category"
means category of the input image and "boxes_category" means categories of means category of the input image and "boxes_category" means categories of
@@ -94,11 +94,11 @@ class VisionTransform(Transform):
self.order = order self.order = order


def apply_batch(self, inputs: Sequence[Tuple]): def apply_batch(self, inputs: Sequence[Tuple]):
r"""Apply transform on batch input data"""
r"""Apply transform on batch input data."""
return tuple(self.apply(input) for input in inputs) return tuple(self.apply(input) for input in inputs)


def apply(self, input: Tuple): def apply(self, input: Tuple):
r"""Apply transform on single input data"""
r"""Apply transform on single input data."""
if not isinstance(input, tuple): if not isinstance(input, tuple):
input = (input,) input = (input,)


@@ -156,10 +156,10 @@ class VisionTransform(Transform):
class ToMode(VisionTransform): class ToMode(VisionTransform):
r"""Change input data to a target mode. r"""Change input data to a target mode.
For example, most transforms use HWC mode image, For example, most transforms use HWC mode image,
while the Neural Network might use CHW mode input tensor
while the neural network might use CHW mode input tensor.


:param mode: Output mode of input. Use "CHW" mode by default.
:param order: The same with :class:`VisionTransform`
:param mode: output mode of input. Default: "CHW"
:param order: the same with :class:`VisionTransform`
""" """


def __init__(self, mode="CHW", *, order=None): def __init__(self, mode="CHW", *, order=None):
@@ -185,14 +185,14 @@ class Compose(VisionTransform):
r""" r"""
Composes several transforms together. Composes several transforms together.


:param transforms: List of :class:`VisionTransform` to compose.
:param batch_compose: Whether use shuffle_indices for batch data or not.
:param transforms: list of :class:`VisionTransform` to compose.
:param batch_compose: whether use shuffle_indices for batch data or not.
If True, use original input sequence. If True, use original input sequence.
Otherwise, the shuffle_indices will be used for transforms. Otherwise, the shuffle_indices will be used for transforms.
:param shuffle_indices: Indices used for random shuffle, start at 1.
:param shuffle_indices: indices used for random shuffle, start at 1.
For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transform For example, if shuffle_indices is [(1, 3), (2, 4)], then the 1st and 3rd transform
will be random shuffled, the 2nd and 4th transform will also be shuffled. will be random shuffled, the 2nd and 4th transform will also be shuffled.
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`


Examples: Examples:


@@ -264,8 +264,8 @@ class TorchTransformCompose(VisionTransform):
some transforms with tensor in torchvision are not supported, some transforms with tensor in torchvision are not supported,
such as Normalize and ToTensor in torchvision. such as Normalize and ToTensor in torchvision.


:param transforms: The same with ``Compose``
:param order: The same with :class:`VisionTransform`
:param transforms: the same with ``Compose``.
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, transforms, *, order=None): def __init__(self, transforms, *, order=None):
@@ -303,16 +303,16 @@ class TorchTransformCompose(VisionTransform):
class Pad(VisionTransform): class Pad(VisionTransform):
r"""Pad the input data. r"""Pad the input data.


:param size: Padding size of input image, it could be integer or sequence.
If it's an integer, the input image will be padded in four directions.
If it's a sequence contains two integer, the bottom and right side
:param size: padding size of input image, it could be integer or sequence.
If it is an integer, the input image will be padded in four directions.
If it is a sequence containing two integers, the bottom and right side
of image will be padded. of image will be padded.
If it's a sequence contains four integer, the top, bottom, left, right
If it is a sequence containing four integers, the top, bottom, left, right
side of image will be padded with given size. side of image will be padded with given size.
:param value: Padding value of image, could be a sequence of int or float.
if it's float value, the dtype of image will be casted to float32 also.
:param mask_value: Padding value of segmentation map.
:param order: The same with :class:`VisionTransform`
:param value: padding value of image, could be a sequence of int or float.
If it is a float value, the dtype of the image will also be cast to float32.
:param mask_value: padding value of segmentation map.
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, size=0, value=0, mask_value=0, *, order=None): def __init__(self, size=0, value=0, mask_value=0, *, order=None):
@@ -350,15 +350,15 @@ class Pad(VisionTransform):
class Resize(VisionTransform): class Resize(VisionTransform):
r"""Resize the input data. r"""Resize the input data.


:param output_size: Target size of image, with (height, width) shape.
:param interpolation: Interpolation method. All methods are listed below:
:param output_size: target size of image, with (height, width) shape.
:param interpolation: interpolation method. All methods are listed below:


* cv2.INTER_NEAREST – a nearest-neighbor interpolation. * cv2.INTER_NEAREST – a nearest-neighbor interpolation.
* cv2.INTER_LINEAR – a bilinear interpolation (used by default). * cv2.INTER_LINEAR – a bilinear interpolation (used by default).
* cv2.INTER_AREA – resampling using pixel area relation. * cv2.INTER_AREA – resampling using pixel area relation.
* cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood. * cv2.INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood.
* cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood. * cv2.INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood.
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, output_size, interpolation=cv2.INTER_LINEAR, *, order=None): def __init__(self, output_size, interpolation=cv2.INTER_LINEAR, *, order=None):
@@ -476,8 +476,8 @@ class ShortestEdgeResize(VisionTransform):
class RandomResize(VisionTransform): class RandomResize(VisionTransform):
r"""Resize the input data randomly. r"""Resize the input data randomly.


:param scale_range: .
:param order: The same with :class:`VisionTransform`
:param scale_range: range of scaling.
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, scale_range, interpolation=cv2.INTER_LINEAR, *, order=None): def __init__(self, scale_range, interpolation=cv2.INTER_LINEAR, *, order=None):
@@ -519,13 +519,13 @@ class RandomResize(VisionTransform):


class RandomCrop(VisionTransform): class RandomCrop(VisionTransform):
r"""Crop the input data randomly. Before applying the crop transform, r"""Crop the input data randomly. Before applying the crop transform,
pad the image first. And if target size is still bigger than the size of
pad the image first. If target size is still bigger than the size of
padded image, pad the image size to target size. padded image, pad the image size to target size.


:param output_size: Target size of output image, with (height, width) shape.
:param padding_size: The same with `size` in ``Pad``
:param padding_value: The same with `value` in ``Pad``
:param order: The same with :class:`VisionTransform`
:param output_size: target size of output image, with (height, width) shape.
:param padding_size: the same with `size` in ``Pad``.
:param padding_value: the same with `value` in ``Pad``.
:param order: the same with :class:`VisionTransform`.
""" """


def __init__( def __init__(
@@ -580,10 +580,10 @@ class RandomResizedCrop(VisionTransform):
aspect ratio (default: 3/4 to 1.33) of the original aspect ratio is made.
After applying crop transform, the input data will be resized to given size.


:param output_size: Target size of output image, with (height, width) shape.
:param scale_range: Range of size of the origin size cropped. Default: (0.08, 1.0)
:param ratio_range: Range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33)
:param order: The same with :class:`VisionTransform`
:param output_size: target size of output image, with (height, width) shape.
:param scale_range: range of size of the origin size cropped. Default: (0.08, 1.0)
:param ratio_range: range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33)
:param order: the same with :class:`VisionTransform`.
""" """


def __init__( def __init__(
@@ -666,8 +666,8 @@ class RandomResizedCrop(VisionTransform):
class CenterCrop(VisionTransform): class CenterCrop(VisionTransform):
r"""Crops the given the input data at the center. r"""Crops the given the input data at the center.


:param output_size: Target size of output image, with (height, width) shape.
:param order: The same with :class:`VisionTransform`
:param output_size: target size of output image, with (height, width) shape.
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, output_size, *, order=None): def __init__(self, output_size, *, order=None):
@@ -710,7 +710,7 @@ class RandomHorizontalFlip(VisionTransform):
r"""Horizontally flip the input data randomly with a given probability. r"""Horizontally flip the input data randomly with a given probability.


:param p: probability of the input data being flipped. Default: 0.5 :param p: probability of the input data being flipped. Default: 0.5
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, prob: float = 0.5, *, order=None): def __init__(self, prob: float = 0.5, *, order=None):
@@ -742,7 +742,7 @@ class RandomVerticalFlip(VisionTransform):
r"""Vertically flip the input data randomly with a given probability. r"""Vertically flip the input data randomly with a given probability.


:param p: probability of the input data being flipped. Default: 0.5 :param p: probability of the input data being flipped. Default: 0.5
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, prob: float = 0.5, *, order=None): def __init__(self, prob: float = 0.5, *, order=None):
@@ -776,9 +776,9 @@ class Normalize(VisionTransform):
this transform will normalize each channel of the input data.
``output[channel] = (input[channel] - mean[channel]) / std[channel]``


:param mean: Sequence of means for each channel.
:param std: Sequence of standard deviations for each channel.
:param order: The same with :class:`VisionTransform`
:param mean: sequence of means for each channel.
:param std: sequence of standard deviations for each channel.
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, mean=0.0, std=1.0, *, order=None): def __init__(self, mean=0.0, std=1.0, *, order=None):
@@ -802,7 +802,7 @@ class GaussianNoise(VisionTransform):


:param mean: Gaussian mean used to generate noise. :param mean: Gaussian mean used to generate noise.
:param std: Gaussian standard deviation used to generate noise. :param std: Gaussian standard deviation used to generate noise.
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`
""" """


def __init__(self, mean=0.0, std=1.0, *, order=None): def __init__(self, mean=0.0, std=1.0, *, order=None):
@@ -826,9 +826,9 @@ class GaussianNoise(VisionTransform):
class BrightnessTransform(VisionTransform): class BrightnessTransform(VisionTransform):
r"""Adjust brightness of the input data. r"""Adjust brightness of the input data.


:param value: How much to adjust the brightness. Can be any
non negative number. 0 gives the original image
:param order: The same with :class:`VisionTransform`
:param value: how much to adjust the brightness. Can be any
non negative number. 0 gives the original image.
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, value, *, order=None): def __init__(self, value, *, order=None):
@@ -857,9 +857,9 @@ class BrightnessTransform(VisionTransform):
class ContrastTransform(VisionTransform): class ContrastTransform(VisionTransform):
r"""Adjust contrast of the input data. r"""Adjust contrast of the input data.


:param value: How much to adjust the contrast. Can be any
non negative number. 0 gives the original image
:param order: The same with :class:`VisionTransform`
:param value: how much to adjust the contrast. Can be any
non negative number. 0 gives the original image.
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, value, *, order=None): def __init__(self, value, *, order=None):
@@ -888,9 +888,9 @@ class ContrastTransform(VisionTransform):
class SaturationTransform(VisionTransform): class SaturationTransform(VisionTransform):
r"""Adjust saturation of the input data. r"""Adjust saturation of the input data.


:param value: How much to adjust the saturation. Can be any
non negative number. 0 gives the original image
:param order: The same with :class:`VisionTransform`
:param value: how much to adjust the saturation. Can be any
non negative number. 0 gives the original image.
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, value, *, order=None): def __init__(self, value, *, order=None):
@@ -919,9 +919,9 @@ class SaturationTransform(VisionTransform):
class HueTransform(VisionTransform): class HueTransform(VisionTransform):
r"""Adjust hue of the input data. r"""Adjust hue of the input data.


:param value: How much to adjust the hue. Can be any number
between 0 and 0.5, 0 gives the original image
:param order: The same with :class:`VisionTransform`
:param value: how much to adjust the hue. Can be any number
between 0 and 0.5, 0 gives the original image.
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, value, *, order=None): def __init__(self, value, *, order=None):
@@ -957,19 +957,19 @@ class HueTransform(VisionTransform):
class ColorJitter(VisionTransform): class ColorJitter(VisionTransform):
r"""Randomly change the brightness, contrast, saturation and hue of an image. r"""Randomly change the brightness, contrast, saturation and hue of an image.


:param brightness: How much to jitter brightness.
:param brightness: how much to jitter brightness.
Chosen uniformly from [max(0, 1 - brightness), 1 + brightness] Chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
or the given [min, max]. Should be non negative numbers. or the given [min, max]. Should be non negative numbers.
:param contrast: How much to jitter contrast.
:param contrast: how much to jitter contrast.
Chosen uniformly from [max(0, 1 - contrast), 1 + contrast] Chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
or the given [min, max]. Should be non negative numbers. or the given [min, max]. Should be non negative numbers.
:param saturation: How much to jitter saturation.
:param saturation: how much to jitter saturation.
Chosen uniformly from [max(0, 1 - saturation), 1 + saturation] Chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
or the given [min, max]. Should be non negative numbers. or the given [min, max]. Should be non negative numbers.
:param hue: How much to jitter hue.
:param hue: how much to jitter hue.
Chosen uniformly from [-hue, hue] or the given [min, max]. Chosen uniformly from [-hue, hue] or the given [min, max].
Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
:param order: The same with :class:`VisionTransform`
:param order: the same with :class:`VisionTransform`.
""" """


def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, *, order=None): def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, *, order=None):


+ 8
- 9
imperative/python/megengine/device.py

@@ -7,6 +7,7 @@
# software distributed under the License is distributed on an # software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import os import os
import re


from .core._imperative_rt.common import CompNode, DeviceType from .core._imperative_rt.common import CompNode, DeviceType
from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config
@@ -22,10 +23,8 @@ __all__ = [




def _valid_device(inp): def _valid_device(inp):
if isinstance(inp, str) and len(inp) == 4:
if inp[0] in {"x", "c", "g"} and inp[1:3] == "pu":
if inp[3] == "x" or inp[3].isdigit():
return True
if isinstance(inp, str) and re.match("^[cxg]pu(\d+|\d+:\d+|x)$", inp):
return True
return False
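
A quick spot-check of which device names the new pattern accepts (the name list is arbitrary):

.. code-block:: python

    import re

    pattern = re.compile(r"^[cxg]pu(\d+|\d+:\d+|x)$")
    for name in ["cpu0", "gpu3", "xpux", "gpu1:2", "cpu", "tpu0", "gpu1:2:3"]:
        print(name, bool(pattern.match(name)))
    # cpu0, gpu3, xpux and gpu1:2 match; cpu, tpu0 and gpu1:2:3 do not
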




@@ -71,11 +70,11 @@ def set_default_device(device: str = "xpux"):


'multithread' device type is avaliable when inference, which implements 'multithread' device type is avaliable when inference, which implements
multi-threading parallelism at the operator level. For example, multi-threading parallelism at the operator level. For example,
'multithread4' will compute with 4 threads. which implements
'multithread4' will compute with 4 threads.


The default value is 'xpux' to specify any device available. The priority of using gpu is higher when both gpu and cpu are available. The default value is 'xpux' to specify any device available. The priority of using gpu is higher when both gpu and cpu are available.


It can also be set by environmental variable `MGE_DEFAULT_DEVICE`.
It can also be set by environment variable `MGE_DEFAULT_DEVICE`.
""" """
assert _valid_device(device), "Invalid device name {}".format(device) assert _valid_device(device), "Invalid device name {}".format(device)
CompNode._set_default_device(device) CompNode._set_default_device(device)
@@ -99,13 +98,13 @@ def set_prealloc_config(
growth_factor=2.0, growth_factor=2.0,
device_type=DeviceType.CUDA, device_type=DeviceType.CUDA,
): ):
"""specifies how to pre-allocate from raw dev allocator
"""Specifies how to pre-allocate from raw device allocator.


:param alignment: specifies the alignment in bytes. :param alignment: specifies the alignment in bytes.
:param min_req: min request size in bytes. :param min_req: min request size in bytes.
:param max_overhead: max overhead above required size in bytes. :param max_overhead: max overhead above required size in bytes.
:growth_factor: request size / cur allocated
:device_type: the device type
:param growth_factor: `request size / cur allocated`
:param device_type: the device type


""" """
assert alignment > 0 assert alignment > 0


+ 49
- 49
imperative/python/megengine/distributed/functional.py

@@ -102,7 +102,7 @@ def _(op: RemoteRecv):




def collective_comm(inp, mode, group, device): def collective_comm(inp, mode, group, device):
"""Helper function for applying collective communication functions"""
"""Helper function for applying collective communication functions."""
assert isinstance(group, Group) assert isinstance(group, Group)
if group is None: if group is None:
return inp return inp
@@ -123,11 +123,11 @@ def collective_comm(inp, mode, group, device):
def reduce_sum( def reduce_sum(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor: ) -> Tensor:
"""Create reduce_sum operator for collective communication
"""Create reduce_sum operator for collective communication.


:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
""" """
mode = CollectiveCommMode.REDUCE_SUM mode = CollectiveCommMode.REDUCE_SUM
return collective_comm(inp, mode, group, device) return collective_comm(inp, mode, group, device)
@@ -136,11 +136,11 @@ def reduce_sum(
def broadcast( def broadcast(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor: ) -> Tensor:
"""Create broadcast operator for collective communication
"""Create broadcast operator for collective communication.


:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
""" """
mode = CollectiveCommMode.BROADCAST mode = CollectiveCommMode.BROADCAST
return collective_comm(inp, mode, group, device) return collective_comm(inp, mode, group, device)
@@ -149,11 +149,11 @@ def broadcast(
def all_gather( def all_gather(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor: ) -> Tensor:
"""Create all_gather operator for collective communication
"""Create all_gather operator for collective communication.


:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
""" """
mode = CollectiveCommMode.ALL_GATHER mode = CollectiveCommMode.ALL_GATHER
return collective_comm(inp, mode, group, device) return collective_comm(inp, mode, group, device)
@@ -162,11 +162,11 @@ def all_gather(
def reduce_scatter_sum( def reduce_scatter_sum(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor: ) -> Tensor:
"""Create reduce_scatter_sum operator for collective communication
"""Create reduce_scatter_sum operator for collective communication.


:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
""" """
mode = CollectiveCommMode.REDUCE_SCATTER_SUM mode = CollectiveCommMode.REDUCE_SCATTER_SUM
return collective_comm(inp, mode, group, device) return collective_comm(inp, mode, group, device)
@@ -175,11 +175,11 @@ def reduce_scatter_sum(
def all_reduce_sum( def all_reduce_sum(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor: ) -> Tensor:
"""Create all_reduce_sum operator for collective communication
"""Create all_reduce_sum operator for collective communication.


:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
""" """
mode = CollectiveCommMode.ALL_REDUCE_SUM mode = CollectiveCommMode.ALL_REDUCE_SUM
return collective_comm(inp, mode, group, device) return collective_comm(inp, mode, group, device)
@@ -188,11 +188,11 @@ def all_reduce_sum(
def all_reduce_max( def all_reduce_max(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor: ) -> Tensor:
"""Create all_reduce_max operator for collective communication
"""Create all_reduce_max operator for collective communication.


:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
""" """
mode = CollectiveCommMode.ALL_REDUCE_MAX mode = CollectiveCommMode.ALL_REDUCE_MAX
return collective_comm(inp, mode, group, device) return collective_comm(inp, mode, group, device)
@@ -201,11 +201,11 @@ def all_reduce_max(
def all_reduce_min( def all_reduce_min(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor: ) -> Tensor:
"""Create all_reduce_min operator for collective communication
"""Create all_reduce_min operator for collective communication.


:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
""" """
mode = CollectiveCommMode.ALL_REDUCE_MIN mode = CollectiveCommMode.ALL_REDUCE_MIN
return collective_comm(inp, mode, group, device) return collective_comm(inp, mode, group, device)
@@ -214,11 +214,11 @@ def all_reduce_min(
def gather( def gather(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor: ) -> Tensor:
"""Create gather operator for collective communication
"""Create gather operator for collective communication.


:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
""" """
mode = CollectiveCommMode.GATHER mode = CollectiveCommMode.GATHER
return collective_comm(inp, mode, group, device) return collective_comm(inp, mode, group, device)
@@ -227,11 +227,11 @@ def gather(
def scatter( def scatter(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor: ) -> Tensor:
"""Create scatter operator for collective communication
"""Create scatter operator for collective communication.


:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
""" """
mode = CollectiveCommMode.SCATTER mode = CollectiveCommMode.SCATTER
return collective_comm(inp, mode, group, device) return collective_comm(inp, mode, group, device)
@@ -240,21 +240,21 @@ def scatter(
def all_to_all( def all_to_all(
inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = "" inp: Tensor, group: Optional[Group] = WORLD, device: Optional[str] = ""
) -> Tensor: ) -> Tensor:
"""Create all_to_all operator for collective communication
"""Create all_to_all operator for collective communication.


:param inp: input tensor
:param group: communication group
:param device: execute placement
:param inp: input tensor.
:param group: communication group.
:param device: execution device.
""" """
mode = CollectiveCommMode.ALL_TO_ALL mode = CollectiveCommMode.ALL_TO_ALL
return collective_comm(inp, mode, group, device) return collective_comm(inp, mode, group, device)




def remote_send(inp: Tensor, dest_rank: int) -> Tensor: def remote_send(inp: Tensor, dest_rank: int) -> Tensor:
"""Send a Tensor to a remote process
"""Send a Tensor to a remote process.


:param inp: tensor to send
:param dest_rank: destination process rank
:param inp: tensor to send.
:param dest_rank: destination process rank.
""" """
op = RemoteSend() op = RemoteSend()
op.key = "{}->{}".format(get_rank(), dest_rank) op.key = "{}->{}".format(get_rank(), dest_rank)
@@ -266,12 +266,12 @@ def remote_send(inp: Tensor, dest_rank: int) -> Tensor:
def remote_recv( def remote_recv(
src_rank: int, shape: Tuple[int], dtype: type, device: Optional[str] = None src_rank: int, shape: Tuple[int], dtype: type, device: Optional[str] = None
) -> Tensor: ) -> Tensor:
"""Receive a Tensor from a remote process
"""Receive a Tensor from a remote process.


:param src_rank: source process rank
:param shape: the shape of the tensor to receive
:param dtype: the data type of the tensor to receive
:param device: the device to place the received tensor
:param src_rank: source process rank.
:param shape: the shape of the tensor to receive.
:param dtype: the data type of the tensor to receive.
:param device: the device to place the received tensor.
""" """
key = "{}->{}".format(src_rank, get_rank()) key = "{}->{}".format(src_rank, get_rank())
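
Taken together, the pair can be sketched as a point-to-point exchange between two already-initialized ranks; the snippet below assumes a process group of size 2 has been set up elsewhere and is only meant to show the call shapes:

.. code-block:: python

    import numpy as np

    from megengine import tensor
    from megengine.distributed.functional import remote_recv, remote_send
    from megengine.distributed.group import get_rank

    if get_rank() == 0:
        remote_send(tensor([1.0, 2.0, 3.0]), dest_rank=1)            # rank 0 sends
    else:
        x = remote_recv(src_rank=0, shape=(3,), dtype=np.float32)    # rank 1 receives
        print(x.numpy())
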




+ 15
- 15
imperative/python/megengine/distributed/group.py

@@ -83,12 +83,12 @@ def init_process_group(
) -> None: ) -> None:
"""Initialize the distributed process group and specify the device used in the current process """Initialize the distributed process group and specify the device used in the current process


:param master_ip: IP address of the master node
:param port: Port available for all processes to communicate
:param world_size: Total number of processes participating in the job
:param rank: Rank of the current process
:param device: The GPU device id to bind this process to
:param backend: Communicator backend, currently support 'nccl' and 'ucx'
:param master_ip: ip address of the master node.
:param port: port available for all processes to communicate.
:param world_size: total number of processes participating in the job.
:param rank: rank of the current process.
:param device: the GPU device id to bind this process to.
:param backend: communicator backend, currently support 'nccl' and 'ucx'.
""" """
if not isinstance(master_ip, str): if not isinstance(master_ip, str):
raise TypeError("Expect type str but got {}".format(type(master_ip))) raise TypeError("Expect type str but got {}".format(type(master_ip)))
@@ -127,50 +127,50 @@ def init_process_group(




def is_distributed() -> bool: def is_distributed() -> bool:
"""Return True if the distributed process group has been initialized"""
"""Return True if the distributed process group has been initialized."""
return _sd is not None return _sd is not None




def get_rank() -> int: def get_rank() -> int:
"""Get the rank of the current process"""
"""Get the rank of the current process."""
return _sd.proc_rank if _sd is not None else 0 return _sd.proc_rank if _sd is not None else 0




def get_world_size() -> int: def get_world_size() -> int:
"""Get the total number of processes participating in the job"""
"""Get the total number of processes participating in the job."""
return _sd.world_size if _sd is not None else 1 return _sd.world_size if _sd is not None else 1




def get_backend() -> str: def get_backend() -> str:
"""Get the backend str"""
"""Get the backend str."""
assert _sd is not None, "please call init_process_group first" assert _sd is not None, "please call init_process_group first"
return _sd.backend if _sd is not None else None return _sd.backend if _sd is not None else None




def get_py_server_addr() -> Tuple[str, int]: def get_py_server_addr() -> Tuple[str, int]:
"""Get master_ip and port of python XML RPC server"""
"""Get master_ip and port of python XML RPC server."""
assert _sd is not None, "please call init_process_group first" assert _sd is not None, "please call init_process_group first"
return _sd.master_ip, _sd.py_server_port return _sd.master_ip, _sd.py_server_port




def get_mm_server_addr() -> Tuple[str, int]: def get_mm_server_addr() -> Tuple[str, int]:
"""Get master_ip and port of C++ mm_server"""
"""Get master_ip and port of C++ mm_server."""
assert _sd is not None, "please call init_process_group first" assert _sd is not None, "please call init_process_group first"
return _sd.master_ip, _sd.mm_server_port return _sd.master_ip, _sd.mm_server_port




def get_client() -> Client: def get_client() -> Client:
"""Get client of python XML RPC server"""
"""Get client of python XML RPC server."""
assert _sd is not None, "please call init_process_group first" assert _sd is not None, "please call init_process_group first"
return _sd.client return _sd.client




def new_group(proc_ranks: List[int]) -> Group: def new_group(proc_ranks: List[int]) -> Group:
"""Build a subgroup containing certain ranks"""
"""Build a subgroup containing certain ranks."""
return Group(proc_ranks) return Group(proc_ranks)




def group_barrier(group: Optional[Group] = WORLD) -> None: def group_barrier(group: Optional[Group] = WORLD) -> None:
"""Block until all ranks in the group reach this barrier"""
"""Block until all ranks in the group reach this barrier."""
assert isinstance(group, Group) assert isinstance(group, Group)
_sd.client.group_barrier(group.key, group.size) _sd.client.group_barrier(group.key, group.size)
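A hedged sketch of how the group utilities above fit together in a single worker process; the address, port, and world size are placeholders, and 'nccl' is one of the backends named in the docstring.

from megengine.distributed.group import (
    get_rank,
    get_world_size,
    group_barrier,
    init_process_group,
)

def worker(rank, world_size):
    init_process_group(
        master_ip="127.0.0.1",
        port=23456,
        world_size=world_size,
        rank=rank,
        device=rank,      # GPU id bound to this process
        backend="nccl",
    )
    print("rank", get_rank(), "of", get_world_size())
    group_barrier()       # wait until every rank reaches this point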

+ 132
- 13
imperative/python/megengine/distributed/helper.py View File

@@ -17,11 +17,112 @@ import numpy as np
from megengine.autodiff.grad_manager import GradManager, get_backwarding_grad_manager from megengine.autodiff.grad_manager import GradManager, get_backwarding_grad_manager
from megengine.device import get_default_device, get_device_count from megengine.device import get_default_device, get_device_count


from ..functional.param_pack import get_offsets, pack_allreduce_split
from ..core.ops.builtin import ParamPackConcat, ParamPackSplit
from ..core.tensor.core import apply
from ..functional.utils import copy from ..functional.utils import copy
from ..tensor import Tensor
from ..utils.future import Future from ..utils.future import Future
from .functional import all_reduce_sum, broadcast from .functional import all_reduce_sum, broadcast
from .group import WORLD, group_barrier, is_distributed
from .group import WORLD, Group, group_barrier, is_distributed


def param_pack_split(inp: Tensor, offsets: list, shapes: list):
r"""
Splits the input tensor into a list of tensors as described by ``offsets`` and ``shapes``;
only used for ``parampack``.

:param inp: input tensor.
:param offsets: offsets of outputs, of length `2 * n`,
where n is the number of output tensors,
in the format `[begin0, end0, begin1, end1]`.
:param shapes: tensor shapes of outputs.
:return: split tensors.

Examples:

.. testcode::

import numpy as np
from megengine import tensor
from megengine.distributed.helper import param_pack_split

a = tensor(np.ones((10,), np.int32))
b, c = param_pack_split(a, [0, 1, 1, 10], [(1,), (3, 3)])
print(b.numpy())
print(c.numpy())

Outputs:

.. testoutput::

[1]
[[1 1 1]
[1 1 1]
[1 1 1]]

"""
op = ParamPackSplit()
op.offsets = offsets
op.shapes = shapes
return apply(op, inp)


def param_pack_concat(inps: list, offsets: Tensor, offsets_val: list):
r"""
Returns the concatenated tensor; only used for ``parampack``.

:param inps: input tensors.
:param offsets: device value of offsets.
:param offsets_val: offsets of inputs, length of `2 * n`,
format `[begin0, end0, begin1, end1]`.
:return: concatenated tensor.

Examples:

.. testcode::

import numpy as np
from megengine import tensor
from megengine.distributed.helper import param_pack_concat

a = tensor(np.ones((1,), np.int32))
b = tensor(np.ones((3, 3), np.int32))
offsets_val = [0, 1, 1, 10]
offsets = tensor(offsets_val, np.int32)
c = param_pack_concat([a, b], offsets, offsets_val)
print(c.numpy())

Outputs:

.. testoutput::

[1 1 1 1 1 1 1 1 1 1]

"""
op = ParamPackConcat()
op.offsets = offsets_val
return apply(op, *inps, offsets)[0]


def get_offsets(shapes):
offsets = []
offset = 0
for shape in shapes:
offsets.append(offset)
offset += int(np.prod(shape))
offsets.append(offset)
return offsets
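A quick worked check of ``get_offsets``, matching the `[0, 1, 1, 10]` offsets used in the ``param_pack_concat`` docstring above:

# shapes (1,) and (3, 3) occupy elements [0, 1) and [1, 10) of the packed buffer
assert get_offsets([(1,), (3, 3)]) == [0, 1, 1, 10]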


def pack_allreduce_split(pack_list, shapes, group, reduce_method):
offsets_val = get_offsets(shapes)
offsets = Tensor(offsets_val)
packed_grads = param_pack_concat(pack_list, offsets, offsets_val)
packed_grads = all_reduce_sum(packed_grads, group, group.comp_node)
if reduce_method == "mean":
packed_grads /= group.size
grads = param_pack_split(packed_grads, offsets_val, shapes)
return grads
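A hedged sketch of using the fusion helper above; it assumes the process group is already initialized so that ``WORLD.comp_node`` is valid, and that every gradient in the list shares one dtype, mirroring how the callback below buckets gradients by dtype.

from megengine.distributed.group import WORLD

def fuse_and_average(grads):
    # pack same-dtype gradients into one flat buffer, all-reduce once, split back
    shapes = [g.shape for g in grads]
    return pack_allreduce_split(grads, shapes, WORLD, "mean")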




class TensorFuture(Future): class TensorFuture(Future):
@@ -54,28 +155,43 @@ def synchronized(func: Callable):
return wrapper return wrapper




def get_device_count_by_fork(device_type: str):
q = mp.Queue()
def _get_device_count_worker(queue, device_type):
num = get_device_count(device_type)
queue.put(num)


def worker(queue):
num = get_device_count(device_type)
queue.put(num)


p = mp.Process(target=worker, args=(q,))
def get_device_count_by_fork(device_type: str):
"""Get device count in fork thread.
See https://stackoverflow.com/questions/22950047/cuda-initialization-error-after-fork
for more information.
"""
q = mp.Queue()
p = mp.Process(target=_get_device_count_worker, args=(q, device_type))
p.start() p.start()
p.join() p.join()
return q.get() return q.get()
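Typical use of the helper above (as the launcher does for 'gpu' later in this diff): the query runs in a short-lived child process, so CUDA is never initialized in the caller, per the linked discussion.

n_gpus = get_device_count_by_fork("gpu")   # safe to call before any CUDA work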




def bcast_list_(params, group):
for p in params:
p._reset(broadcast(p, group))
def bcast_list_(inps: list, group: Group = WORLD):
"""Broadcast tensors between given group.

:param inps: input tensors.
:param group: communication group.
"""
for inp in inps:
inp._reset(broadcast(inp, group))
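A hedged sketch of the usual reason for ``bcast_list_``: making every rank start from identical parameter values; ``model`` is a hypothetical Module, not something defined in this diff.

params = list(model.parameters())   # hypothetical module
bcast_list_(params, WORLD)          # each tensor is reset in place to the broadcast value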




class AllreduceCallback: class AllreduceCallback:
def __init__(self, reduce_method, group=WORLD):
"""Allreduce Callback with tensor fusion optimization.

:param reduce_method: the method to reduce gradients.
:param group: communication group.
"""

def __init__(self, reduce_method: str, group: Group = WORLD):
reduce_method = reduce_method.lower() reduce_method = reduce_method.lower()
assert reduce_method in ["sum", "mean"]
assert reduce_method in ["sum", "mean"], "reduce_method should be sum or mean"
self._reduce_method = reduce_method self._reduce_method = reduce_method
self._group = group self._group = group
self._marked_gm = WeakSet() self._marked_gm = WeakSet()
@@ -88,6 +204,7 @@ class AllreduceCallback:
self._futures_dict = dict() self._futures_dict = dict()
self._packing_list = defaultdict(list) self._packing_list = defaultdict(list)
self._packing_size = defaultdict(int) self._packing_size = defaultdict(int)
self._grad_origin_device = dict()


def _pack(self, dtype): def _pack(self, dtype):
grad_list = [self._gradients_dict[p] for p in self._packing_list[dtype]] grad_list = [self._gradients_dict[p] for p in self._packing_list[dtype]]
@@ -109,6 +226,7 @@ class AllreduceCallback:
self._params.append(param) self._params.append(param)
self._futures_dict[param] = TensorFuture(ack=False) self._futures_dict[param] = TensorFuture(ack=False)
self._gradients_dict[param] = grad self._gradients_dict[param] = grad
self._grad_origin_device[param] = str(grad.device)


dtype_str = str(np.dtype(param.dtype)) dtype_str = str(np.dtype(param.dtype))
dtype_size = np.dtype(param.dtype).itemsize dtype_size = np.dtype(param.dtype).itemsize
@@ -123,6 +241,7 @@ class AllreduceCallback:
self._pack(dtype) self._pack(dtype)
for param in self._params: for param in self._params:
grad = self._gradients_dict[param] grad = self._gradients_dict[param]
grad = copy(grad, self._grad_origin_device[param])
self._futures_dict[param].set(grad) self._futures_dict[param].set(grad)
self._reset() self._reset()
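A hedged sketch of how the callback above is meant to be wired into training; the ``GradManager.attach(..., callbacks=...)`` registration and the per-``(param, grad)`` callback signature are assumptions inferred from, but not shown in, this diff.

from megengine.autodiff.grad_manager import GradManager

gm = GradManager()
cb = AllreduceCallback("mean", WORLD)
# assumption: GradManager hands each (param, grad) pair to the callback, which
# buckets gradients by dtype, packs each bucket, and all-reduces it when flushed
gm.attach(list(model.parameters()), callbacks=[cb])   # `model` is a placeholder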




+ 2
- 2
imperative/python/megengine/distributed/launcher.py View File

@@ -15,7 +15,7 @@ from .util import get_free_ports




def _run_wrapped(func, master_ip, port, world_size, rank, dev, args, kwargs): def _run_wrapped(func, master_ip, port, world_size, rank, dev, args, kwargs):
"""init distributed process group and run wrapped function"""
"""Init distributed process group and run wrapped function."""
init_process_group( init_process_group(
master_ip=master_ip, port=port, world_size=world_size, rank=rank, device=dev master_ip=master_ip, port=port, world_size=world_size, rank=rank, device=dev
) )
@@ -23,7 +23,7 @@ def _run_wrapped(func, master_ip, port, world_size, rank, dev, args, kwargs):




def launcher(func): def launcher(func):
"""decorator for launching multiple processes in single-machine multi-gpu training"""
"""Decorator for launching multiple processes in single-machine multi-gpu training."""


n_gpus = get_device_count_by_fork("gpu") n_gpus = get_device_count_by_fork("gpu")




+ 94
- 38
imperative/python/megengine/distributed/server.py View File

@@ -21,6 +21,12 @@ from .util import get_free_ports




class Methods: class Methods:
"""Distributed Server Method.
Used for exchanging information between distributed nodes.

:param mm_server_port: multiple machine rpc server port.
"""

def __init__(self, mm_server_port): def __init__(self, mm_server_port):
self.lock = threading.Lock() self.lock = threading.Lock()
self.mm_server_port = mm_server_port self.mm_server_port = mm_server_port
@@ -31,51 +37,65 @@ class Methods:
self.dict_barrier_event = defaultdict(threading.Event) self.dict_barrier_event = defaultdict(threading.Event)


def connect(self): def connect(self):
"""Method for checking connection success."""
return True return True


def get_mm_server_port(self): def get_mm_server_port(self):
"""Get multiple machine rpc server port."""
return self.mm_server_port return self.mm_server_port


def set_is_grad(self, rank_peer, is_grad):
def set_is_grad(self, key, is_grad):
"""Mark send/recv need gradiants by key.
:param key: key to match send/recv op.
:param is_grad: whether this op need grad.
"""
with self.lock: with self.lock:
future = self.dict_is_grad[rank_peer]
future = self.dict_is_grad[key]
future.set(is_grad) future.set(is_grad)
return True return True


def check_is_grad(self, rank_peer):
def check_is_grad(self, key):
"""Check whether send/recv need gradiants.
:param key: key to match send/recv op.
"""
with self.lock: with self.lock:
future = self.dict_is_grad[rank_peer]
future = self.dict_is_grad[key]
ret = future.get() ret = future.get()
with self.lock: with self.lock:
del self.dict_is_grad[rank_peer]
del self.dict_is_grad[key]
return ret return ret


def set_remote_tracer(self, rank_peer, tracer_set):
def set_remote_tracer(self, key, tracer_set):
"""Set tracer dict for tracing send/recv op.

:param key: key to match send/recv op.
:param tracer_set: valid tracer set.
"""
with self.lock: with self.lock:
future = self.dict_remote_tracer[rank_peer]
future = self.dict_remote_tracer[key]
future.set(tracer_set) future.set(tracer_set)
return True return True


def check_remote_tracer(self, rank_peer):
def check_remote_tracer(self, key):
"""Get tracer dict for send/recv op.
:param key: key to match send/recv op.
"""
with self.lock: with self.lock:
future = self.dict_remote_tracer[rank_peer]
future = self.dict_remote_tracer[key]
ret = future.get() ret = future.get()
with self.lock: with self.lock:
del self.dict_remote_tracer[rank_peer]
del self.dict_remote_tracer[key]
return ret return ret


def set_pack_list(self, key, pack_list):
with self.lock:
future = self.dict_pack_list[key]
future.set(pack_list)
return True

def get_pack_list(self, key):
with self.lock:
future = self.dict_pack_list[key]
return future.get()

def group_barrier(self, key, size): def group_barrier(self, key, size):
"""A barrier wait for all group member.
:param key: group key to match each other.
:param size: group size.
"""
with self.lock: with self.lock:
self.dict_barrier_counter[key] += 1 self.dict_barrier_counter[key] += 1
counter = self.dict_barrier_counter[key] counter = self.dict_barrier_counter[key]
@@ -94,12 +114,23 @@ class ThreadXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer):




def start_server(py_server_port, mm_server_port): def start_server(py_server_port, mm_server_port):
"""Start python distributed server and multiple machine server.
:param py_server_port: python server port.
:param mm_server_port: multiple machine server port.
"""
server = ThreadXMLRPCServer(("0.0.0.0", py_server_port), logRequests=False) server = ThreadXMLRPCServer(("0.0.0.0", py_server_port), logRequests=False)
server.register_instance(Methods(mm_server_port)) server.register_instance(Methods(mm_server_port))
server.serve_forever() server.serve_forever()




class Server: class Server:
"""Distributed Server for distributed training.
Should be run on the master node.

:param port: python server port.
"""

def __init__(self, port): def __init__(self, port):
self.py_server_port = get_free_ports(1)[0] if port == 0 else port self.py_server_port = get_free_ports(1)[0] if port == 0 else port
self.mm_server_port = create_mm_server("0.0.0.0", 0) self.mm_server_port = create_mm_server("0.0.0.0", 0)
@@ -112,12 +143,19 @@ class Server:




class Client: class Client:
"""Distributed Client for distributed training.

:param master_ip: ip address of master node.
:param port: port of server at master node.
"""

def __init__(self, master_ip, port): def __init__(self, master_ip, port):
self.master_ip = master_ip self.master_ip = master_ip
self.port = port self.port = port
self.connect() self.connect()


def connect(self): def connect(self):
"""Check connection success."""
while True: while True:
try: try:
self.proxy = ServerProxy( self.proxy = ServerProxy(
@@ -129,25 +167,43 @@ class Client:
time.sleep(1) time.sleep(1)


def get_mm_server_port(self): def get_mm_server_port(self):
"""Get multiple machine server port."""
return self.proxy.get_mm_server_port() return self.proxy.get_mm_server_port()


def set_is_grad(self, rank_peer, is_grad):
self.proxy.set_is_grad(rank_peer, is_grad)

def check_is_grad(self, rank_peer):
return self.proxy.check_is_grad(rank_peer)

def set_remote_tracer(self, rank_peer, tracer_set):
self.proxy.set_remote_tracer(rank_peer, tracer_set)

def check_remote_tracer(self, rank_peer):
return self.proxy.check_remote_tracer(rank_peer)

def set_pack_list(self, key, pack_list):
self.proxy.set_pack_list(key, pack_list)

def get_pack_list(self, key):
return self.proxy.get_pack_list(key)
def set_is_grad(self, key, is_grad):
"""Mark send/recv need gradiants by key.
:param key: key to match send/recv op.
:param is_grad: whether this op need grad.
"""
self.proxy.set_is_grad(key, is_grad)

def check_is_grad(self, key):
"""Check whether send/recv need gradiants.
:param key: key to match send/recv op.
"""
return self.proxy.check_is_grad(key)

def set_remote_tracer(self, key, tracer_set):
"""Set tracer dict for tracing send/recv op.

:param key: key to match send/recv op.
:param tracer_set: valid tracer set.
"""
self.proxy.set_remote_tracer(key, tracer_set)

def check_remote_tracer(self, key):
"""Get tracer dict for send/recv op.
:param key: key to match send/recv op.
"""
return self.proxy.check_remote_tracer(key)


def group_barrier(self, key, size): def group_barrier(self, key, size):
"""A barrier wait for all group member.
:param key: group key to match each other.
:param size: group size.
"""
self.proxy.group_barrier(key, size) self.proxy.group_barrier(key, size)
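A hedged sketch of the key-based handshake exposed above; the address, port, and keys are placeholders (the "0->1" key mirrors the "{rank}->{dest_rank}" format that ``remote_send`` builds).

client = Client("127.0.0.1", 23456)       # worker side; retries until the server is up
key = "0->1"                              # same key format remote_send uses
client.set_is_grad(key, True)             # sender marks that this op needs grad
needs_grad = client.check_is_grad(key)    # peer blocks until the flag is set
client.group_barrier("epoch-0", size=2)   # block until 2 members arrive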

+ 1
- 4
imperative/python/megengine/functional/__init__.py View File

@@ -8,13 +8,10 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# pylint: disable=redefined-builtin # pylint: disable=redefined-builtin
from .elemwise import * from .elemwise import *
from .graph import add_update
from .loss import *
from .math import * from .math import *
from .nn import * from .nn import *
from .quantized import conv_bias_activation
from .tensor import * from .tensor import *
from .utils import accuracy, copy
from .utils import *


from . import distributed # isort:skip from . import distributed # isort:skip




+ 4
- 4
imperative/python/megengine/functional/debug_param.py View File

@@ -26,14 +26,14 @@ def set_conv_execution_strategy(option: str):
Available values: Available values:


* 'HEURISTIC' uses heuristic to choose the fastest algorithm. * 'HEURISTIC' uses heuristic to choose the fastest algorithm.
* 'PROFILE' runs possible algorithms on real device to find the best.
* 'PROFILE_HEURISTIC' uses profile result and heuristic to choose the fastest algorithm.
* 'PROFILE_REPRODUCIBLE' uses the fastest of profile result that is also reproducible.
* 'PROFILE' runs possible algorithms on real device to find the best one.
* 'PROFILE_HEURISTIC' uses profiling result and heuristic to choose the fastest algorithm.
* 'PROFILE_REPRODUCIBLE' uses the fastest of profiling result that is also reproducible.
* 'HEURISTIC_REPRODUCIBLE' uses heuristic to choose the fastest algorithm that is also reproducible. * 'HEURISTIC_REPRODUCIBLE' uses heuristic to choose the fastest algorithm that is also reproducible.


The default strategy is 'HEURISTIC'. The default strategy is 'HEURISTIC'.


It can also be set through the environmental variable 'MEGENGINE_CONV_EXECUTION_STRATEGY'.
It can also be set through the environment variable 'MEGENGINE_CONV_EXECUTION_STRATEGY'.
""" """
valid_option = ( valid_option = (
"HEURISTIC", "HEURISTIC",


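Both ways of selecting a strategy, using the values listed above; the import path follows this file's location, and the call is made once before building the network.

from megengine.functional.debug_param import set_conv_execution_strategy

set_conv_execution_strategy("PROFILE_HEURISTIC")
# or, equivalently, before the process starts:
#   export MEGENGINE_CONV_EXECUTION_STRATEGY=PROFILE_HEURISTIC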
+ 73
- 89
imperative/python/megengine/functional/elemwise.py View File

@@ -26,23 +26,22 @@ __all__ = [
"acosh", "acosh",
"atanh", "atanh",
"ceil", "ceil",
"clamp",
"clip",
"cos", "cos",
"cosh", "cosh",
"div", "div",
"eq",
"equal",
"exp", "exp",
"expm1", "expm1",
"fast_tanh",
"floor", "floor",
"floor_div", "floor_div",
"gt",
"ge",
"greater",
"greater_equal",
"hswish", "hswish",
"hsigmoid", "hsigmoid",
"left_shift", "left_shift",
"lt",
"le",
"less",
"less_equal",
"log", "log",
"log1p", "log1p",
"logical_and", "logical_and",
@@ -54,7 +53,7 @@ __all__ = [
"mod", "mod",
"mul", "mul",
"neg", "neg",
"ne",
"not_equal",
"pow", "pow",
"relu", "relu",
"relu6", "relu6",
@@ -88,13 +87,6 @@ def _elwise(*args, mode):
return result return result




def _logical(*args, mode):
op = builtin.CondExecPredLogical(mode=mode)
args = utils.convert_inputs(*args)
(result,) = apply(op, *args)
return result


def _elemwise_multi_type(*args, mode, **kwargs): def _elemwise_multi_type(*args, mode, **kwargs):
op = builtin.ElemwiseMultiType(mode=mode, **kwargs) op = builtin.ElemwiseMultiType(mode=mode, **kwargs)
args = utils.convert_inputs(*args) args = utils.convert_inputs(*args)
@@ -106,9 +98,10 @@ def _elemwise_multi_type(*args, mode, **kwargs):




def add(x, y): def add(x, y):
"""Element-wise addition.
"""Element-wise `addition`.
At least one operand should be tensor. At least one operand should be tensor.
Same for sub/mul/div/floor_div/pow/mod/atan2/eq/ne/lt/le/gt/ge/maximum/minmium.

Same for sub/mul/div/floor_div/pow/mod/atan2/equal/not_equal/less/less_equal/greater/greater_equal/maximum/minimum.


:param x: input tensor. :param x: input tensor.
:return: computed tensor. :return: computed tensor.
@@ -138,68 +131,68 @@ def add(x, y):




def sub(x, y): def sub(x, y):
"""Element-wise subtraction."""
"""Element-wise `subtraction`."""
return _elwise(x, y, mode="sub") return _elwise(x, y, mode="sub")




def mul(x, y): def mul(x, y):
"""Element-wise multiplication."""
"""Element-wise `multiplication`."""
return _elwise(x, y, mode="mul") return _elwise(x, y, mode="mul")




def div(x, y): def div(x, y):
"""Element-wise (x / y)."""
"""Element-wise `(x / y)`."""
return _elwise(x, y, mode="true_div") return _elwise(x, y, mode="true_div")




def floor_div(x, y): def floor_div(x, y):
"""Element-wise floor(x / y)."""
"""Element-wise `floor(x / y)`."""
return _elwise(x, y, mode="floor_divide") return _elwise(x, y, mode="floor_divide")




def neg(x): def neg(x):
"""Element-wise negation."""
"""Element-wise `negation`."""
return _elwise(x, mode="negate") return _elwise(x, mode="negate")




def pow(x, y): def pow(x, y):
"""Element-wise power."""
"""Element-wise `power`."""
return _elwise(x, y, mode="pow") return _elwise(x, y, mode="pow")




def mod(x, y): def mod(x, y):
"""Element-wise remainder of division."""
"""Element-wise `remainder of division`."""
return _elwise(x, y, mode="mod") return _elwise(x, y, mode="mod")




def abs(x): def abs(x):
"""Element-wise absolute value."""
"""Element-wise `absolute value`."""
return _elwise(x, mode="abs") return _elwise(x, mode="abs")




def exp(x): def exp(x):
"""Element-wise exponential."""
"""Element-wise `exponential`."""
return _elwise(x, mode="exp") return _elwise(x, mode="exp")




def expm1(x): def expm1(x):
"""Element-wise exp(x)-1."""
"""Element-wise `exp(x)-1`."""
return _elwise(x, mode="expm1") return _elwise(x, mode="expm1")




def log(x): def log(x):
"""Element-wise logarithm (base `e`)."""
"""Element-wise `logarithm (base e)`."""
return _elwise(x, mode="log") return _elwise(x, mode="log")




def log1p(x): def log1p(x):
"""Element-wise log(x+1) (base `e`)."""
"""Element-wise `log(x+1) (base e)`."""
return _elwise(x, mode="log1p") return _elwise(x, mode="log1p")




def sqrt(x: Tensor) -> Tensor: def sqrt(x: Tensor) -> Tensor:
"""Element-wise sqrt.
For negative input value, return ``NaN``.
"""Element-wise `sqrt`.
Returns ``NaN`` for negative input value.


:param x: input tensor. :param x: input tensor.
:return: computed tensor. :return: computed tensor.
@@ -229,10 +222,10 @@ def sqrt(x: Tensor) -> Tensor:


def square(x: Tensor) -> Tensor: def square(x: Tensor) -> Tensor:
""" """
Return a new tensor with the square of the elements of input tensor.
Returns a new tensor with the square of the elements of input tensor.


:param inp: The input tensor
:return: The computed tensor
:param inp: input tensor.
:return: computed tensor.


Examples: Examples:


@@ -258,27 +251,27 @@ def square(x: Tensor) -> Tensor:




def round(x): def round(x):
"""Element-wise rounding to int."""
"""Element-wise `rounding to int`."""
return _elwise(x, mode="round") return _elwise(x, mode="round")




def ceil(x): def ceil(x):
"""Element-wise ceiling."""
"""Element-wise `ceiling`."""
return _elwise(x, mode="ceil") return _elwise(x, mode="ceil")




def floor(x): def floor(x):
"""Element-wise floor."""
"""Element-wise `floor`."""
return _elwise(x, mode="floor") return _elwise(x, mode="floor")




def maximum(x, y): def maximum(x, y):
"""Element-wise maximum of array elements."""
"""Element-wise `maximum of array elements`."""
return _elwise(x, y, mode="max") return _elwise(x, y, mode="max")




def minimum(x, y): def minimum(x, y):
"""Element-wise minimum of array elements."""
"""Element-wise `minimum of array elements`."""
return _elwise(x, y, mode="min") return _elwise(x, y, mode="min")




@@ -286,7 +279,7 @@ def minimum(x, y):




def cos(x): def cos(x):
"""Element-wise cosine.
"""Element-wise `cosine`.


:param x: input tensor. :param x: input tensor.
:return: computed tensor. :return: computed tensor.
@@ -315,80 +308,71 @@ def cos(x):




def sin(x): def sin(x):
"""Element-wise sine."""
"""Element-wise `sine`."""
return _elwise(x, mode="sin") return _elwise(x, mode="sin")




def tan(x): def tan(x):
"""Element-wise tangent."""
"""Element-wise `tangent`."""
return sin(x) / cos(x) return sin(x) / cos(x)




def acos(x): def acos(x):
"""Element-wise inverse cosine."""
"""Element-wise `inverse cosine`."""
return _elwise(x, mode="acos") return _elwise(x, mode="acos")




def asin(x): def asin(x):
"""Element-wise inverse sine."""
"""Element-wise `inverse sine`."""
return _elwise(x, mode="asin") return _elwise(x, mode="asin")




def atan(x): def atan(x):
"""Element-wise inverse tangent."""
"""Element-wise `inverse tangent`."""
return _elwise(x, 1, mode="atan2") return _elwise(x, 1, mode="atan2")




def atan2(y, x): def atan2(y, x):
"""Element-wise 2-argument arctangent."""
"""Element-wise `2-argument arctangent`."""
return _elwise(y, x, mode="atan2") return _elwise(y, x, mode="atan2")




def cosh(x): def cosh(x):
r"""Element-wise hyperbolic cosine."""
r"""Element-wise `hyperbolic cosine`."""
return 0.5 * (exp(x) + exp(-x)) return 0.5 * (exp(x) + exp(-x))




def sinh(x): def sinh(x):
r"""Element-wise hyperbolic sine."""
r"""Element-wise `hyperbolic sine`."""
u = expm1(x) u = expm1(x)
return 0.5 * u / (u + 1) * (u + 2) return 0.5 * u / (u + 1) * (u + 2)




def tanh(x): def tanh(x):
r"""Element-wise hyperbolic tangent."""
r"""Element-wise `hyperbolic tangent`."""
return _elwise(x, mode="tanh") return _elwise(x, mode="tanh")




def asinh(x): def asinh(x):
r"""Element-wise inverse hyperbolic sine."""
r"""Element-wise `inverse hyperbolic sine`."""
return log(x + (x ** 2 + 1) ** 0.5) return log(x + (x ** 2 + 1) ** 0.5)




def acosh(x): def acosh(x):
r"""Element-wise inverse hyperbolic cosine."""
r"""Element-wise `inverse hyperbolic cosine`."""
return log(x + (x ** 2 - 1) ** 0.5) return log(x + (x ** 2 - 1) ** 0.5)




def atanh(x): def atanh(x):
r"""Element-wise inverse hyperbolic tangent."""
r"""Element-wise `inverse hyperbolic tangent`."""
return log1p(2 * x / (1 - x)) / 2 return log1p(2 * x / (1 - x)) / 2




def fast_tanh(x):
r"""Element-wise fast tanh; this is an approximation:

.. math::
\text{fast_tanh}(x) = x * (27. + x * x) / (27. + 9. * x * x)
"""
return _elwise(x, mode="fast_tanh")


# bit-twiddling functions # bit-twiddling functions




def left_shift(x, y): def left_shift(x, y):
"""Element-wise bitwise binary: x << y.
"""Element-wise `bitwise binary: x << y`.


:param x: input tensor, should be int. :param x: input tensor, should be int.
:param y: how many bits to be left-shifted. :param y: how many bits to be left-shifted.
@@ -418,7 +402,7 @@ def left_shift(x, y):




def right_shift(x, y): def right_shift(x, y):
"""Element-wise bitwise binary: x >> y."""
"""Element-wise `bitwise binary: x >> y`."""
return _elwise(x, y, mode="shr") return _elwise(x, y, mode="shr")




@@ -426,30 +410,30 @@ def right_shift(x, y):




def logical_and(x, y): def logical_and(x, y):
"""Element-wise logical and: x && y."""
"""Element-wise `logical and: x && y`."""
return _elwise(x, y, mode="AND") return _elwise(x, y, mode="AND")




def logical_not(x): def logical_not(x):
"""Element-wise logical not: ~x."""
"""Element-wise `logical not: ~x`."""
return _elwise(x, mode="NOT") return _elwise(x, mode="NOT")




def logical_or(x, y): def logical_or(x, y):
"""Element-wise logical or: x || y."""
"""Element-wise `logical or: x || y`."""
return _elwise(x, y, mode="OR") return _elwise(x, y, mode="OR")




def logical_xor(x, y): def logical_xor(x, y):
"""Element-wise logical xor: x ^ y."""
"""Element-wise `logical xor: x ^ y`."""
return _elwise(x, y, mode="XOR") return _elwise(x, y, mode="XOR")




# comparison functions # comparison functions




def eq(x, y):
"""Element-wise (x == y).
def equal(x, y):
"""Element-wise `(x == y)`.


:param x: input tensor 1. :param x: input tensor 1.
:param y: input tensor 2. :param y: input tensor 2.
@@ -465,7 +449,7 @@ def eq(x, y):


x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) x = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3))
y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) y = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3))
out = F.eq(x, y)
out = F.equal(x, y)
print(out.numpy()) print(out.numpy())


Outputs: Outputs:
@@ -479,28 +463,28 @@ def eq(x, y):
return _elwise(x, y, mode="eq") return _elwise(x, y, mode="eq")




def ne(x, y):
"""Element-wise (x != y)."""
def not_equal(x, y):
"""Element-wise `(x != y)`."""
return x != y return x != y




def lt(x, y):
"""Element-wise (x < y)."""
def less(x, y):
"""Element-wise `(x < y)`."""
return _elwise(x, y, mode="lt") return _elwise(x, y, mode="lt")




def le(x, y):
"""Element-wise (x <= y)."""
def less_equal(x, y):
"""Element-wise `(x <= y)`."""
return _elwise(x, y, mode="leq") return _elwise(x, y, mode="leq")




def gt(x, y):
"""Element-wise (x > y)."""
def greater(x, y):
"""Element-wise `(x > y)`."""
return _elwise(y, x, mode="lt") return _elwise(y, x, mode="lt")




def ge(x, y):
"""Element-wise (x >= y)."""
def greater_equal(x, y):
"""Element-wise `(x >= y)`."""
return _elwise(y, x, mode="leq") return _elwise(y, x, mode="leq")
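The renamed comparison helpers above in action; the values are illustrative, and the functions are assumed to be re-exported through ``megengine.functional`` via the ``__all__`` list updated in this diff.

import numpy as np
import megengine.functional as F
from megengine import tensor

x = tensor(np.array([1.0, 2.0, 3.0], dtype=np.float32))
y = tensor(np.array([2.0, 2.0, 2.0], dtype=np.float32))
print(F.equal(x, y).numpy())          # elementwise x == y
print(F.less(x, y).numpy())           # elementwise x <  y
print(F.greater_equal(x, y).numpy())  # elementwise x >= y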




@@ -508,7 +492,7 @@ def ge(x, y):




def hswish(x): def hswish(x):
"""Element-wise x * relu6(x + 3) / 6.
"""Element-wise `x * relu6(x + 3) / 6`.


:param x: input tensor. :param x: input tensor.
:return: computed tensor. :return: computed tensor.
@@ -534,7 +518,7 @@ def hswish(x):




def hsigmoid(x): def hsigmoid(x):
"""Element-wise relu6(x + 3) / 6."""
"""Element-wise `relu6(x + 3) / 6`."""
return relu6(x + 3) / 6 return relu6(x + 3) / 6




@@ -544,16 +528,16 @@ def relu(x):




def relu6(x): def relu6(x):
"""Element-wise min(max(x, 0), 6)."""
"""Element-wise `min(max(x, 0), 6)`."""
return minimum(maximum(x, 0), 6) return minimum(maximum(x, 0), 6)




def sigmoid(x): def sigmoid(x):
"""Element-wise 1 / ( 1 + exp( -x ) )."""
"""Element-wise `1 / ( 1 + exp( -x ) )`."""
return _elwise(x, mode="sigmoid") return _elwise(x, mode="sigmoid")




def clamp(x: Tensor, lower=None, upper=None) -> Tensor:
def clip(x: Tensor, lower=None, upper=None) -> Tensor:
r"""Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns r"""Clamps all elements in input tensor into the range `[` :attr:`lower`, :attr:`upper` `]` and returns
a resulting tensor: a resulting tensor:


@@ -578,9 +562,9 @@ def clamp(x: Tensor, lower=None, upper=None) -> Tensor:
import megengine.functional as F import megengine.functional as F


a = tensor(np.arange(5).astype(np.int32)) a = tensor(np.arange(5).astype(np.int32))
print(F.clamp(a, 2, 4).numpy())
print(F.clamp(a, lower=3).numpy())
print(F.clamp(a, upper=3).numpy())
print(F.clip(a, 2, 4).numpy())
print(F.clip(a, lower=3).numpy())
print(F.clip(a, upper=3).numpy())


Outputs: Outputs:


@@ -596,7 +580,7 @@ def clamp(x: Tensor, lower=None, upper=None) -> Tensor:
), "At least one of 'lower' or 'upper' must not be None" ), "At least one of 'lower' or 'upper' must not be None"
if lower is not None: if lower is not None:
if upper is not None: if upper is not None:
assert lower <= upper, "clamp lower bound is bigger that upper bound"
assert lower <= upper, "clip lower bound is bigger that upper bound"
return minimum(maximum(x, lower), upper) return minimum(maximum(x, lower), upper)
else: else:
return maximum(x, lower) return maximum(x, lower)


+ 0
- 44
imperative/python/megengine/functional/external.py View File

@@ -1,44 +0,0 @@
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# pylint: disable=too-many-lines
from typing import List

from ..tensor import Tensor


def cambricon_subgraph(
inputs: List[Tensor], data: bytes, symbol: str, tensor_dim_mutable: bool,
) -> List[Tensor]:
"""Loads a serialized Cambricon subgraph (i.e. cnrtModel_t) and
execute the operations defined in the subgraph.

:param inputs: list of input tensors of the subgraph.
:param data: the serialized subgraph.
:param symbol: the name of the function in the subgraph.
The function is corresponding to a cnmlFusionOp
which is added to the cnmlModel_t/cnrtModel_t.
:param tensor_dim_mutable: whether the input tensors' shapes are mutalbe
in cnrtModel_t.
"""
raise NotImplementedError


def extern_opr_subgraph(
inputs, output_shapes: List[tuple], dump_name: str, dump_data: bytes,
) -> List[Tensor]:
"""Loads a serialized extern opr subgraph and fake execute the operator.

:param inputs: tensor or list of input tensors.
:param output_shapes: the output shapes.
:param dump_name: the serialized subgraph name.
:param dump_data: the serialized subgraph.

:return: list of tensors.
"""
raise NotImplementedError

+ 0
- 41
imperative/python/megengine/functional/graph.py View File

@@ -1,41 +0,0 @@
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import collections
from typing import Iterable, Optional, Union

from ..tensor import Tensor


def add_update(
dest: Tensor,
delta: Tensor,
*,
alpha: Union[Tensor, float, int] = 1.0,
beta: Union[Tensor, float, int] = 1.0,
bias: Union[Tensor, float, int] = 0.0
):
r"""Modify ``dest`` inplace as follows:

.. math::
dest = alpha * dest + beta * delta + bias

:param dest: input data that will be inplace modified.
:param delta: update value that will be added to ``dest``.
:param alpha: weight ratio of ``dest``. Default: 1.0
:param beta: weight ratio of ``delta``. Default: 1.0
:param bias: bias value appended to the result. Default: 0.0
"""
if beta is not None and beta != 1.0:
delta = delta * beta
if bias is not None and bias != 0.0:
delta = delta + bias
if alpha is not None and alpha != 1.0:
dest *= alpha
dest += delta
return dest

+ 50
- 24
imperative/python/megengine/functional/loss.py View File

@@ -10,14 +10,14 @@ import numpy as np


from ..core.tensor.utils import make_shape_tuple from ..core.tensor.utils import make_shape_tuple
from ..tensor import Tensor from ..tensor import Tensor
from .elemwise import abs, eq, exp, log, maximum, pow, relu
from .nn import indexing_one_hot
from .elemwise import abs, equal, exp, log, maximum, pow, relu
from .nn import indexing_one_hot, logsigmoid, logsumexp
from .tensor import where from .tensor import where


__all__ = [ __all__ = [
"l1_loss", "l1_loss",
"square_loss", "square_loss",
"cross_entropy_with_softmax",
"cross_entropy",
"binary_cross_entropy", "binary_cross_entropy",
"hinge_loss", "hinge_loss",
] ]
@@ -55,7 +55,7 @@ def l1_loss(pred: Tensor, label: Tensor) -> Tensor:


ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
loss = F.l1_loss(ipt, tgt)
loss = F.nn.l1_loss(ipt, tgt)
print(loss.numpy()) print(loss.numpy())


Outputs: Outputs:
@@ -106,7 +106,7 @@ def square_loss(pred: Tensor, label: Tensor) -> Tensor:


ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32))
tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32))
loss = F.square_loss(ipt, tgt)
loss = F.nn.square_loss(ipt, tgt)
print(loss.numpy()) print(loss.numpy())


Outputs: Outputs:
@@ -120,10 +120,16 @@ def square_loss(pred: Tensor, label: Tensor) -> Tensor:
return (diff ** 2).mean() return (diff ** 2).mean()




def cross_entropy_with_softmax(
pred: Tensor, label: Tensor, axis: int = 1, label_smooth: float = 0
def cross_entropy(
pred: Tensor,
label: Tensor,
axis: int = 1,
with_logits: bool = True,
label_smooth: float = 0,
) -> Tensor: ) -> Tensor:
r"""Returns loss after applying :func:`~.softmax` + :func:`~.cross_entropy`.
r"""Compute the multi-class cross entropy loss (using logits by default).

By default, prediction is assumed to be logits, whose softmax gives probabilities.


It has better numerical stability compared with sequential calls to :func:`~.softmax` and :func:`~.cross_entropy`. It has better numerical stability compared with sequential calls to :func:`~.softmax` and :func:`~.cross_entropy`.


@@ -132,11 +138,12 @@ def cross_entropy_with_softmax(
.. math:: y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K .. math:: y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K


where :math:`y^{LS}` and :math:`y` are new label distribution and origin label distribution respectively. where :math:`y^{LS}` and :math:`y` are new label distribution and origin label distribution respectively.
k is the index of label distribution. :math:`\alpha` is label_smooth and :math:`K` is the number of classes.
k is the index of label distribution. :math:`\alpha` is ``label_smooth`` and :math:`K` is the number of classes.


:param pred: input tensor representing the predicted probability. :param pred: input tensor representing the predicted probability.
:param label: input tensor representing the classification label. :param label: input tensor representing the classification label.
:param axis: an axis along which softmax will be applied. Default: 1 :param axis: an axis along which softmax will be applied. Default: 1
:param with_logits: whether to apply softmax first. Default: True
:param label_smooth: a label smoothing of parameter that can re-distribute target distribution. Default: 0 :param label_smooth: a label smoothing of parameter that can re-distribute target distribution. Default: 0
:return: loss value. :return: loss value.


@@ -150,9 +157,9 @@ def cross_entropy_with_softmax(


data_shape = (1, 2) data_shape = (1, 2)
label_shape = (1, ) label_shape = (1, )
pred = tensor(np.array([0.5, 0.5], dtype=np.float32).reshape(data_shape))
pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape))
label = tensor(np.ones(label_shape, dtype=np.int32)) label = tensor(np.ones(label_shape, dtype=np.int32))
loss = F.cross_entropy_with_softmax(pred, label)
loss = F.nn.cross_entropy(pred, label)
print(loss.numpy()) print(loss.numpy())


Outputs: Outputs:
@@ -170,26 +177,41 @@ def cross_entropy_with_softmax(
) )


num_classes = pred.shape[axis] num_classes = pred.shape[axis]
no_label_smooth = (
label_smooth is None or type(label_smooth) in (int, float) and label_smooth == 0
)

if not with_logits:
if no_label_smooth:
return -log(indexing_one_hot(pred, label, axis)).mean()
pred = log(pred)
return (
label_smooth * pred.mean()
- (1 - label_smooth) * indexing_one_hot(pred, label, axis).mean()
)


# Denominator of the softmax # Denominator of the softmax
offset = pred.max(axis=axis, keepdims=True).detach()
pred = pred - offset
down = exp(pred).sum(axis=axis, keepdims=True)
down = logsumexp(pred, axis=axis, keepdims=True)


up = indexing_one_hot(pred, label, axis) up = indexing_one_hot(pred, label, axis)


if label_smooth != 0:
if not no_label_smooth:
factor = label_smooth / num_classes factor = label_smooth / num_classes
up = up * (1 - label_smooth) + pred.sum(axis=axis, keepdims=True) * factor up = up * (1 - label_smooth) + pred.sum(axis=axis, keepdims=True) * factor


return (log(down) - up).mean()
return (down - up).mean()
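A quick check of the ``with_logits`` switch added above, following the docstring example: with two classes and a uniform prediction, both forms evaluate to roughly -log(0.5), about 0.6931.

import numpy as np
import megengine.functional as F
from megengine import tensor

logits = tensor(np.zeros((1, 2), dtype=np.float32))
probs = tensor(np.full((1, 2), 0.5, dtype=np.float32))   # softmax of zero logits
label = tensor(np.ones((1,), dtype=np.int32))

loss_from_logits = F.nn.cross_entropy(logits, label)                   # with_logits=True by default
loss_from_probs = F.nn.cross_entropy(probs, label, with_logits=False)  # pass probabilities instead
# both are approximately 0.6931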




def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor:
r"""Function that measures the Binary Cross Entropy between the target and the prediction.
def binary_cross_entropy(
pred: Tensor, label: Tensor, with_logits: bool = True
) -> Tensor:
r"""Compute the binary cross entropy loss (using logits by default).

By default, prediction is assumed to be logits, whose sigmoid gives probabilities.


:param pred: `(N, *)` where `*` means any number of additional dimensions.
:param pred: `(N, *)`, where `*` means any number of additional dimensions.
:param label: `(N, *)`, same shape as the input. :param label: `(N, *)`, same shape as the input.
:param with_logits: bool, whether to apply sigmoid first. Default: True
:return: loss value. :return: loss value.


Examples: Examples:
@@ -200,9 +222,9 @@ def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor:
from megengine import tensor from megengine import tensor
import megengine.functional as F import megengine.functional as F


pred = tensor(np.array([0.5, 0.5], dtype=np.float32).reshape(1, 2))
pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2))
label = tensor(np.ones((1, 2), dtype=np.float32)) label = tensor(np.ones((1, 2), dtype=np.float32))
loss = F.binary_cross_entropy(pred, label)
loss = F.nn.binary_cross_entropy(pred, label)
print(loss.numpy()) print(loss.numpy())


Outputs: Outputs:
@@ -212,11 +234,15 @@ def binary_cross_entropy(pred: Tensor, label: Tensor) -> Tensor:
[0.6931] [0.6931]


""" """
return -1.0 * (label * log(pred) + (1.0 - label) * log(1 - pred)).mean()
if not with_logits:
return -(label * log(pred) + (1 - label) * log(1 - pred)).mean()
# logsigmoid(pred) and logsigmoid(-pred) have a common sub-expression
# hopefully the backend would optimize this
return -(label * logsigmoid(pred) + (1 - label) * logsigmoid(-pred)).mean()
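The analogous check for the binary form above: zero logits give sigmoid outputs of 0.5, so both calls are roughly 0.6931.

import numpy as np
import megengine.functional as F
from megengine import tensor

logits = tensor(np.zeros((1, 2), dtype=np.float32))
probs = tensor(np.full((1, 2), 0.5, dtype=np.float32))
label = tensor(np.ones((1, 2), dtype=np.float32))

l1 = F.nn.binary_cross_entropy(logits, label)                     # logits path (default)
l2 = F.nn.binary_cross_entropy(probs, label, with_logits=False)   # probability path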




def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor: def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor:
r"""Caculate the hinge loss which is often used in SVMs.
r"""Caculates the hinge loss which is often used in SVM.


The hinge loss can be described as: The hinge loss can be described as:


@@ -236,7 +262,7 @@ def hinge_loss(pred: Tensor, label: Tensor, norm: str = "L1") -> Tensor:


pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32") pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32")
label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32") label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32")
loss = F.hinge_loss(pred, label)
loss = F.nn.hinge_loss(pred, label)
print(loss.numpy()) print(loss.numpy())


Outputs: Outputs:


+ 81
- 56
imperative/python/megengine/functional/math.py View File

@@ -14,11 +14,12 @@ from typing import Optional, Sequence, Tuple, Union


from ..core.ops import builtin from ..core.ops import builtin
from ..core.ops._internal import param_defs as P from ..core.ops._internal import param_defs as P
from ..core.ops.special import Const
from ..core.tensor import utils from ..core.tensor import utils
from ..core.tensor.core import apply
from ..core.tensor.core import TensorBase, TensorWrapperBase, apply
from ..tensor import Tensor from ..tensor import Tensor
from .elemwise import clamp, exp, log, log1p
from .tensor import add_axis, remove_axis, reshape
from .elemwise import clip, exp, log, log1p
from .tensor import reshape, squeeze


__all__ = [ __all__ = [
"argmax", "argmax",
@@ -45,7 +46,7 @@ def isnan(inp: Tensor) -> Tensor:
r"""Returns a new tensor representing if each element is ``NaN`` or not. r"""Returns a new tensor representing if each element is ``NaN`` or not.


:param inp: input tensor. :param inp: input tensor.
:return: a new tensor representing if each element in inp is NaN or not.
:return: result tensor.


Examples: Examples:


@@ -71,7 +72,7 @@ def isinf(inp: Tensor) -> Tensor:
r"""Returns a new tensor representing if each element is ``Inf`` or not. r"""Returns a new tensor representing if each element is ``Inf`` or not.


:param inp: input tensor. :param inp: input tensor.
:return: a new tensor representing if each element in inp is Inf or not.
:return: result tensor.


Examples: Examples:


@@ -84,7 +85,7 @@ def isinf(inp: Tensor) -> Tensor:
print(F.isinf(x).numpy()) print(F.isinf(x).numpy())


Outputs: Outputs:
.. testoutput:: .. testoutput::


[False True False] [False True False]
@@ -108,7 +109,7 @@ def sign(inp: Tensor):


x = tensor([1, -1, 0]) x = tensor([1, -1, 0])
print(F.sign(x).numpy()) print(F.sign(x).numpy())
Outputs: Outputs:


.. testoutput:: .. testoutput::
@@ -128,7 +129,7 @@ def sum(
reduce over all of them. reduce over all of them.


:param inp: input tensor. :param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced.
:param axis: dimension to reduce. If None, all dimensions will be reduced.
Default: None Default: None
:param keepdims: whether the output tensor has axis retained or not. :param keepdims: whether the output tensor has axis retained or not.
Default: False Default: False
@@ -163,7 +164,7 @@ def prod(
reduce over all of them. reduce over all of them.


:param inp: input tensor. :param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False :param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor. :return: output tensor.


@@ -199,7 +200,7 @@ def mean(
reduce over all of them. reduce over all of them.


:param inp: input tensor. :param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False :param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor. :return: output tensor.


@@ -235,7 +236,7 @@ def var(
reduce over all of them. reduce over all of them.


:param inp: input tensor. :param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False :param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor. :return: output tensor.


@@ -275,7 +276,7 @@ def std(
reduce over all of them. reduce over all of them.


:param inp: input tensor. :param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False :param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor. :return: output tensor.


@@ -310,7 +311,7 @@ def min(
reduce over all of them. reduce over all of them.


:param inp: input tensor. :param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False :param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor. :return: output tensor.


@@ -346,7 +347,7 @@ def max(
reduce over all of them. reduce over all of them.


:param inp: input tensor. :param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False :param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor. :return: output tensor.


@@ -373,18 +374,14 @@ def max(




def norm( def norm(
inp: Tensor,
p: int = 2,
axis: Optional[Union[int, Sequence[int]]] = None,
keepdims=False,
inp: Tensor, ord: float = None, axis: int = None, keepdims=False,
): ):
"""Calculates ``p``-norm of input tensor along """Calculates ``p``-norm of input tensor along
given axis. If axis is a list of dimensions,
reduce over all of them.
given axis.


:param inp: input tensor. :param inp: input tensor.
:param p: power of value applied to inp. Default: 2
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param ord: the order of the norm (power applied to the absolute values). Default: 2
:param axis: dimension to reduce. If None, input must be a vector. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False :param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor. :return: output tensor.


@@ -396,7 +393,7 @@ def norm(
from megengine import tensor from megengine import tensor
import megengine.functional as F import megengine.functional as F


x = tensor(np.arange(-3, 3, dtype=np.float32).reshape(2,3))
x = tensor(np.arange(-3, 3, dtype=np.float32))
out = F.norm(x) out = F.norm(x)
print(out.numpy()) print(out.numpy())


@@ -407,13 +404,18 @@ def norm(
[4.3589] [4.3589]


""" """
if p == 0:
if axis is None:
if inp.ndim != 1:
raise TypeError("axis is required unless input is a vector")
if ord is None:
ord = 2
if ord == 0:
return sum(inp != 0, axis=axis, keepdims=keepdims) return sum(inp != 0, axis=axis, keepdims=keepdims)
if p == math.inf:
if ord == math.inf:
return max(abs(inp)) return max(abs(inp))
if p == -math.inf:
if ord == -math.inf:
return min(abs(inp)) return min(abs(inp))
return sum(abs(inp) ** p, axis=axis, keepdims=keepdims) ** (1.0 / p)
return sum(abs(inp) ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord)
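The reworked ``ord`` handling above, shown on a small 1d vector (the only case allowed when ``axis`` is None); the expected values follow directly from the definitions.

import numpy as np
import megengine.functional as F
from megengine import tensor

x = tensor(np.array([-3.0, 0.0, 4.0], dtype=np.float32))
F.norm(x)               # default ord=2: sqrt(9 + 16) = 5.0
F.norm(x, ord=0)        # number of non-zero entries: 2
F.norm(x, ord=np.inf)   # largest absolute value: 4.0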




def argmin( def argmin(
@@ -426,7 +428,7 @@ def argmin(
reduce over all of them. reduce over all of them.


:param inp: input tensor. :param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False :param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor. :return: output tensor.


@@ -458,7 +460,7 @@ def argmin(
(inp,) = apply(op, inp) (inp,) = apply(op, inp)


if not keepdims: if not keepdims:
inp = remove_axis(inp, ai)
inp = squeeze(inp, ai)


return inp return inp


@@ -470,7 +472,7 @@ def argmin(
op = builtin.Argmin(axis=axis) op = builtin.Argmin(axis=axis)
(result,) = apply(op, inp) (result,) = apply(op, inp)
if not keepdims: if not keepdims:
result = remove_axis(result, axis)
result = squeeze(result, axis)
return result return result




@@ -484,7 +486,7 @@ def argmax(
reduce over all of them. reduce over all of them.


:param inp: input tensor. :param inp: input tensor.
:param axis: dimension to reduce. If None, all the dimensions will be reduced. Default: None
:param axis: dimension to reduce. If None, all dimensions will be reduced. Default: None
:param keepdims: whether the output tensor has axis retained or not. Default: False :param keepdims: whether the output tensor has axis retained or not. Default: False
:return: output tensor. :return: output tensor.


@@ -516,7 +518,7 @@ def argmax(
(inp,) = apply(op, inp) (inp,) = apply(op, inp)


if not keepdims: if not keepdims:
inp = remove_axis(inp, ai)
inp = squeeze(inp, ai)


return inp return inp


@@ -528,45 +530,40 @@ def argmax(
op = builtin.Argmax(axis=axis) op = builtin.Argmax(axis=axis)
(result,) = apply(op, inp) (result,) = apply(op, inp)
if not keepdims: if not keepdims:
result = remove_axis(result, axis)
result = squeeze(result, axis)
return result return result




def normalize( def normalize(
inp: Tensor,
p: int = 2,
axis: Optional[Union[int, Sequence[int]]] = None,
eps: float = 1e-12,
inp: Tensor, ord: float = None, axis: int = None, eps: float = 1e-12,
) -> Tensor: ) -> Tensor:
r"""Performs :math:`L_p` normalization of input tensor along r"""Performs :math:`L_p` normalization of input tensor along
given axis. If axis is a list of dimensions,
reduce over all of them.
given axis.


For a tensor inp of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each
For a tensor of shape :math:`(n_0, ..., n_{dim}, ..., n_k)`, each
:math:`n_{dim}` -element vector :math:`v` along dimension :attr:`axis` is transformed as: :math:`n_{dim}` -element vector :math:`v` along dimension :attr:`axis` is transformed as:


.. math:: .. math::
v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}. v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}.


:param inp: input tensor. :param inp: input tensor.
:param p: power of value applied to inp. Default: 2
:param axis: dimension to reduce. If None, all the dimensions will be reduced
to calculate the norm. Default: None
:param ord: the order of the norm used for normalization. Default: 2
:param axis: dimension to reduce. If None, input must be a vector. Default: None
:param eps: a small value to avoid division by zero. Default: 1e-12 :param eps: a small value to avoid division by zero. Default: 1e-12
:return: normalized output tensor. :return: normalized output tensor.
""" """
if axis is None: if axis is None:
return inp / clamp(norm(inp, p, axis), lower=eps)
return inp / clip(norm(inp, ord, axis), lower=eps)
else: else:
return inp / clamp(norm(inp, p, axis, keepdims=True), lower=eps)
return inp / clip(norm(inp, ord, axis, keepdims=True), lower=eps)
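And the matching ``normalize`` call, L2-normalizing each row; this assumes the function is re-exported as ``F.normalize`` like the other math helpers.

import numpy as np
import megengine.functional as F
from megengine import tensor

x = tensor(np.array([[3.0, 4.0], [0.0, 5.0]], dtype=np.float32))
F.normalize(x, axis=1)   # each row becomes a unit vector: [[0.6, 0.8], [0.0, 1.0]]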




def argsort(inp: Tensor, descending: bool = False) -> Tensor: def argsort(inp: Tensor, descending: bool = False) -> Tensor:
r"""Sorts the target 2d matrix by row, return both the sorted tensor and indices.
r"""Returns the indices that would sort the input tensor.


:param inp: input tensor, if 2d, each row will be sorted.
:param descending: Sort in descending order, where the largest comes first. Default: False
:return: Tuple of two tensors `(sorted_tensor, indices_of_int32)`.
:param inp: input tensor. If it is 2d, the result is an array of indices showing how to sort each row of the input tensor.
:param descending: sort in descending order, where the largest comes first. Default: False
:return: int32 indices indicating how to sort the input.


Examples: Examples:


@@ -603,6 +600,31 @@ def argsort(inp: Tensor, descending: bool = False) -> Tensor:




def sort(inp: Tensor, descending: bool = False) -> Tuple[Tensor, Tensor]: def sort(inp: Tensor, descending: bool = False) -> Tuple[Tensor, Tensor]:
r"""Returns sorted tensor and the indices would sort the input tensor.

:param inp: input tensor. If it's 2d, the result would be sorted by row.
:param descending: sort in descending order, where the largest comes first. Default: False
:return: tuple of two tensors `(sorted_tensor, indices_of_int32)`.

Examples:

.. testcode::

import numpy as np
from megengine import tensor
import megengine.functional as F

x = tensor(np.array([1,2], dtype=np.float32))
out, indices = F.sort(x)
print(out.numpy())

Outputs:

.. testoutput::

[1. 2.]

"""
assert len(inp.shape) <= 2, "Input should be 1d or 2d" assert len(inp.shape) <= 2, "Input should be 1d or 2d"
if descending: if descending:
order = P.Argsort.Order.DESCENDING order = P.Argsort.Order.DESCENDING
@@ -625,13 +647,13 @@ def topk(
kth_only: bool = False, kth_only: bool = False,
no_sort: bool = False, no_sort: bool = False,
) -> Tuple[Tensor, Tensor]: ) -> Tuple[Tensor, Tensor]:
r"""Selects the ``Top-K(by default)`` smallest elements of 2d matrix by row.
r"""Selects the ``Top-K``(by default) smallest elements of 2d matrix by row.


:param inp: input tensor, if 2d, each row will be sorted.
:param inp: input tensor. If input tensor is 2d, each row will be sorted.
:param k: number of elements needed. :param k: number of elements needed.
:param descending: if true, return the largest elements instead. Default: False
:param kth_only: if true, only the k-th element will be returned. Default: False
:param no_sort: if true, the returned elements can be unordered. Default: False
:param descending: if True, return the largest elements instead. Default: False
:param kth_only: if True, only the k-th element will be returned. Default: False
:param no_sort: if True, the returned elements can be unordered. Default: False
:return: tuple of two tensors `(topk_tensor, indices_of_int32)`. :return: tuple of two tensors `(topk_tensor, indices_of_int32)`.


Examples: Examples:
@@ -665,15 +687,18 @@ def topk(
mode = Mode.VALUE_IDX_SORTED mode = Mode.VALUE_IDX_SORTED
op = builtin.TopK(mode=mode) op = builtin.TopK(mode=mode)


if not isinstance(k, (TensorBase, TensorWrapperBase)):
(k,) = Const(k, dtype="int32", device=inp.device)(inp)

if len(inp.shape) == 1: if len(inp.shape) == 1:
inp = inp.reshape(1, -1) inp = inp.reshape(1, -1)
res = apply(op, inp, Tensor(k, dtype="int32"))
res = apply(op, inp, k)
if kth_only: if kth_only:
tns = res[0] tns = res[0]
else: else:
tns, ind = res[0][0], res[1][0] tns, ind = res[0][0], res[1][0]
else: else:
res = apply(op, inp, Tensor(k, dtype="int32"))
res = apply(op, inp, k)
if kth_only: if kth_only:
tns = res tns = res
else: else:
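To make the selection rule above concrete, a small sketch (assuming ``topk`` is exported as ``F.topk``): with the default flags it returns the k smallest values together with their int32 indices.

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    x = tensor(np.array([2., 4., 6., 8., 7., 5., 3., 1.], dtype=np.float32))
    values, indices = F.topk(x, 3)     # smallest 3 by default; pass descending=True for the largest
    print(values.numpy())              # expected: [1. 2. 3.]
    print(indices.numpy())             # expected: [7 0 6]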


+ 184
- 180
imperative/python/megengine/functional/nn.py View File

@@ -13,46 +13,51 @@ from ..core._imperative_rt import CompNode
from ..core.ops import builtin from ..core.ops import builtin
from ..core.ops._internal import param_defs as P from ..core.ops._internal import param_defs as P
from ..core.ops.special import Const from ..core.ops.special import Const
from ..core.tensor import utils
from ..core.tensor import megbrain_graph, utils
from ..core.tensor.core import TensorBase, TensorWrapperBase, apply from ..core.tensor.core import TensorBase, TensorWrapperBase, apply
from ..core.tensor.utils import astensor1d
from ..distributed import WORLD, is_distributed from ..distributed import WORLD, is_distributed
from ..jit.tracing import is_tracing
from ..random import uniform from ..random import uniform
from ..tensor import Tensor from ..tensor import Tensor
from .debug_param import get_conv_execution_strategy from .debug_param import get_conv_execution_strategy
from .distributed import all_reduce_sum from .distributed import all_reduce_sum
from .elemwise import exp, floor, log, log1p, maximum, minimum, relu from .elemwise import exp, floor, log, log1p, maximum, minimum, relu
from .math import argsort, max, sum from .math import argsort, max, sum
from .tensor import add_axis, broadcast, concat, full, ones, remove_axis, reshape, zeros
from .tensor import (
broadcast_to,
concat,
expand_dims,
full,
ones,
reshape,
squeeze,
zeros,
)
from .types import _pair, _pair_nonzero from .types import _pair, _pair_nonzero


__all__ = [ __all__ = [
"adaptive_avg_pool2d",
"adaptive_max_pool2d",
"avg_pool2d", "avg_pool2d",
"batched_nms",
"batch_norm2d",
"batch_norm",
"conv2d", "conv2d",
"conv_transpose2d", "conv_transpose2d",
"dot", "dot",
"dropout", "dropout",
"embedding",
"indexing_one_hot", "indexing_one_hot",
"interpolate",
"leaky_relu", "leaky_relu",
"linear",
"local_conv2d", "local_conv2d",
"logsigmoid", "logsigmoid",
"logsumexp", "logsumexp",
"log_softmax",
"logsoftmax",
"matmul", "matmul",
"max_pool2d", "max_pool2d",
"nms",
"one_hot", "one_hot",
"prelu", "prelu",
"roi_align",
"roi_pooling",
"softmax", "softmax",
"softplus", "softplus",
"svd", "svd",
"sync_batch_norm",
"warp_perspective", "warp_perspective",
] ]


@@ -106,19 +111,18 @@ def conv2d(
:param padding: size of the paddings added to the input on both sides of its :param padding: size of the paddings added to the input on both sides of its
spatial dimensions. Only zero-padding is supported. Default: 0 spatial dimensions. Only zero-padding is supported. Default: 0
:param dilation: dilation of the 2D convolution operation. Default: 1 :param dilation: dilation of the 2D convolution operation. Default: 1
:param groups: number of groups to divide input and output channels into,
so as to perform a ``grouped convolution``. When groups is not 1,
in_channels and out_channels must be divisible by groups,
:param groups: number of groups into which the input and output channels are divided, so as to perform a ``grouped convolution``. When ``groups`` is not 1,
``in_channels`` and ``out_channels`` must be divisible by ``groups``,
and the shape of weight should be `(groups, out_channel // groups, and the shape of weight should be `(groups, out_channel // groups,
in_channels // groups, height, width)`. in_channels // groups, height, width)`.
:type conv_mode: string or :class:`P.Convolution.Mode`.
:type conv_mode: string or :class:`P.Convolution.Mode`
:param conv_mode: supports "CROSS_CORRELATION" or "CONVOLUTION". Default: :param conv_mode: supports "CROSS_CORRELATION" or "CONVOLUTION". Default:
"CROSS_CORRELATION" "CROSS_CORRELATION"
:type compute_mode: string or :type compute_mode: string or
:class:`P.Convolution.ComputeMode`.
:class:`P.Convolution.ComputeMode`
:param compute_mode: when set to "DEFAULT", no special requirements will be :param compute_mode: when set to "DEFAULT", no special requirements will be
placed on the precision of intermediate results. When set to "FLOAT32", placed on the precision of intermediate results. When set to "FLOAT32",
Float32 would be used for accumulator and intermediate result, but only
"Float32" would be used for accumulator and intermediate result, but only
effective when input and output are of Float16 dtype. effective when input and output are of Float16 dtype.
:return: output tensor. :return: output tensor.
""" """
@@ -167,24 +171,23 @@ def conv_transpose2d(


:param inp: feature map of the convolution operation. :param inp: feature map of the convolution operation.
:param weight: convolution kernel. :param weight: convolution kernel.
:param bias: bias added to the result of convolution (if given)
:param bias: bias added to the result of convolution (if given).
:param stride: stride of the 2D convolution operation. Default: 1 :param stride: stride of the 2D convolution operation. Default: 1
:param padding: size of the paddings added to the input on both sides of its :param padding: size of the paddings added to the input on both sides of its
spatial dimensions. Only zero-padding is supported. Default: 0 spatial dimensions. Only zero-padding is supported. Default: 0
:param dilation: dilation of the 2D convolution operation. Default: 1 :param dilation: dilation of the 2D convolution operation. Default: 1
:param groups: number of groups to divide input and output channels into,
so as to perform a ``grouped convolution``. When groups is not 1,
in_channels and out_channels must be divisible by groups,
:param groups: number of groups into which the input and output channels are divided, so as to perform a ``grouped convolution``. When ``groups`` is not 1,
``in_channels`` and ``out_channels`` must be divisible by ``groups``,
and the shape of weight should be `(groups, out_channel // groups, and the shape of weight should be `(groups, out_channel // groups,
in_channels // groups, height, width)`. Default: 1 in_channels // groups, height, width)`. Default: 1
:type conv_mode: string or :class:`P.Convolution.Mode`.
:type conv_mode: string or :class:`P.Convolution.Mode`
:param conv_mode: supports "CROSS_CORRELATION" or "CONVOLUTION". Default: :param conv_mode: supports "CROSS_CORRELATION" or "CONVOLUTION". Default:
"CROSS_CORRELATION" "CROSS_CORRELATION"
:type compute_mode: string or :type compute_mode: string or
:class:`P.Convolution.ComputeMode`.
:class:`P.Convolution.ComputeMode`
:param compute_mode: when set to "DEFAULT", no special requirements will be :param compute_mode: when set to "DEFAULT", no special requirements will be
placed on the precision of intermediate results. When set to "FLOAT32", placed on the precision of intermediate results. When set to "FLOAT32",
Float32 would be used for accumulator and intermediate result, but only
"Float32" would be used for accumulator and intermediate result, but only
effective when input and output are of Float16 dtype. effective when input and output are of Float16 dtype.
:return: output tensor. :return: output tensor.
""" """
@@ -222,10 +225,8 @@ def local_conv2d(
padding: Union[int, Tuple[int, int]] = 0, padding: Union[int, Tuple[int, int]] = 0,
dilation: Union[int, Tuple[int, int]] = 1, dilation: Union[int, Tuple[int, int]] = 1,
conv_mode="CROSS_CORRELATION", conv_mode="CROSS_CORRELATION",
) -> Tensor:
"""Applies spatial 2D convolution over an image with untied kernels.

Refer to :class:`~.LocalConv2d` for more information.
):
"""Applies spatial 2D convolution over an groupped channeled image with untied kernels.
""" """
assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION" assert conv_mode == "CROSS_CORRELATION" or conv_mode.name == "CROSS_CORRELATION"


@@ -233,6 +234,8 @@ def local_conv2d(
pad_h, pad_w = expand_hw(padding) pad_h, pad_w = expand_hw(padding)
dilate_h, dilate_w = expand_hw(dilation) dilate_h, dilate_w = expand_hw(dilation)


Sparse = P.Convolution.Sparse

op = builtin.GroupLocal( op = builtin.GroupLocal(
stride_h=stride_h, stride_h=stride_h,
stride_w=stride_w, stride_w=stride_w,
@@ -240,7 +243,9 @@ def local_conv2d(
pad_w=pad_w, pad_w=pad_w,
dilate_h=dilate_h, dilate_h=dilate_h,
dilate_w=dilate_w, dilate_w=dilate_w,
# strategy=get_conv_execution_strategy(),
mode=conv_mode,
compute_mode="DEFAULT",
sparse=Sparse.DENSE,
) )
inp, weight = utils.convert_inputs(inp, weight) inp, weight = utils.convert_inputs(inp, weight)
(output,) = apply(op, inp, weight) (output,) = apply(op, inp, weight)
@@ -263,7 +268,7 @@ def max_pool2d(
:param kernel_size: size of the window. :param kernel_size: size of the window.
:param stride: stride of the window. If not provided, its value is set to kernel_size. :param stride: stride of the window. If not provided, its value is set to kernel_size.
Default: None Default: None
:param padding: implicit zero padding to be added on both sides. Default: 0
:param padding: implicit zero padding added on both sides. Default: 0
:return: output tensor. :return: output tensor.
""" """
if stride is None: if stride is None:
@@ -292,15 +297,15 @@ def avg_pool2d(
padding: Union[int, Tuple[int, int]] = 0, padding: Union[int, Tuple[int, int]] = 0,
mode: str = "AVERAGE_COUNT_EXCLUDE_PADDING", mode: str = "AVERAGE_COUNT_EXCLUDE_PADDING",
) -> Tensor: ) -> Tensor:
"""Applies a 2D average pooling over an input tensor.
"""Applies 2D average pooling over an input tensor.


Refer to :class:`~.AvgPool2d` for more information. Refer to :class:`~.AvgPool2d` for more information.


:param inp: input tensor. :param inp: input tensor.
:param kernel_size: size of the window. :param kernel_size: size of the window.
:param stride: stride of the window. If not provided, its value is set to kernel_size.
:param stride: stride of the window. If not provided, its value is set to ``kernel_size``.
Default: None Default: None
:param padding: implicit zero padding to be added on both sides. Default: 0
:param padding: implicit zero padding added on both sides. Default: 0
:param mode: whether to count padding values. Default: "AVERAGE_COUNT_EXCLUDE_PADDING" :param mode: whether to count padding values. Default: "AVERAGE_COUNT_EXCLUDE_PADDING"
:return: output tensor. :return: output tensor.
""" """
@@ -323,6 +328,48 @@ def avg_pool2d(
return output return output
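A small sketch of the pooling call (window sizes chosen for illustration): max pooling a 4x4 ramp with a 2x2 window and stride 2 keeps the largest value of each block.

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    x = tensor(np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4))
    y = F.max_pool2d(x, kernel_size=2, stride=2)
    print(y.numpy())                   # expected: [[[[ 5.  7.] [13. 15.]]]]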




def adaptive_max_pool2d(
inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor],
) -> Tensor:
"""Applies a 2D max adaptive pooling over an input.

Refer to :class:`~.MaxAdaptivePool2d` for more information.

:param inp: input tensor.
:param oshp: `(OH, OW)` size of the output shape, or a single int for a square output.
:return: output tensor.
"""
assert isinstance(inp, (Tensor, megbrain_graph.VarNode)), "inp must be Tensor type"
if isinstance(oshp, int):
oshp = (oshp, oshp)

op = builtin.AdaptivePooling(mode="MAX", format="NCHW",)
oshp = astensor1d(oshp, inp, dtype="int32", device=inp.device)
(output,) = apply(op, inp, oshp)
return output


def adaptive_avg_pool2d(
inp: Tensor, oshp: Union[Tuple[int, int], int, Tensor],
) -> Tensor:
"""Applies a 2D average adaptive pooling over an input.

Refer to :class:`~.AvgAdaptivePool2d` for more information.

:param inp: input tensor.
:param oshp: `(OH, OW)` size of the output shape, or a single int for a square output.
:return: output tensor.
"""
assert isinstance(inp, (Tensor, megbrain_graph.VarNode)), "inp must be Tensor type"
if isinstance(oshp, int):
oshp = (oshp, oshp)

op = builtin.AdaptivePooling(mode="AVERAGE", format="NCHW",)
oshp = astensor1d(oshp, inp, dtype="int32", device=inp.device)
(output,) = apply(op, inp, oshp)
return output
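A quick sketch of the adaptive variant just added (output size chosen for illustration): asking for a `(2, 2)` output from a 4x4 input averages each 2x2 region.

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    x = tensor(np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4))
    y = F.adaptive_avg_pool2d(x, (2, 2))
    print(y.numpy())                   # expected: [[[[ 2.5  4.5] [10.5 12.5]]]]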


def prelu(inp: Tensor, weight: Tensor) -> Tensor: def prelu(inp: Tensor, weight: Tensor) -> Tensor:
r""" r"""
Applies the element-wise PReLU function. Applies the element-wise PReLU function.
@@ -346,17 +393,17 @@ def softplus(inp: Tensor) -> Tensor:


.. math:: .. math::
\text{softplus}(x) = \log(1 + \exp(x)) \text{softplus}(x) = \log(1 + \exp(x))
softplus is a smooth approximation to the ReLU function and can be used softplus is a smooth approximation to the ReLU function and can be used
to constrain the output of a machine to always be positive.
to constrain the output to be always positive.
For numerical stability the implementation follows this transformation: For numerical stability the implementation follows this transformation:


.. math:: .. math::
\text{softplus}(x) = \log(1 + \exp(x))
= \log(1 + \exp(-\text{abs}(x))) + \max(x, 0)
\text{softplus}(x) = \log(1 + \exp(x))
= \log(1 + \exp(-\text{abs}(x))) + \max(x, 0)
= \log1p(\exp(-\text{abs}(x))) + \text{relu}(x) = \log1p(\exp(-\text{abs}(x))) + \text{relu}(x)


:param inp: The input tensor
:param inp: input tensor.


Examples: Examples:


@@ -369,9 +416,9 @@ def softplus(inp: Tensor) -> Tensor:
x = tensor(np.arange(-3, 3, dtype=np.float32)) x = tensor(np.arange(-3, 3, dtype=np.float32))
y = F.softplus(x) y = F.softplus(x)
print(y.numpy()) print(y.numpy())
Outputs: Outputs:
.. testoutput:: .. testoutput::


[0.0486 0.1269 0.3133 0.6931 1.3133 2.1269] [0.0486 0.1269 0.3133 0.6931 1.3133 2.1269]
@@ -380,7 +427,7 @@ def softplus(inp: Tensor) -> Tensor:
return log1p(exp(-abs(inp))) + relu(inp) return log1p(exp(-abs(inp))) + relu(inp)




def log_softmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
r"""Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional r"""Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional
input Tensor. The LogSoftmax formulation can be simplified as: input Tensor. The LogSoftmax formulation can be simplified as:


@@ -390,13 +437,13 @@ def log_softmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
For numerical stability the implementation follows this transformation: For numerical stability the implementation follows this transformation:


.. math:: .. math::
\operatorname{logsoftmax}(x)
\operatorname{logsoftmax}(x)
= \log (\frac{\exp (x)}{\sum_{i}(\exp (x_{i}))}) = \log (\frac{\exp (x)}{\sum_{i}(\exp (x_{i}))})
= x - \log (\sum_{i}(\exp (x_{i}))) = x - \log (\sum_{i}(\exp (x_{i})))
= x - logsumexp(x) = x - logsumexp(x)
:param inp: The input tensor
:param axis: An axis along which log_softmax will be applied.
:param inp: input tensor.
:param axis: axis along which logsoftmax will be applied.


Examples: Examples:


@@ -407,11 +454,11 @@ def log_softmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
import megengine.functional as F import megengine.functional as F


x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5) x = tensor(np.arange(-5, 5, dtype=np.float32)).reshape(2,5)
y = F.log_softmax(x, axis=1)
y = F.logsoftmax(x, axis=1)
print(y.numpy()) print(y.numpy())


Outputs: Outputs:
.. testoutput:: .. testoutput::


[[-4.4519 -3.4519 -2.4519 -1.4519 -0.4519] [[-4.4519 -3.4519 -2.4519 -1.4519 -0.4519]
@@ -430,7 +477,7 @@ def logsigmoid(inp: Tensor) -> Tensor:
= - \log(1 + \exp(-x))
= - \text{softplus}(-x) = - \text{softplus}(-x)


:param inp: The input tensor
:param inp: input tensor.


Examples: Examples:


@@ -459,11 +506,10 @@ def logsumexp(
inp: Tensor, axis: Union[int, Sequence[int]], keepdims: bool = False inp: Tensor, axis: Union[int, Sequence[int]], keepdims: bool = False
) -> Tensor: ) -> Tensor:
r""" r"""
Compute the log of the sum of exponentials of inputs along the given :attr:`axis`.
The computation is numerically stabilized.
Calculates the logarithm of the inputs' exponential sum along the given :attr:`axis`.

.. math:: .. math::
\operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right) \operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right)


For numerical stability, the implementation follows this transformation: For numerical stability, the implementation follows this transformation:
@@ -472,18 +518,18 @@ def logsumexp(


\operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right) \operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right)
= b + \log \sum_{j=1}^{n} \exp \left(x_{j}-b\right)
where where


.. math:: .. math::
b = \max(x_j) b = \max(x_j)


:param inp: The input tensor.
:param axis: Axis over which the sum is taken. It can be a single axis or a list of axes.
:param inp: input tensor.
:param axis: axis over which the sum is taken; it may be a single axis or a list of axes.
:param keepdims: whether to retain :attr:`axis` or not for the output tensor. :param keepdims: whether to retain :attr:`axis` or not for the output tensor.


Examples: Examples:
.. testcode:: .. testcode::


import numpy as np import numpy as np
@@ -501,11 +547,11 @@ def logsumexp(
[-0.5481 4.4519] [-0.5481 4.4519]


""" """
max_value = max(inp, axis, keepdims=True)
max_value = max(inp.detach(), axis, keepdims=True)
if keepdims: if keepdims:
return max_value + log(sum(exp(inp - max_value), axis, keepdims)) return max_value + log(sum(exp(inp - max_value), axis, keepdims))
else: else:
return remove_axis(max_value, axis=None) + log(
return squeeze(max_value, axis=None) + log(
sum(exp(inp - max_value), axis, keepdims) sum(exp(inp - max_value), axis, keepdims)
) )
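The stabilizing identity used above can be checked with plain numpy (values chosen so that the naive form overflows):

    import numpy as np

    x = np.array([1000., 1001., 1002.])
    naive = np.log(np.exp(x).sum())          # overflows to inf (numpy emits an overflow warning)
    b = x.max()
    stable = b + np.log(np.exp(x - b).sum()) # identity used by the implementation above
    print(naive, stable)                     # inf, approximately 1002.4076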


@@ -523,13 +569,13 @@ def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor:
.. math:: .. math::
\text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)} \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}


It is applied to all elements along axis, and will re-scale them so that
the elements lie in the range `[0, 1]` and sum to 1.
It is applied to all elements along axis, and rescales elements so that
they stay in the range `[0, 1]` and sum to 1.


See :class:`~megengine.module.activation.Softmax` for more details. See :class:`~megengine.module.activation.Softmax` for more details.


:param inp: The input tensor.
:param axis: An axis along which softmax will be applied. By default,
:param inp: input tensor.
:param axis: an axis along which softmax will be applied. By default,
softmax will apply along the highest ranked axis. softmax will apply along the highest ranked axis.


Examples: Examples:
@@ -560,7 +606,7 @@ def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor:
return cached / down return cached / down




def batch_norm2d(
def batch_norm(
inp: Tensor, inp: Tensor,
running_mean: Tensor = None, running_mean: Tensor = None,
running_var: Tensor = None, running_var: Tensor = None,
@@ -572,7 +618,7 @@ def batch_norm2d(
eps: float = 1e-5, eps: float = 1e-5,
inplace: bool = True inplace: bool = True
): ):
"""Applies batch normalization to the input.
r"""Applies batch normalization to the input.


Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information.


@@ -584,26 +630,28 @@ def batch_norm2d(
:param bias: bias tensor in the learnable affine parameters. :param bias: bias tensor in the learnable affine parameters.
See :math:`\beta` in :class:`~.BatchNorm2d`. See :math:`\beta` in :class:`~.BatchNorm2d`.
:param training: a boolean value to indicate whether batch norm is performed :param training: a boolean value to indicate whether batch norm is performed
in traning mode. Default: False
in training mode. Default: False
:param momentum: value used for the ``running_mean`` and ``running_var`` :param momentum: value used for the ``running_mean`` and ``running_var``
computation. computation.
Default: 0.9 Default: 0.9
:param eps: a value added to the denominator for numerical stability. :param eps: a value added to the denominator for numerical stability.
Default: 1e-5 Default: 1e-5
:param inplace: whether to update running_mean and running_var inplace or return new tensors
:param inplace: whether to update ``running_mean`` and ``running_var`` inplace or return new tensors
Default: True Default: True
:return: output tensor. :return: output tensor.
""" """
if inp.ndim != 4:
raise NotImplementedError("batch_norm for ndim != 4")


def full_value(value): def full_value(value):
C = inp.shape[1] C = inp.shape[1]
(x,) = Const(value, dtype=inp.dtype, device=inp.device)(inp) (x,) = Const(value, dtype=inp.dtype, device=inp.device)(inp)
return broadcast(x, [1, C, 1, 1])
return broadcast_to(x, [1, C, 1, 1])


def expand_or_full(x, value): def expand_or_full(x, value):
if x is None: if x is None:
return full_value(value) return full_value(value)
return add_axis(x, [0, 2, 3])
return expand_dims(x, [0, 2, 3])


def make_full_if_none(x, value): def make_full_if_none(x, value):
if x is None: if x is None:
@@ -676,7 +724,7 @@ def sync_batch_norm(
eps_mode="ADDITIVE", eps_mode="ADDITIVE",
group=WORLD, group=WORLD,
) -> Tensor: ) -> Tensor:
"""Applies synchronized batch normalization to the input.
r"""Applies synchronized batch normalization to the input.


Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information. Refer to :class:`~.BatchNorm2d` and :class:`~.BatchNorm1d` for more information.


@@ -717,7 +765,7 @@ def sync_batch_norm(


if is_distributed(): if is_distributed():
# reduce all nodes' data to calculate mean and variance # reduce all nodes' data to calculate mean and variance
reduce_size = broadcast(Tensor(reduce_size, dtype=_dtype), [1] * _ndim)
reduce_size = broadcast_to(Tensor(reduce_size, dtype=_dtype), [1] * _ndim)
stat = concat( stat = concat(
[reduce_size.astype(_dtype), channel_x1s, channel_x2s], axis=1 [reduce_size.astype(_dtype), channel_x1s, channel_x2s], axis=1
) )
@@ -838,6 +886,10 @@ def warp_perspective(
:param interp_mode: interpolation methods. Default: "LINEAR" :param interp_mode: interpolation methods. Default: "LINEAR"
:return: output tensor. :return: output tensor.


Note:

The transformation matrix is the inverse of that used by `cv2.warpPerspective`.

Examples: Examples:


.. testcode:: .. testcode::
@@ -868,7 +920,8 @@ def warp_perspective(
imode=interp_mode, bmode=border_mode, format="NCHW", border_val=border_val imode=interp_mode, bmode=border_mode, format="NCHW", border_val=border_val
) )
inp, M = utils.convert_inputs(inp, M) inp, M = utils.convert_inputs(inp, M)
(result,) = apply(op, inp, M, Tensor(dsize))
dsize = astensor1d(dsize, inp, dtype="int32", device=inp.device)
(result,) = apply(op, inp, M, dsize)
return result return result
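As a sanity-check sketch for the call above (an assumption about behavior, not part of the diff): warping with an identity matrix and the original output size should reproduce the input up to interpolation error.

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    x = tensor(np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4))
    M = tensor(np.eye(3, dtype=np.float32).reshape(1, 3, 3))   # identity transform, batch size 1
    y = F.warp_perspective(x, M, (4, 4))
    np.testing.assert_allclose(y.numpy(), x.numpy(), atol=1e-4)  # assumed to match up to interpolation error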




@@ -885,19 +938,18 @@ def matmul(


With different inputs dim, this function behaves differently: With different inputs dim, this function behaves differently:


- Both 1-D tensor, simply forward to dot.
- Both 1-D tensor, simply forward to ``dot``.
- Both 2-D tensor, normal matrix multiplication. - Both 2-D tensor, normal matrix multiplication.
- If one input tensor is 1-D, matrix vector multiplication. - If one input tensor is 1-D, matrix vector multiplication.
- If at least one tensor are 3-dimensional or >3-dimensional, the batched matrix-matrix is returned, and the tensor with smaller dimension will
- If at least one tensor is 3-dimensional or higher, the other tensor must have dim >= 2; a batched matrix-matrix product is returned, and the tensor with smaller dimension will
be broadcasted. For example: be broadcasted. For example:
- inp1: `(k, m)`, inp2: `(m, p)`, return: `(k, p)`
- inp1: `(n, k, m)`, inp2: `(n, m, p)`, return: `(n, k, p)` - inp1: `(n, k, m)`, inp2: `(n, m, p)`, return: `(n, k, p)`
- inp1: `(n, k, m)`, inp2: `(m, p)`, return: `(n, k, p)` - inp1: `(n, k, m)`, inp2: `(m, p)`, return: `(n, k, p)`
- inp1: `(n, j, k, m)`, inp2: `(n, j, m, p)`, return: `(n, j, k, p)` - inp1: `(n, j, k, m)`, inp2: `(n, j, m, p)`, return: `(n, j, k, p)`


:param inp1: The first matrix to be multiplied
:param inp2: The second matrix to be multiplied
:return: The output tensor
:param inp1: first matrix to be multiplied.
:param inp2: second matrix to be multiplied.
:return: output tensor.


Examples: Examples:


@@ -931,10 +983,10 @@ def matmul(
if dim1 != dim2: if dim1 != dim2:
if dim1 < dim2: if dim1 < dim2:
shape1 = shape2[: dim2 - dim1] + shape1 shape1 = shape2[: dim2 - dim1] + shape1
inp1 = inp1.broadcast(*shape1)
inp1 = broadcast_to(inp1, shape1)
else: else:
shape2 = shape1[: dim1 - dim2] + shape2 shape2 = shape1[: dim1 - dim2] + shape2
inp2 = inp2.broadcast(*shape2)
inp2 = broadcast_to(inp2, shape2)
reshaped_batch_size = 1 reshaped_batch_size = 1
for i in shape1[:-2]: for i in shape1[:-2]:
reshaped_batch_size *= i reshaped_batch_size *= i
@@ -949,9 +1001,9 @@ def matmul(
shp = shape1[:-1] + shape2[-1:] shp = shape1[:-1] + shape2[-1:]
elif dim1 == 3 or dim2 == 3: elif dim1 == 3 or dim2 == 3:
if dim2 < 3: if dim2 < 3:
inp2 = inp2.broadcast(*(inp1.shape[:1] + inp2.shape))
inp2 = broadcast_to(inp2, inp1.shape[:1] + inp2.shape)
elif dim1 < 3: elif dim1 < 3:
inp1 = inp1.broadcast(*(inp2.shape[:1] + inp1.shape))
inp1 = broadcast_to(inp1, inp2.shape[:1] + inp1.shape)
op = builtin.BatchedMatrixMul( op = builtin.BatchedMatrixMul(
transposeA=transpose_a, transposeA=transpose_a,
transposeB=transpose_b, transposeB=transpose_b,
@@ -961,10 +1013,10 @@ def matmul(
else: else:
if dim1 == 1: if dim1 == 1:
shp = (inp2.shape[1],) shp = (inp2.shape[1],)
inp1 = add_axis(inp1, 0)
inp1 = expand_dims(inp1, 0)
if dim2 == 1: if dim2 == 1:
shp = (inp1.shape[0],) shp = (inp1.shape[0],)
inp2 = add_axis(inp2, 1)
inp2 = expand_dims(inp2, 1)
op = builtin.MatrixMul( op = builtin.MatrixMul(
transposeA=transpose_a, transposeA=transpose_a,
transposeB=transpose_b, transposeB=transpose_b,
@@ -981,12 +1033,12 @@ def matmul(


def dot(inp1: Tensor, inp2: Tensor) -> Tensor: def dot(inp1: Tensor, inp2: Tensor) -> Tensor:
""" """
Compute dot-product of two vectors ``inp1`` and ``inp2``.
Computes dot-product of two vectors ``inp1`` and ``inp2``.
inputs must be 1-dimensional, scalar input can be automatically broadcasted. inputs must be 1-dimensional, scalar input can be automatically broadcasted.


:param inp1: The first vector
:param inp2: The second vector
:return: The output value
:param inp1: first vector.
:param inp2: second vector.
:return: output value.


Examples: Examples:


@@ -1016,10 +1068,10 @@ def dot(inp1: Tensor, inp2: Tensor) -> Tensor:


def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor: def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor:
""" """
Compute the singular value decompositions of input matrix ``inp``.
Computes the singular value decomposition of the input matrix.


:param inp: The input matrix, must has shape ``[..., M, N]``
:return: The output matrices, U, sigma, V
:param inp: input matrix; it must have shape `[..., M, N]`.
:return: output matrices, `(U, sigma, V)`.


Examples: Examples:


@@ -1036,7 +1088,7 @@ def svd(inp: Tensor, full_matrices=False, compute_uv=True) -> Tensor:
Outputs: Outputs:


.. testoutput:: .. testoutput::
[7.3485 1. ] [7.3485 1. ]


""" """
@@ -1052,8 +1104,7 @@ def interpolate(
mode: str = "BILINEAR", mode: str = "BILINEAR",
align_corners: bool = None, align_corners: bool = None,
) -> Tensor: ) -> Tensor:
r"""Down/up samples the input tensor to either the given size or the given
scale_factor.
r"""Down/up samples the input tensor to either the given size or with the given scale_factor. ``size`` can not coexist with ``scale_factor``.


:param inp: input tensor. :param inp: input tensor.
:param size: size of the output tensor. Default: None :param size: size of the output tensor. Default: None
@@ -1069,13 +1120,12 @@ def interpolate(
import numpy as np import numpy as np
from megengine import tensor from megengine import tensor
import megengine.functional as F import megengine.functional as F
from megengine.test import assertTensorClose


x = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2)) x = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2))
out = F.interpolate(x, [4, 4], align_corners=False)
out = F.nn.interpolate(x, [4, 4], align_corners=False)
print(out.numpy()) print(out.numpy())
out2 = F.interpolate(x, scale_factor=2.)
assertTensorClose(out.numpy(), out2.numpy())
out2 = F.nn.interpolate(x, scale_factor=2.)
np.testing.assert_allclose(out.numpy(), out2.numpy())


Outputs: Outputs:


@@ -1100,7 +1150,7 @@ def interpolate(
align_corners = False align_corners = False


if mode == "LINEAR": if mode == "LINEAR":
inp = add_axis(inp, 3)
inp = expand_dims(inp, 3)


if inp.ndim != 4: if inp.ndim != 4:
raise ValueError("shape of input tensor must correspond to the operartion mode") raise ValueError("shape of input tensor must correspond to the operartion mode")
@@ -1170,7 +1220,7 @@ def interpolate(
[row0, row1, Tensor([[0, 0, 1]], dtype="float32", device=inp.device)], [row0, row1, Tensor([[0, 0, 1]], dtype="float32", device=inp.device)],
axis=0, axis=0,
).reshape(1, 3, 3) ).reshape(1, 3, 3)
weight = broadcast(weight, (inp.shape[0], 3, 3))
weight = broadcast_to(weight, (inp.shape[0], 3, 3))
else: else:
hscale = 1.0 * ih / oh hscale = 1.0 * ih / oh
wscale = 1.0 * iw / ow wscale = 1.0 * iw / ow
@@ -1186,7 +1236,7 @@ def interpolate(
[row0, row1, Tensor([[0, 0, 1]], dtype="float32", device=inp.device)], [row0, row1, Tensor([[0, 0, 1]], dtype="float32", device=inp.device)],
axis=0, axis=0,
).reshape(1, 3, 3) ).reshape(1, 3, 3)
weight = broadcast(weight, (inp.shape[0], 3, 3))
weight = broadcast_to(weight, (inp.shape[0], 3, 3))


weight = weight.astype("float32") weight = weight.astype("float32")
ret = warp_perspective(inp, weight, dsize, interp_mode="LINEAR") ret = warp_perspective(inp, weight, dsize, interp_mode="LINEAR")
@@ -1197,12 +1247,12 @@ def interpolate(


def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor: def dropout(inp: Tensor, drop_prob: float, training: bool = True) -> Tensor:
"""Returns a new tensor where each of the elements are randomly set to zero """Returns a new tensor where each of the elements are randomly set to zero
with probability P = ``drop_prob``. Optionally rescale the output tensor.
with probability P = ``drop_prob``, and rescales the output tensor if ``training`` is True.


:param inp: input tensor. :param inp: input tensor.
:param drop_prob: probability to drop (set to zero) a single element. :param drop_prob: probability to drop (set to zero) a single element.
:param training: the default behavior of ``dropout`` during training is to rescale the output, :param training: the default behavior of ``dropout`` during training is to rescale the output,
then it can be replaced by an :class:`~.Identity` during inference, default to True.
then it can be replaced by an :class:`~.Identity` during inference. Default: True
:return: the output tensor :return: the output tensor


Examples: Examples:
@@ -1244,10 +1294,10 @@ def embedding(
"""Applies lookup table for embedding. """Applies lookup table for embedding.


:param inp: tensor with indices. :param inp: tensor with indices.
:param weight: learnable weights which embedding from.
:param padding_idx: should be set to None, not support now.
:param max_norm: should be set to None, not support now.
:param norm_type: should be set to None, not support now.
:param weight: learnable weights (the embedding table) to look up in.
:param padding_idx: should be set to None, not supported now.
:param max_norm: should be set to None, not supported now.
:param norm_type: should be set to None, not supported now.
:return: output tensor. :return: output tensor.


Refer to :class:`~.Embedding` for more information. Refer to :class:`~.Embedding` for more information.
@@ -1288,7 +1338,7 @@ def roi_pooling(
np.random.seed(42) np.random.seed(42)
inp = tensor(np.random.randn(1, 1, 128, 128)) inp = tensor(np.random.randn(1, 1, 128, 128))
rois = tensor(np.random.random((4, 5))) rois = tensor(np.random.random((4, 5)))
y = F.roi_pooling(inp, rois, (2, 2))
y = F.nn.roi_pooling(inp, rois, (2, 2))
print(y.numpy()[0]) print(y.numpy()[0])


Outputs: Outputs:
@@ -1323,14 +1373,14 @@ def roi_align(
) -> Tensor: ) -> Tensor:
"""Applies roi align on input feature. """Applies roi align on input feature.


:param inp: tensor that represents the input feature, `(N, C, H, W)` images.
:param rois: `(N, 5)` boxes. First column is the index into N. The other 4 columns are xyxy.
:param inp: tensor that represents the input feature, shape is `(N, C, H, W)`.
:param rois: `(N, 5)` boxes. First column is the box index. The other 4 columns are ``xyxy``.
:param output_shape: `(height, width)` shape of output rois feature. :param output_shape: `(height, width)` shape of output rois feature.
:param mode: "max" or "average", use max/average align just like max/average pooling. Default: "average" :param mode: "max" or "average", use max/average align just like max/average pooling. Default: "average"
:param spatial_scale: scale the input boxes by this number. Default: 1.0 :param spatial_scale: scale the input boxes by this number. Default: 1.0
:param sample_points: number of inputs samples to take for each output sample. :param sample_points: number of inputs samples to take for each output sample.
0 to take samples densely. Default: 2 0 to take samples densely. Default: 2
:param aligned: wheather align the input feature, with `aligned=True`,
:param aligned: whether to align the input feature, with `aligned=True`,
we first appropriately scale the ROI and then shift it by -0.5. Default: True we first appropriately scale the ROI and then shift it by -0.5. Default: True
:return: output tensor. :return: output tensor.


@@ -1345,7 +1395,7 @@ def roi_align(
np.random.seed(42) np.random.seed(42)
inp = tensor(np.random.randn(1, 1, 128, 128)) inp = tensor(np.random.randn(1, 1, 128, 128))
rois = tensor(np.random.random((4, 5))) rois = tensor(np.random.random((4, 5)))
y = F.roi_align(inp, rois, (2, 2))
y = F.nn.roi_align(inp, rois, (2, 2))
print(y.numpy()[0]) print(y.numpy()[0])


Outputs: Outputs:
@@ -1383,7 +1433,7 @@ def roi_align(
def indexing_one_hot( def indexing_one_hot(
src: Tensor, index: Tensor, axis: int = 1, keepdims=False src: Tensor, index: Tensor, axis: int = 1, keepdims=False
) -> Tensor: ) -> Tensor:
r"""One-hot indexing for some axis.
r"""One-hot indexing for some axes.


:param src: input tensor. :param src: input tensor.
:param index: index tensor. :param index: index tensor.
@@ -1417,19 +1467,23 @@ def indexing_one_hot(
index = utils.convert_single_value(index, (src,), dtype="int32", device=src.device) index = utils.convert_single_value(index, (src,), dtype="int32", device=src.device)
(result,) = apply(op, src, index) (result,) = apply(op, src, index)
if not keepdims: if not keepdims:
result = remove_axis(result, axis)
result = squeeze(result, axis)
return result return result
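A short sketch of one-hot indexing (shapes chosen for illustration): with ``axis=1``, ``index[i]`` selects one element from row ``i`` of ``src``.

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    src = tensor(np.arange(6, dtype=np.float32).reshape(2, 3))
    index = tensor(np.array([1, 2], dtype=np.int32))
    out = F.indexing_one_hot(src, index, axis=1)
    print(out.numpy())                 # expected: [1. 5.]  (src[0, 1] and src[1, 2])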




def nms(boxes: Tensor, scores: Tensor, iou_thresh: float) -> Tensor:
def nms(
boxes: Tensor, scores: Tensor, iou_thresh: float, max_output: Optional[int] = None
) -> Tensor:
r""" r"""
Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union(IoU). Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union(IoU).


:param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format. :param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format.
:param iou_thresh: iou threshold for overlapping.
:param iou_thresh: IoU threshold for overlapping.
:param scores: tensor of shape `(N,)`, the score of boxes. :param scores: tensor of shape `(N,)`, the score of boxes.
:param max_output: the maximum number of boxes to keep; it is optional if this operator is not traced,
otherwise it is required to be specified; if it is not specified, all boxes are kept.
:return: indices of the elements that have been kept by NMS. :return: indices of the elements that have been kept by NMS.
Examples: Examples:


.. testcode:: .. testcode::
@@ -1444,13 +1498,13 @@ def nms(boxes: Tensor, scores: Tensor, iou_thresh: float) -> Tensor:
x[:,2:] = np.random.rand(100,2)*20 + 100 x[:,2:] = np.random.rand(100,2)*20 + 100
scores = tensor(np.random.rand(100)) scores = tensor(np.random.rand(100))
inp = tensor(x) inp = tensor(x)
result = F.nms(inp, scores, iou_thresh=0.7)
result = F.nn.nms(inp, scores, iou_thresh=0.7)
print(result.numpy()) print(result.numpy())


Outputs: Outputs:


.. testoutput:: .. testoutput::
[75 69] [75 69]


""" """
@@ -1466,74 +1520,24 @@ def nms(boxes: Tensor, scores: Tensor, iou_thresh: float) -> Tensor:
scores = scores.detach() scores = scores.detach()
sorted_idx = argsort(scores, descending=True) sorted_idx = argsort(scores, descending=True)
boxes = boxes[sorted_idx] boxes = boxes[sorted_idx]
max_output = boxes.shape[0]

if is_tracing():
assert (
max_output is not None and max_output > 0
), "max_output should be specified under tracing"

if max_output is None:
max_output = boxes.shape[0]


op = builtin.NMSKeep(iou_thresh, max_output) op = builtin.NMSKeep(iou_thresh, max_output)
inp = utils.convert_inputs(boxes.reshape(1, -1, 4)) inp = utils.convert_inputs(boxes.reshape(1, -1, 4))
indices, count = apply(op, *inp) indices, count = apply(op, *inp)
indices = indices[0][: count.item()]
indices = indices[0][: count[0]]
keep_inds = sorted_idx[indices] keep_inds = sorted_idx[indices]
return keep_inds return keep_inds
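A tiny sketch of the new ``max_output`` argument (box values are illustrative; under tracing it must be given explicitly, as the assertion above enforces):

    import numpy as np
    from megengine import tensor
    import megengine.functional as F

    boxes = tensor(np.array([[0, 0, 10, 10],
                             [1, 1, 11, 11],
                             [50, 50, 60, 60]], dtype=np.float32))
    scores = tensor(np.array([0.9, 0.8, 0.7], dtype=np.float32))
    keep = F.nn.nms(boxes, scores, iou_thresh=0.5, max_output=3)
    print(keep.numpy())                # expected: [0 2]; box 1 overlaps box 0 above the threshold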




def batched_nms(
boxes: Tensor, scores: Tensor, idxs: Tensor, iou_thresh: float,
) -> Tensor:
r"""
Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union (IoU).

:param boxes: tensor of shape `(N, 4)`; the boxes to perform nms on; each box is expected to be in `(x1, y1, x2, y2)` format
:param iou_thresh: iou threshold for overlapping
:param idxs: tensor of shape `(N,)`, the class indexs of boxes in the batch.
:param scores: tensor of shape `(N,)`, the score of boxes.
:return: indices and the number of the elements that have been kept by NMS


Examples:


.. testcode::

import numpy as np
from megengine import tensor
import megengine.functional as F

x = np.zeros((100,4))
np.random.seed(42)
x[:,:2] = np.random.rand(100,2)*20
x[:,2:] = np.random.rand(100,2)*20 + 100
scores = tensor(np.random.rand(100))
idxs = tensor(np.random.randint(0, 10, 100))
inp = tensor(x)
result = F.batched_nms(inp, scores, idxs, iou_thresh=0.6)
print(result.numpy())

Outputs:

.. testoutput::

[75 41 99 98 69 64 11 27 35 18]

"""
assert (
boxes.ndim == 2 and boxes.shape[1] == 4
), "the expected shape of boxes is (N, 4)"
assert scores.ndim == 1, "the expected shape of scores is (N,)"
assert idxs.ndim == 1, "the expected shape of idxs is (N,)"
assert boxes.shape[0] == scores.shape[0] == idxs.shape[0]

boxes = boxes.detach()
scores = scores.detach()
idxs = idxs.detach()
max_coordinate = boxes.max()
offsets = idxs.astype("float32") * (max_coordinate + 1)
boxes = boxes + offsets.reshape(-1, 1).broadcast(boxes.shape[0], 4)

sorted_idx = argsort(scores, descending=True)
boxes = boxes[sorted_idx]
max_output = boxes.shape[0]

op = builtin.NMSKeep(iou_thresh, max_output)
inp = utils.convert_inputs(boxes.reshape(1, -1, 4))
indices, count = apply(op, *inp)
indices = indices[0][: count.item()]
keep_inds = sorted_idx[indices]
return keep_inds
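For reference, the class-offset trick in the removed helper above can be illustrated with plain numpy: shifting each class by a distinct offset puts different classes in disjoint coordinate ranges, so a single class-agnostic NMS pass only suppresses boxes within the same class.

    import numpy as np

    boxes = np.array([[0., 0., 10., 10.],
                      [0., 0., 10., 10.]])          # identical boxes ...
    idxs = np.array([0, 1])                         # ... but different class labels
    offsets = idxs.astype("float32") * (boxes.max() + 1)
    print(boxes + offsets.reshape(-1, 1))           # second box shifted to [11. 11. 21. 21.]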
from .loss import * # isort:skip
from .quantized import conv_bias_activation # isort:skip

+ 0
- 34
imperative/python/megengine/functional/param_pack.py View File

@@ -1,34 +0,0 @@
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import numpy as np

from ..tensor import Tensor
from .distributed import all_reduce_sum
from .tensor import param_pack_concat, param_pack_split


def get_offsets(shapes):
offsets = []
offset = 0
for shape in shapes:
offsets.append(offset)
offset += int(np.prod(shape))
offsets.append(offset)
return offsets


def pack_allreduce_split(pack_list, shapes, group, reduce_method):
offsets_val = get_offsets(shapes)
offsets = Tensor(offsets_val)
packed_grads = param_pack_concat(pack_list, offsets, offsets_val)
packed_grads = all_reduce_sum(packed_grads, group)
if reduce_method == "mean":
packed_grads /= group.size
grads = param_pack_split(packed_grads, offsets_val, shapes)
return grads
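A worked example of the offset layout computed by the removed ``get_offsets`` helper (the logic is restated inline so the snippet stands alone): two parameters with 6 and 4 elements pack into the ranges `[0, 6)` and `[6, 10)`.

    import numpy as np

    def get_offsets(shapes):            # same logic as the removed helper above
        offsets, offset = [], 0
        for shape in shapes:
            offsets.append(offset)
            offset += int(np.prod(shape))
        offsets.append(offset)
        return offsets

    print(get_offsets([(2, 3), (4,)]))  # -> [0, 6, 10]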

+ 7
- 10
imperative/python/megengine/functional/quantized.py View File

@@ -34,26 +34,23 @@ def conv_bias_activation(
:param weight: convolution kernel. :param weight: convolution kernel.
:param bias: bias added to the result of convolution :param bias: bias added to the result of convolution
:param stride: stride of the 2D convolution operation. Default: 1 :param stride: stride of the 2D convolution operation. Default: 1
:param padding: size of the paddings added to the input on both sides of its
spatial dimensions. Only zero-padding is supported. Default: 0
:param padding: size of the paddings added to the input on both sides of its spatial dimensions. Only zero-padding is supported. Default: 0
:param dilation: dilation of the 2D convolution operation. Default: 1 :param dilation: dilation of the 2D convolution operation. Default: 1
:param groups: number of groups to divide input and output channels into,
so as to perform a "grouped convolution". When groups is not 1,
in_channels and out_channels must be divisible by groups,
:param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1,
``in_channels`` and ``out_channels`` must be divisible by ``groups``,
and the shape of weight should be `(groups, out_channel // groups, and the shape of weight should be `(groups, out_channel // groups,
in_channels // groups, height, width)`. in_channels // groups, height, width)`.
:type conv_mode: string or :class:`P.Convolution.Mode`. :type conv_mode: string or :class:`P.Convolution.Mode`.
:param conv_mode: supports 'CROSS_CORRELATION' or 'CONVOLUTION'. Default: :param conv_mode: supports 'CROSS_CORRELATION' or 'CONVOLUTION'. Default:
'CROSS_CORRELATION' 'CROSS_CORRELATION'
:param dtype: support for np.dtype, Default: np.int8
:param dtype: support for ``np.dtype``, Default: np.int8
:param scale: scale if use quantization, Default: 0.0 :param scale: scale if use quantization, Default: 0.0
:param zero_point: scale if use quantization quint8, Default: 0.0 :param zero_point: scale if use quantization quint8, Default: 0.0
:type compute_mode: string or :type compute_mode: string or
:class:`P.Convolution.ComputeMode`. :class:`P.Convolution.ComputeMode`.
:param compute_mode: when set to 'DEFAULT', no special requirements will be
placed on the precision of intermediate results. When set to 'FLOAT32',
Float32 would be used for accumulator and intermediate result, but only
effective when input and output are of Float16 dtype.
:param compute_mode: when set to "DEFAULT", no special requirements will be
placed on the precision of intermediate results. When set to "FLOAT32",
"Float32" would be used for accumulator and intermediate result, but only effective when input and output are of Float16 dtype.


""" """
ph, pw = _pair(padding) ph, pw = _pair(padding)


+ 62
- 199
imperative/python/megengine/functional/tensor.py View File

@@ -19,6 +19,7 @@ from ..core.ops import builtin
from ..core.ops._internal import param_defs as P from ..core.ops._internal import param_defs as P
from ..core.ops.special import Const from ..core.ops.special import Const
from ..core.tensor.core import TensorBase, TensorWrapperBase, apply from ..core.tensor.core import TensorBase, TensorWrapperBase, apply
from ..core.tensor.tensor_wrapper import _broadcast, _remove_axis
from ..core.tensor.utils import ( from ..core.tensor.utils import (
astensor1d, astensor1d,
convert_inputs, convert_inputs,
@@ -31,27 +32,22 @@ from ..tensor import Tensor
from .elemwise import ceil from .elemwise import ceil


__all__ = [ __all__ = [
"add_axis",
"arange", "arange",
"broadcast",
"broadcast_to",
"concat", "concat",
"cond_take", "cond_take",
"transpose",
"add_axis",
"expand_dims",
"eye", "eye",
"flatten", "flatten",
"full", "full",
"full_like", "full_like",
"gather", "gather",
"identity",
"linspace", "linspace",
"ones", "ones",
"ones_like", "ones_like",
"param_pack_concat",
"param_pack_split",
"reshape", "reshape",
"remove_axis",
"split", "split",
"squeeze",
"stack", "stack",
"scatter", "scatter",
"transpose", "transpose",
@@ -61,11 +57,10 @@ __all__ = [
] ]




def eye(shape, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor:
def eye(N, M=None, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor:
"""Returns a 2D tensor with ones on the diagonal and zeros elsewhere. """Returns a 2D tensor with ones on the diagonal and zeros elsewhere.


:param shape: expected shape of otuput tensor.
:param N: number of rows of the output tensor (a full shape is also accepted).
:param M: number of columns. Default: None
:param dtype: data type. Default: None :param dtype: data type. Default: None
:param device: compute node of the matrix. Default: None :param device: compute node of the matrix. Default: None
:return: eye matrix. :return: eye matrix.
@@ -77,8 +72,7 @@ def eye(shape, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor:
import numpy as np import numpy as np
import megengine.functional as F import megengine.functional as F


data_shape = (4, 6)
out = F.eye(data_shape, dtype=np.float32)
out = F.eye(4, 6, dtype=np.float32)
print(out.numpy()) print(out.numpy())


Outputs: Outputs:
@@ -91,8 +85,17 @@ def eye(shape, *, dtype="float32", device: Optional[CompNode] = None) -> Tensor:
[0. 0. 0. 1. 0. 0.]] [0. 0. 0. 1. 0. 0.]]


""" """
if M is not None:
if isinstance(N, Tensor) or isinstance(M, Tensor):
shape = astensor1d((N, M))
else:
shape = Tensor([N, M], dtype="int32", device=device)
elif isinstance(N, Tensor):
shape = N
else:
shape = Tensor(N, dtype="int32", device=device)
op = builtin.Eye(k=0, dtype=dtype, comp_node=device) op = builtin.Eye(k=0, dtype=dtype, comp_node=device)
(result,) = apply(op, Tensor(shape, dtype="int32", device=device))
(result,) = apply(op, shape)
return result return result




@@ -106,7 +109,7 @@ def full(shape, value, dtype="float32", device=None):
(x,) = Const(value, dtype=dtype, device=device)( (x,) = Const(value, dtype=dtype, device=device)(
Tensor(value, dtype=dtype, device=device) Tensor(value, dtype=dtype, device=device)
) )
return broadcast(x, shape)
return broadcast_to(x, shape)
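A one-liner sketch of ``full`` as implemented above (broadcasting a scalar constant to the requested shape):

    import megengine.functional as F

    out = F.full((2, 3), 7)
    print(out.numpy())                 # expected: [[7. 7. 7.] [7. 7. 7.]]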




def ones(shape, dtype="float32", device=None): def ones(shape, dtype="float32", device=None):
@@ -160,7 +163,7 @@ def zeros_like(inp: Tensor) -> Tensor:
print(out.numpy()) print(out.numpy())


Outputs: Outputs:
.. testoutput:: .. testoutput::


[[0 0 0] [[0 0 0]
@@ -171,7 +174,7 @@ def zeros_like(inp: Tensor) -> Tensor:




def ones_like(inp: Tensor) -> Tensor: def ones_like(inp: Tensor) -> Tensor:
"""Returns a identity tensor with the same shape as input tensor.
"""Returns a ones tensor with the same shape as input tensor.
""" """
return ones(inp.shape, dtype=inp.dtype, device=inp.device) return ones(inp.shape, dtype=inp.dtype, device=inp.device)


@@ -182,19 +185,7 @@ def full_like(inp: Tensor, value: Union[int, float]) -> Tensor:
return full(inp.shape, value, dtype=inp.dtype, device=inp.device) return full(inp.shape, value, dtype=inp.dtype, device=inp.device)




def identity(inp: Tensor) -> Tensor:
"""Applies an identity transform to the input tensor.

:param inp: input tensor.
:return: output tensor.
"""
op = builtin.Identity()
(data,) = convert_inputs(inp)
(output,) = apply(op, data)
return output


def broadcast(inp: Tensor, shape: Union[int, Iterable[int]]) -> Tensor:
def broadcast_to(inp: Tensor, shape: Union[int, Iterable[int]]) -> Tensor:
""" """
Broadcasts a tensor to given shape. Broadcasts a tensor to given shape.


@@ -211,7 +202,7 @@ def broadcast(inp: Tensor, shape: Union[int, Iterable[int]]) -> Tensor:
import megengine.functional as F import megengine.functional as F


data = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3)) data = tensor(np.arange(0, 6, dtype=np.float32).reshape(2, 3))
out = F.broadcast(data, (4, 2, 3))
out = F.broadcast_to(data, (4, 2, 3))
print(out.numpy()) print(out.numpy())


Outputs: Outputs:
@@ -231,9 +222,7 @@ def broadcast(inp: Tensor, shape: Union[int, Iterable[int]]) -> Tensor:
[3. 4. 5.]]] [3. 4. 5.]]]


""" """
shape = astensor1d(shape, inp, dtype="int32", device=inp.device)
(result,) = apply(builtin.Broadcast(), inp, shape)
return result
return _broadcast(inp, shape)




def concat(inps: Iterable[Tensor], axis: int = 0, device=None) -> Tensor: def concat(inps: Iterable[Tensor], axis: int = 0, device=None) -> Tensor:
@@ -241,8 +230,8 @@ def concat(inps: Iterable[Tensor], axis: int = 0, device=None) -> Tensor:
Concat some tensors Concat some tensors


:param inps: input tensors to concat. :param inps: input tensors to concat.
:param axis: dimension over which the tensors are concatenated. Default: 0
:param device: comp node output on. Default: None
:param axis: the dimension over which the tensors are concatenated. Default: 0
:param device: the device on which the output will be placed. Default: None
:return: output tensor. :return: output tensor.


Examples: Examples:
@@ -290,7 +279,7 @@ def stack(inps, axis=0, device=None):


:param inps: input tensors. :param inps: input tensors.
:param axis: which axis will be concatenated. :param axis: which axis will be concatenated.
:param device: The comp node output on. Default: None
:param device: the device on which the output will be placed. Default: None
:return: output concatenated tensor. :return: output concatenated tensor.


Examples: Examples:
@@ -322,7 +311,7 @@ def stack(inps, axis=0, device=None):
if len(shapes) != 1: if len(shapes) != 1:
raise ValueError("All input tensors must have the same shape") raise ValueError("All input tensors must have the same shape")


inps = [add_axis(inp, axis=axis) for inp in inps]
inps = [expand_dims(inp, axis=axis) for inp in inps]
return concat(inps, axis=axis, device=device) return concat(inps, axis=axis, device=device)




@@ -331,7 +320,7 @@ def split(inp, nsplits_or_sections, axis=0):
When nsplits_or_sections is int, the last tensor may be smaller than others. When nsplits_or_sections is int, the last tensor may be smaller than others.


:param inp: input tensor. :param inp: input tensor.
:param nsplits_or_sections: number of sub tensors or section information list.
:param nsplits_or_sections: number of sub tensors or sections information list.
:param axis: which axis will be splited. :param axis: which axis will be splited.
:return: output tensor list. :return: output tensor list.


@@ -399,8 +388,7 @@ def _get_idx(index, axis):
0, index.shape[i] - 1, index.shape[i], device=index.device, 0, index.shape[i] - 1, index.shape[i], device=index.device,
) )
arange = ( arange = (
arange.reshape(*shape)
.broadcast(index.shape)
broadcast_to(arange.reshape(*shape), index.shape)
.reshape(-1) .reshape(-1)
.astype(np.int32) .astype(np.int32)
) )
@@ -411,7 +399,8 @@ def _get_idx(index, axis):




def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor: def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor:
r"""Gathers data from inp on axis using index.
# TODO: rewrite doc
r"""Gathers data from input tensor on axis using index.


For a 3-D tensor, the output is specified by:: For a 3-D tensor, the output is specified by::


@@ -419,14 +408,14 @@ def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor:
out[i][j][k] = inp[i][index[i][j][k]][k] # if axis == 1 out[i][j][k] = inp[i][index[i][j][k]][k] # if axis == 1
out[i][j][k] = inp[i][j][index[i][j][k]] # if axis == 2 out[i][j][k] = inp[i][j][index[i][j][k]] # if axis == 2


if inp is an n-dimensional tensor with size
if the input tensor is an n-dimensional tensor with size
:math:`(x_0,x_1,...,x_{i-1},x_i,x_{i+1},...,x_{n-1})` and axis=i, :math:`(x_0,x_1,...,x_{i-1},x_i,x_{i+1},...,x_{n-1})` and axis=i,
then index must be an n-dimensional tensor with size
then index must be an n-dimensional tensor with size
:math:`(x_0,x_1,...,x_{i-1},y,x_{i+1},...,x_{n-1})` where :math:`y\ge 1` and :math:`(x_0,x_1,...,x_{i-1},y,x_{i+1},...,x_{n-1})` where :math:`y\ge 1` and
output will have the same size as index. output will have the same size as index.


:param inp: input tensor. :param inp: input tensor.
:param axis: axis along which to index.
:param axis: the axis along which to index.
:param index: indices of elements to gather. :param index: indices of elements to gather.
:return: output tensor. :return: output tensor.
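The indexing rule spelled out above has a direct numpy analogue; a 2-D sketch with ``axis == 0`` (so ``out[i][j] = inp[index[i][j]][j]``):

    import numpy as np

    inp = np.arange(6).reshape(3, 2)                # [[0 1] [2 3] [4 5]]
    index = np.array([[0, 2],
                      [1, 0]])
    print(np.take_along_axis(inp, index, axis=0))   # [[0 5] [2 1]]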


@@ -482,20 +471,21 @@ def gather(inp: Tensor, axis: int, index: Tensor) -> Tensor:




def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor: def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor:
r"""Writes all values from the tensor source into inp
# TODO: rewrite doc
r"""Writes all values from the tensor source into input tensor
at the indices specified in the index tensor. at the indices specified in the index tensor.


For each value in source, its output index is specified by its index For each value in source, its output index is specified by its index
in source for ``axis != dimension`` and by the corresponding value in in source for ``axis != dimension`` and by the corresponding value in
index for ``axis = dimension``. index for ``axis = dimension``.


For a 3-D tensor, inp is updated as::
For a 3-D tensor, input tensor is updated as::


inp[index[i][j][k]][j][k] = source[i][j][k] # if axis == 0 inp[index[i][j][k]][j][k] = source[i][j][k] # if axis == 0
inp[i][index[i][j][k]][k] = source[i][j][k] # if axis == 1 inp[i][index[i][j][k]][k] = source[i][j][k] # if axis == 1
inp[i][j][index[i][j][k]] = source[i][j][k] # if axis == 2 inp[i][j][index[i][j][k]] = source[i][j][k] # if axis == 2


inp, index and source should have same number of dimensions.
``inp``, ``index`` and ``source`` should have same number of dimensions.


It is also required that ``source.shape(d) <= inp.shape(d)`` and ``index.shape(d) == source.shape(d)`` It is also required that ``source.shape(d) <= inp.shape(d)`` and ``index.shape(d) == source.shape(d)``
for all dimensions ``d``. for all dimensions ``d``.
@@ -504,10 +494,10 @@ def scatter(inp: Tensor, axis: int, index: Tensor, source: Tensor) -> Tensor:


.. note:: .. note::
Please notice that, due to performance issues, the result is uncertain on the GPU device Please notice that, due to performance issues, the result is uncertain on the GPU device
if scatter difference positions from source to the same destination position
if multiple positions from source are scattered to the same destination position
with regard to the index tensor.


Show the case using the following examples, the oup[0][2] is maybe
Check the following example: ``oup[0][2]`` may come
from source[0][2] which value is 0.2256 or source[1][2] which value is 0.5339 from source[0][2] which value is 0.2256 or source[1][2] which value is 0.5339
if set the index[1][2] from 1 to 0. if set the index[1][2] from 1 to 0.


@@ -593,7 +583,7 @@ def where(mask: Tensor, x: Tensor, y: Tensor) -> Tensor:


\textrm{out}_i = x_i \textrm{ if } \textrm{mask}_i \textrm{ is True else } y_i \textrm{out}_i = x_i \textrm{ if } \textrm{mask}_i \textrm{ is True else } y_i


:param mask: a mask used for choosing x or y.
:param mask: a mask used for choosing ``x`` or ``y``.
:param x: first choice. :param x: first choice.
:param y: second choice. :param y: second choice.
:return: output tensor. :return: output tensor.
@@ -649,7 +639,7 @@ def where(mask: Tensor, x: Tensor, y: Tensor) -> Tensor:


def cond_take(mask: Tensor, x: Tensor) -> Tensor: def cond_take(mask: Tensor, x: Tensor) -> Tensor:
r""" r"""
Take elements from data if specific condition is satisfied on mask.
Takes elements from data where the condition in mask is satisfied.
This operator has two outputs: the first is the elements taken, This operator has two outputs: the first is the elements taken,
and the second is the indices corresponding to those elements; and the second is the indices corresponding to those elements;
they are both 1-dimensional. High-dimension input would first be flattened. they are both 1-dimensional. High-dimension input would first be flattened.
@@ -696,7 +686,7 @@ def transpose(inp: Tensor, pattern: Iterable[int]) -> Tensor:
Swaps shapes and strides according to given pattern. Swaps shapes and strides according to given pattern.


:param inp: input tensor. :param inp: input tensor.
:param pattern: a list of integers including 0, 1, ... , ``ndim``-1,
:param pattern: a list of integers including 0, 1, ... , ``ndim``-1,
and any number of ``'x'`` char in dimensions where this tensor should be broadcasted. For examples: and any number of ``'x'`` char in dimensions where this tensor should be broadcasted. For examples:


* (``'x'``) -> make a 0d (scalar) into a 1d vector * (``'x'``) -> make a 0d (scalar) into a 1d vector
@@ -707,7 +697,7 @@ def transpose(inp: Tensor, pattern: Iterable[int]) -> Tensor:
* (2, 0, 1) -> AxBxC to CxAxB * (2, 0, 1) -> AxBxC to CxAxB
* (0, ``'x'``, 1) -> AxB to Ax1xB * (0, ``'x'``, 1) -> AxB to Ax1xB
* (1, ``'x'``, 0) -> AxB to Bx1xA * (1, ``'x'``, 0) -> AxB to Bx1xA
* (1,) -> This remove dimensions 0. It must be a broadcastable dimension (1xA to A)
* (1,) -> this removes dimension 0. It must be a broadcastable dimension (1xA to A)


:return: output tensor. :return: output tensor.


@@ -730,13 +720,7 @@ def transpose(inp: Tensor, pattern: Iterable[int]) -> Tensor:
[1 0]] [1 0]]


""" """
op = builtin.Dimshuffle(pattern)
(inp,) = convert_inputs(inp)
(result,) = apply(op, inp)
return result


dimshuffle = transpose
return inp.transpose(pattern)




def reshape(inp: Tensor, target_shape: Iterable[int]) -> Tensor: def reshape(inp: Tensor, target_shape: Iterable[int]) -> Tensor:
@@ -745,8 +729,7 @@ def reshape(inp: Tensor, target_shape: Iterable[int]) -> Tensor:
remain unchanged remain unchanged


:param inp: input tensor. :param inp: input tensor.
:param target_shape: target shape, the components would be concatenated to form the
target shape, and it can contain an element of -1 representing unspec_axis.
:param target_shape: target shape, it can contain an element of -1 representing ``unspec_axis``.


Examples: Examples:


@@ -773,26 +756,7 @@ def reshape(inp: Tensor, target_shape: Iterable[int]) -> Tensor:
[10 11]]] [10 11]]]


""" """
if isinstance(target_shape, (TensorBase, TensorWrapperBase)):
target_shape = target_shape.numpy()
target_shape = tuple(map(int, target_shape))
unspec_axis = None
for i, s in enumerate(target_shape):
if s < 0:
if s != -1:
raise ValueError("expect shape[{}] >= -1, got {}".format(i, s))
if unspec_axis is not None:
raise ValueError("multiple -1 in shape: {} & {}".format(unspec_axis, i))
unspec_axis = i

# TODO: device should be None (cpu)
(target_shape,) = Const(target_shape, dtype="int32", device=inp.device)(inp)
if unspec_axis is None:
op = builtin.Reshape()
else:
op = builtin.Reshape(unspec_axis=unspec_axis)
(x,) = apply(op, inp, target_shape)
return x
return inp.reshape(target_shape)
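As a quick, illustrative sketch of the ``unspec_axis`` (-1) behaviour described in the docstring above:

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    x = tensor(np.arange(12, dtype=np.int32))
    y = F.reshape(x, (3, -1))  # the -1 axis is inferred as 4
    print(y.numpy())           # three rows of four consecutive integers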




AxisAddRemove = builtin.AxisAddRemove AxisAddRemove = builtin.AxisAddRemove
@@ -837,7 +801,7 @@ def flatten(inp: Tensor, start_axis: int = 0, end_axis: int = -1) -> Tensor:
return inp.reshape(*target_shape) return inp.reshape(*target_shape)




def add_axis(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
def expand_dims(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
r""" r"""
Adds dimension before given axis. Adds dimension before given axis.


@@ -854,7 +818,7 @@ def add_axis(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
import megengine.functional as F import megengine.functional as F


x = tensor([1, 2]) x = tensor([1, 2])
out = F.add_axis(x, 0)
out = F.expand_dims(x, 0)
print(out.shape) print(out.shape)


Outputs: Outputs:
@@ -883,12 +847,7 @@ def add_axis(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
return result return result




add_axis = add_axis


def remove_axis(
inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None
) -> Tensor:
def squeeze(inp: Tensor, axis: Optional[Union[int, Sequence[int]]] = None) -> Tensor:
r""" r"""
Removes dimension of shape 1. Removes dimension of shape 1.


@@ -905,7 +864,7 @@ def remove_axis(
import megengine.functional as F import megengine.functional as F


x = tensor(np.array([1, 2], dtype=np.int32).reshape(1, 1, 2, 1)) x = tensor(np.array([1, 2], dtype=np.int32).reshape(1, 1, 2, 1))
out = F.remove_axis(x, 3)
out = F.squeeze(x, 3)
print(out.shape) print(out.shape)


Outputs: Outputs:
@@ -915,25 +874,7 @@ def remove_axis(
(1, 1, 2) (1, 1, 2)


""" """
Param = builtin.AxisAddRemove.Param

def get_axes():
if axis is None:
return [i for i, s in enumerate(inp.shape) if s == 1]
try:
return [int(axis)]
except (TypeError, ValueError):
pass
return list(map(int, axis))

axis = get_axes()
axis = sorted(i + inp.ndim if i < 0 else i for i in axis)
axis = [a - i for i, a in enumerate(axis)]

param = Param(*map(builtin.AxisAddRemove.AxisDesc.make_remove, axis))
op = builtin.AxisAddRemove(param=param)
(result,) = apply(op, inp)
return result
return _remove_axis(inp, axis)




def linspace( def linspace(
@@ -962,7 +903,7 @@ def linspace(
print(a.numpy()) print(a.numpy())


Outputs: Outputs:
.. testoutput:: .. testoutput::


[ 3. 4.75 6.5 8.25 10. ] [ 3. 4.75 6.5 8.25 10. ]
@@ -982,15 +923,15 @@ def linspace(


def arange( def arange(
start: Union[int, float, Tensor] = 0, start: Union[int, float, Tensor] = 0,
end: Optional[Union[int, float, Tensor]] = None,
stop: Optional[Union[int, float, Tensor]] = None,
step: Union[int, float, Tensor] = 1, step: Union[int, float, Tensor] = 1,
dtype="float32", dtype="float32",
device: Optional[CompNode] = None, device: Optional[CompNode] = None,
) -> Tensor: ) -> Tensor:
r"""Returns a Tensor with values from start to end with adjacent interval step.
r"""Returns a tensor with values from start to stop with adjacent interval step.


:param start: starting value of the sequence, should be a scalar. :param start: starting value of the sequence, should be a scalar.
:param end: ending value of the squence, shoule be scalar.
:param stop: ending value of the sequence, should be a scalar.
:param step: gap between each pair of adjacent values. Default: 1 :param step: gap between each pair of adjacent values. Default: 1
:param dtype: result data type. :param dtype: result data type.
:return: generated tensor. :return: generated tensor.
@@ -1004,7 +945,7 @@ def arange(


a = F.arange(5) a = F.arange(5)
print(a.numpy()) print(a.numpy())


Outputs: Outputs:
@@ -1014,96 +955,18 @@ def arange(
[0. 1. 2. 3. 4.] [0. 1. 2. 3. 4.]


""" """
if end is None:
start, end = 0, start
if stop is None:
start, stop = 0, start


if isinstance(start, Tensor): if isinstance(start, Tensor):
start = start.astype("float32") start = start.astype("float32")
if isinstance(end, Tensor):
end = end.astype("float32")
if isinstance(stop, Tensor):
stop = stop.astype("float32")
if isinstance(step, Tensor): if isinstance(step, Tensor):
step = step.astype("float32") step = step.astype("float32")
num = ceil(Tensor((end - start) / step, device=device))
num = ceil(Tensor((stop - start) / step, device=device))
stop = start + step * (num - 1) stop = start + step * (num - 1)
result = linspace(start, stop, num, device=device) result = linspace(start, stop, num, device=device)
if np.dtype(dtype) == np.int32: if np.dtype(dtype) == np.int32:
return result.astype(dtype) return result.astype(dtype)
return result return result
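Since the keyword argument is renamed from ``end`` to ``stop`` here, a brief usage sketch of the new signature (purely illustrative):

    import megengine.functional as F

    a = F.arange(5)              # start defaults to 0 -> 0, 1, 2, 3, 4
    b = F.arange(1, 10, step=2)  # 1, 3, 5, 7, 9 (float32 by default)
    print(a.numpy(), b.numpy())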


def param_pack_split(inp: Tensor, offsets: List, shapes: List) -> Tensor:
r"""
Returns split Tensor to Tensor list as offsets and shapes described,
only used for parampack.

:param inp: input tensor.
:param offsets: offsets of outputs, length of 2 * n,
while n is tensor nums you want to split,
format `[begin0, end0, begin1, end1]`.
:param shapes: tensor shapes of outputs.
:return: split tensors.

Examples:

.. testcode::

import numpy as np
import megengine.functional as F
from megengine import tensor

a = tensor(np.ones((10,), np.int32))
b, c = F.param_pack_split(a, [0, 1, 1, 10], [(1,), (3, 3)])
print(b.numpy())
print(c.numpy())
Outputs:
.. testoutput::

[1]
[[1 1 1]
[1 1 1]
[1 1 1]]

"""
op = builtin.ParamPackSplit()
op.offsets = offsets
op.shapes = shapes
return apply(op, inp)


def param_pack_concat(inps: List, offsets: Tensor, offsets_val: List) -> Tensor:
r"""
Returns concat Tensor, only used for parampack.

:param inps: input tensors.
:param offsets: device value of offsets.
:param offsets_val: offsets of inputs, length of 2 * n,
format [begin0, end0, begin1, end1].
:return: concat tensors

Examples:

.. testcode::

import numpy as np
import megengine.functional as F
from megengine import tensor

a = tensor(np.ones((1,), np.int32))
b = tensor(np.ones((3, 3), np.int32))
offsets_val = [0, 1, 1, 10]
offsets = tensor(offsets_val, np.int32)
c = F.param_pack_concat([a, b], offsets, offsets_val)
print(c.numpy())
Outputs:
.. testoutput::

[1 1 1 1 1 1 1 1 1 1]

"""
op = builtin.ParamPackConcat()
op.offsets = offsets_val
return apply(op, *inps, offsets)[0]

+19 -24  imperative/python/megengine/functional/utils.py

@@ -11,18 +11,24 @@ from typing import Iterable, Union


import numpy as np import numpy as np


from ..core.ops.builtin import Copy
from ..core._wrap import device as as_device
from ..core.ops.builtin import Copy, Identity
from ..core.tensor import Tensor from ..core.tensor import Tensor
from ..core.tensor.core import apply from ..core.tensor.core import apply
from .math import topk as _topk from .math import topk as _topk
from .tensor import transpose as _transpose
from .tensor import broadcast_to, transpose


__all__ = [
"topk_accuracy",
"copy",
]


def accuracy(

def topk_accuracy(
logits: Tensor, target: Tensor, topk: Union[int, Iterable[int]] = 1 logits: Tensor, target: Tensor, topk: Union[int, Iterable[int]] = 1
) -> Union[Tensor, Iterable[Tensor]]: ) -> Union[Tensor, Iterable[Tensor]]:
r""" r"""
Calculate the classification accuracy given predicted logits and ground-truth labels.
Calculates the classification accuracy given predicted logits and ground-truth labels.


:param logits: model predictions of shape `[batch_size, num_classes]`, :param logits: model predictions of shape `[batch_size, num_classes]`,
representing the probability (likelihood) of each class. representing the probability (likelihood) of each class.
@@ -40,7 +46,7 @@ def accuracy(


logits = tensor(np.arange(80, dtype=np.int32).reshape(8,10)) logits = tensor(np.arange(80, dtype=np.int32).reshape(8,10))
target = tensor(np.arange(8, dtype=np.int32)) target = tensor(np.arange(8, dtype=np.int32))
top1, top5 = F.accuracy(logits, target, (1, 5))
top1, top5 = F.topk_accuracy(logits, target, (1, 5))
print(top1.numpy(), top5.numpy()) print(top1.numpy(), top5.numpy())


Outputs: Outputs:
@@ -54,8 +60,8 @@ def accuracy(
_, pred = _topk(logits, k=max(topk), descending=True) _, pred = _topk(logits, k=max(topk), descending=True)
accs = [] accs = []
for k in topk: for k in topk:
correct = pred[:, :k].detach() == _transpose(target, (0, "x")).broadcast(
target.shape[0], k
correct = pred[:, :k].detach() == broadcast_to(
transpose(target, (0, "x")), (target.shape[0], k)
) )
accs.append(correct.astype(np.float32).sum() / target.shape[0]) accs.append(correct.astype(np.float32).sum() / target.shape[0])
if len(topk) == 1: # type: ignore[arg-type] if len(topk) == 1: # type: ignore[arg-type]
@@ -63,25 +69,12 @@ def accuracy(
return accs return accs




def zero_grad(inp: Tensor) -> Tensor:
r"""
Returns a tensor which is treated as constant during backward gradient calcuation,
i.e. its gradient is zero.

:param inp: Input tensor.

See implementation of :func:`~.softmax` for example.
"""
print("zero_grad is obsoleted, please use detach instead")
raise NotImplementedError


def copy(inp, cn):
def copy(inp, device=None):
r""" r"""
Copy tensor to another device.
Copies tensor to another device.


:param inp: input tensor. :param inp: input tensor.
:param cn: device that you copy to.
:param device: destination device.


Examples: Examples:


@@ -101,4 +94,6 @@ def copy(inp, cn):


[1 2 3] [1 2 3]
""" """
return apply(Copy(comp_node=cn), inp)[0]
if device is None:
return apply(Identity(), inp)[0]
return apply(Copy(comp_node=as_device(device).to_c()), inp)[0]
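With the new signature, ``device`` is optional: omitting it copies on the current device via ``Identity``, while passing a device moves the data. A hedged sketch (``cpu0`` is just an example device name):

    import numpy as np
    import megengine.functional as F
    from megengine import tensor

    x = tensor(np.array([1, 2, 3], dtype=np.int32))
    same_device = F.copy(x)     # no device given: Identity-based copy
    on_cpu = F.copy(x, "cpu0")  # explicit destination device
    print(on_cpu.numpy())       # [1 2 3]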

+3 -3  imperative/python/megengine/hub/exceptions.py

@@ -19,12 +19,12 @@ class InvalidGitHost(FetcherError):




class GitPullError(FetcherError): class GitPullError(FetcherError):
"""A git pull error occurred"""
"""A git pull error occurred."""




class GitCheckoutError(FetcherError): class GitCheckoutError(FetcherError):
"""A git checkout error occurred"""
"""A git checkout error occurred."""




class InvalidProtocol(FetcherError): class InvalidProtocol(FetcherError):
"""The protocol provided was somehow invalid"""
"""The protocol provided was somehow invalid."""

+14 -14  imperative/python/megengine/hub/fetcher.py

@@ -106,20 +106,20 @@ class GitSSHFetcher(RepoFetcherBase):


:param git_host: :param git_host:
host address of git repo. host address of git repo.
example: github.com
Example: github.com
:param repo_info: :param repo_info:
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional
tag/branch. The default branch is ``master`` if not specified. tag/branch. The default branch is ``master`` if not specified.
example: ``"brain_sdk/MegBrain[:hub]"``
Example: ``"brain_sdk/MegBrain[:hub]"``
:param use_cache: :param use_cache:
whether to use locally fetched code or completely re-fetch
whether to use locally fetched code or completely re-fetch.
:param commit: :param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param silent: :param silent:
whether to accept the stdout and stderr of the subprocess with PIPE, instead of whether to accept the stdout and stderr of the subprocess with PIPE, instead of
displaying on the screen
displaying on the screen.
:return: :return:
directory where the repo code is stored
directory where the repo code is stored.
""" """
if not cls._check_git_host(git_host): if not cls._check_git_host(git_host):
raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host)) raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host))
@@ -215,24 +215,24 @@ class GitHTTPSFetcher(RepoFetcherBase):
silent: bool = True, silent: bool = True,
) -> str: ) -> str:
""" """
Fetches git repo by HTTPS protocol
Fetches git repo by HTTPS protocol.


:param git_host: :param git_host:
host address of git repo
example: github.com
host address of git repo.
Example: github.com
:param repo_info: :param repo_info:
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional
tag/branch. The default branch is ``master`` if not specified. tag/branch. The default branch is ``master`` if not specified.
example: ``"brain_sdk/MegBrain[:hub]"``
Example: ``"brain_sdk/MegBrain[:hub]"``
:param use_cache: :param use_cache:
whether to use locally cached code or completely re-fetch
whether to use locally cached code or completely re-fetch.
:param commit: :param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param silent: :param silent:
whether to accept the stdout and stderr of the subprocess with PIPE, instead of whether to accept the stdout and stderr of the subprocess with PIPE, instead of
displaying on the screen
displaying on the screen.
:return: :return:
directory where the repo code is stored
directory where the repo code is stored.
""" """
if not cls._check_git_host(git_host): if not cls._check_git_host(git_host):
raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host)) raise InvalidGitHost("git_host: '{}' is malformed.".format(git_host))


+23 -23  imperative/python/megengine/hub/hub.py

@@ -94,24 +94,24 @@ def _init_hub(
commit: str = None, commit: str = None,
protocol: str = DEFAULT_PROTOCOL, protocol: str = DEFAULT_PROTOCOL,
): ):
"""Imports hubmodule like python import
"""Imports hubmodule like python import.


:param repo_info: :param repo_info:
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional
tag/branch. The default branch is ``master`` if not specified. tag/branch. The default branch is ``master`` if not specified.
Example: ``"brain_sdk/MegBrain[:hub]"`` Example: ``"brain_sdk/MegBrain[:hub]"``
:param git_host: :param git_host:
host address of git repo
host address of git repo.
Example: github.com Example: github.com
:param use_cache: :param use_cache:
whether to use locally cached code or completely re-fetch
whether to use locally cached code or completely re-fetch.
:param commit: :param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param protocol: :param protocol:
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. which protocol to use to get the repo, and HTTPS protocol only supports public repo on github.
The value should be one of HTTPS, SSH. The value should be one of HTTPS, SSH.
:return: :return:
hubconf.py as a python module
the repo's ``hubconf.py``, loaded as a python module.
""" """
cache_dir = os.path.expanduser(os.path.join(_get_megengine_home(), "hub")) cache_dir = os.path.expanduser(os.path.join(_get_megengine_home(), "hub"))
os.makedirs(cache_dir, exist_ok=True) os.makedirs(cache_dir, exist_ok=True)
@@ -137,24 +137,24 @@ def list(
commit: str = None, commit: str = None,
protocol: str = DEFAULT_PROTOCOL, protocol: str = DEFAULT_PROTOCOL,
) -> List[str]: ) -> List[str]:
"""Lists all entrypoints available in repo hubconf
"""Lists all entrypoints available in repo hubconf.


:param repo_info: :param repo_info:
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional
tag/branch. The default branch is ``master`` if not specified. tag/branch. The default branch is ``master`` if not specified.
Example: ``"brain_sdk/MegBrain[:hub]"`` Example: ``"brain_sdk/MegBrain[:hub]"``
:param git_host: :param git_host:
host address of git repo
host address of git repo.
Example: github.com Example: github.com
:param use_cache: :param use_cache:
whether to use locally cached code or completely re-fetch
whether to use locally cached code or completely re-fetch.
:param commit: :param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param protocol: :param protocol:
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. which protocol to use to get the repo, and HTTPS protocol only supports public repo on github.
The value should be one of HTTPS, SSH. The value should be one of HTTPS, SSH.
:return: :return:
all entrypoint names of the model
all entrypoint names of the model.
""" """
hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol)


@@ -182,14 +182,14 @@ def load(
tag/branch. The default branch is ``master`` if not specified. tag/branch. The default branch is ``master`` if not specified.
Example: ``"brain_sdk/MegBrain[:hub]"`` Example: ``"brain_sdk/MegBrain[:hub]"``
:param entry: :param entry:
an entrypoint defined in hubconf
an entrypoint defined in hubconf.
:param git_host: :param git_host:
host address of git repo
host address of git repo.
Example: github.com Example: github.com
:param use_cache: :param use_cache:
whether to use locally cached code or completely re-fetch
whether to use locally cached code or completely re-fetch.
:param commit: :param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param protocol: :param protocol:
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. which protocol to use to get the repo, and HTTPS protocol only supports public repo on github.
The value should be one of HTTPS, SSH. The value should be one of HTTPS, SSH.
@@ -217,9 +217,9 @@ def help(
) -> str: ) -> str:
"""This function returns docstring of entrypoint ``entry`` by following steps: """This function returns docstring of entrypoint ``entry`` by following steps:


1. Pull the repo code specified by git and repo_info
1. Pull the repo code specified by git and repo_info.
2. Load the entry defined in repo's hubconf.py 2. Load the entry defined in repo's hubconf.py
3. Return docstring of function entry
3. Return docstring of function entry.


:param repo_info: :param repo_info:
a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional a string with format ``"repo_owner/repo_name[:tag_name/:branch_name]"`` with an optional
@@ -228,17 +228,17 @@ def help(
:param entry: :param entry:
an entrypoint defined in hubconf.py an entrypoint defined in hubconf.py
:param git_host: :param git_host:
host address of git repo
host address of git repo.
Example: github.com Example: github.com
:param use_cache: :param use_cache:
whether to use locally cached code or completely re-fetch
whether to use locally cached code or completely re-fetch.
:param commit: :param commit:
commit id on github or gitlab
commit id on github or gitlab.
:param protocol: :param protocol:
which protocol to use to get the repo, and HTTPS protocol only supports public repo on github. which protocol to use to get the repo, and HTTPS protocol only supports public repo on github.
The value should be one of HTTPS, SSH. The value should be one of HTTPS, SSH.
:return: :return:
docstring of entrypoint ``entry``
docstring of entrypoint ``entry``.
""" """
hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol) hubmodule = _init_hub(repo_info, git_host, use_cache, commit, protocol)


@@ -255,10 +255,10 @@ def load_serialized_obj_from_url(url: str, model_dir=None) -> Any:
If the object is already present in ``model_dir``, it's deserialized and If the object is already present in ``model_dir``, it's deserialized and
returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``. returned. If no ``model_dir`` is specified, it will be ``MGE_HOME/serialized``.


:param url: url to serialized object
:param model_dir: dir to cache target serialized file
:param url: url to serialized object.
:param model_dir: dir to cache target serialized file.


:return: loaded object
:return: loaded object.
""" """
if model_dir is None: if model_dir is None:
model_dir = os.path.join(_get_megengine_home(), "serialized") model_dir = os.path.join(_get_megengine_home(), "serialized")


+7 -7  imperative/python/megengine/hub/tools.py

@@ -15,10 +15,10 @@ from typing import Iterator


def load_module(name: str, path: str) -> types.ModuleType: def load_module(name: str, path: str) -> types.ModuleType:
""" """
Loads module specified by name and path
Loads module specified by name and path.


:param name: module name
:param path: module path
:param name: module name.
:param path: module path.
""" """
spec = importlib.util.spec_from_file_location(name, path) spec = importlib.util.spec_from_file_location(name, path)
module = importlib.util.module_from_spec(spec) module = importlib.util.module_from_spec(spec)
@@ -27,18 +27,18 @@ def load_module(name: str, path: str) -> types.ModuleType:




def check_module_exists(module: str) -> bool: def check_module_exists(module: str) -> bool:
"""Checks whether python module exists or not
"""Checks whether python module exists or not.


:param module: name of module
:param module: name of module.
""" """
return importlib.util.find_spec(module) is not None return importlib.util.find_spec(module) is not None




@contextmanager @contextmanager
def cd(target: str) -> Iterator[None]: def cd(target: str) -> Iterator[None]:
"""Changes current directory to target
"""Changes current directory to target.


:param target: target directory
:param target: target directory.
""" """
prev = os.getcwd() prev = os.getcwd()
os.chdir(os.path.expanduser(target)) os.chdir(os.path.expanduser(target))


+116 -63  imperative/python/megengine/jit/tracing.py

@@ -36,6 +36,13 @@ active_trace = None
skip_tracing = False skip_tracing = False




def is_tracing():
if active_trace is None:
return False
else:
return not skip_tracing
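The new ``is_tracing`` helper lets user code branch on whether a trace is currently being recorded; a short sketch (assuming it is imported directly from ``megengine.jit.tracing``, the module changed here):

    import numpy as np
    from megengine import tensor
    from megengine.jit import trace
    from megengine.jit.tracing import is_tracing

    print(is_tracing())  # False: no trace is active at module level

    @trace
    def double(x):
        # during the traced call, is_tracing() reports True
        return x * 2

    print(double(tensor(np.ones(3, dtype="float32"))).numpy())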


@contextlib.contextmanager @contextlib.contextmanager
def exclude_from_trace(): def exclude_from_trace():
global skip_tracing global skip_tracing
@@ -125,6 +132,9 @@ class trace:
self._graph_opt_level = opt_level self._graph_opt_level = opt_level
self._tensor_shape = tensor_shape self._tensor_shape = tensor_shape


self._reset()

def _reset(self):
self._untraced = True self._untraced = True
self._tinfo = [] # handle -> TensorInfo self._tinfo = [] # handle -> TensorInfo
self._seq = [] self._seq = []
@@ -257,77 +267,117 @@ class trace:
def _record_const(self, op, outputs): def _record_const(self, op, outputs):
pass pass


@contextlib.contextmanager
def _setup(self):
def _set_active(self, active: bool):
global active_trace global active_trace
if active_trace:
raise NotImplementedError("sorry, not implemented: nested trace")
active_trace = self

if self._untraced:
apply.enable(apply_with_tracing)
apply.enable(apply_const_with_tracing)
if self._symbolic:
apply.enable(apply_symbolic_mode)
apply.enable(apply_const_symbolic_mode)
self._lazy_eval_graph = G.Graph()
if active:
if active_trace:
raise NotImplementedError("sorry, not implemented: nested trace")
active_trace = self
else: else:
apply.enable(apply_compiled_mode)
if self._graph is None:
self._compile()
self._graph.execute()

yield

assert active_trace is self
active_trace = None

def _init_trace(self, symbolic: bool):
apply.enable(apply_with_tracing)
apply.enable(apply_const_with_tracing)
if symbolic:
apply.enable(apply_symbolic_mode)
apply.enable(apply_const_symbolic_mode)
self._lazy_eval_graph = G.Graph()

def _take_escaped_tensors(self):
escaped_tensors = tuple(self._active_tensors) escaped_tensors = tuple(self._active_tensors)
self._active_tensors.clear() self._active_tensors.clear()
return escaped_tensors


if self._untraced:
for x in escaped_tensors:
info = self._tinfo[x._TraceMixin__handle]
info.data_read = True
x._TraceMixin__restore()
if self._inputs_to_restore:
for x in self._inputs_to_restore:
def _lazy_eval(self, lazy_eval_graph, lazy_eval_tensors):
active_lazy_eval_tensors = []
visited = set()
readers = []
for x in lazy_eval_tensors:
x = x()
if x is None or x in visited:
continue
reader = G.OutputNode(x._LazyEvalTensor__varnode).outputs[0]
readers.append(reader)
active_lazy_eval_tensors.append(x)
visited.add(x)
self._apply_graph_options(lazy_eval_graph)
lazy_eval_graph.compile(*readers)
lazy_eval_graph()
for r, x in zip(readers, active_lazy_eval_tensors):
assign_raw_tensor(x, as_raw_tensor(r.op.get_value()))

@contextlib.contextmanager
def _setup(self):
interrupted = False

def do_enter():
self._set_active(True)
if self._untraced:
self._init_trace(self._symbolic)
else:
apply.enable(apply_compiled_mode)
if self._graph is None:
self._compile()
self._graph.execute()

def do_finalize():
escaped_tensors = self._take_escaped_tensors()
if self._untraced:
for x in escaped_tensors:
info = self._tinfo[x._TraceMixin__handle]
info.data_read = True
x._TraceMixin__restore() x._TraceMixin__restore()
if self._symbolic:
# eval lazy eval tensors
if self._lazy_eval_tensors:
lazy_eval_tensors = []
visited = set()
readers = []
for x in self._lazy_eval_tensors:
x = x()
if x is None or x in visited:
continue
reader = G.OutputNode(x._LazyEvalTensor__varnode).outputs[0]
readers.append(reader)
lazy_eval_tensors.append(x)
visited.add(x)
self._apply_graph_options(self._lazy_eval_graph)
self._lazy_eval_graph.compile(*readers)
self._lazy_eval_graph()
for r, x in zip(readers, lazy_eval_tensors):
assign_raw_tensor(x, as_raw_tensor(r.op.get_value()))
if self._inputs_to_restore:
for x in self._inputs_to_restore:
x._TraceMixin__restore()
if self._symbolic and self._lazy_eval_tensors:
# eval lazy eval tensors
self._lazy_eval(self._lazy_eval_graph, self._lazy_eval_tensors)
self._lazy_eval_graph = None self._lazy_eval_graph = None
self._lazy_eval_tensors = None self._lazy_eval_tensors = None
self._untraced = False
else:
if self._pc != len(self._seq):
raise TraceMismatchError("premature end")
for x in escaped_tensors:
assign_raw_tensor(x, as_raw_tensor(x._dev_tensor()))
self._graph.wait()
self._reset_exec_env()
self._untraced = False
else:
# compiled_tensor leaks
if self._pc == len(self._seq):
for x in escaped_tensors:
try:
assign_raw_tensor(x, as_raw_tensor(x._dev_tensor()))
except TraceMismatchError:
# TraceMismatchError thrown in do_exit
pass
self._graph.wait()
self._reset_exec_env()

# reset status
self._pc = 0 self._pc = 0

self._tensor_remaps = None
apply.disable(apply_with_tracing)
apply.disable(apply_const_with_tracing)
apply.disable(apply_symbolic_mode)
apply.disable(apply_const_symbolic_mode)
apply.disable(apply_compiled_mode)
active_trace = None
self._tensor_remaps = None
apply.disable(apply_with_tracing)
apply.disable(apply_const_with_tracing)
apply.disable(apply_symbolic_mode)
apply.disable(apply_const_symbolic_mode)
apply.disable(apply_compiled_mode)
self._set_active(False)

def do_exit():
if not self._untraced and self._pc != len(self._seq):
raise TraceMismatchError("premature end")
if not self._symbolic or not self._untraced:
for x in self._active_tensors:
x._dev_tensor()

try:
do_enter()
yield
do_exit()
except:
interrupted = True
raise
finally:
do_finalize()
if interrupted:
self._reset()


def _begin_excluded_region(self): def _begin_excluded_region(self):
if self._capture_as_const: if self._capture_as_const:
@@ -368,6 +418,7 @@ class trace:
def _compile(self): def _compile(self):
graph = self._graph = G.Graph() graph = self._graph = G.Graph()
graph.options.no_force_inplace = True graph.options.no_force_inplace = True
graph.options.async_exec_level = 0b100
self._apply_graph_options(graph) self._apply_graph_options(graph)
# graph.options.graph_opt_level = 0 # graph.options.graph_opt_level = 0
need_reset_nodes = self._need_reset_nodes = [] need_reset_nodes = self._need_reset_nodes = []
@@ -570,7 +621,9 @@ class trace:
if h not in h2v: if h not in h2v:
assert info.external assert info.external
assert info.bound_data assert info.bound_data
h2v[h] = graph.make_const(info.bound_data._dev_tensor())
h2v[h] = graph.make_const(
info.bound_data.numpy(), dtype=info.dtype, device=info.device
)
ivars.append(h2v[h]) ivars.append(h2v[h])
ovars = apply(op, *ivars) ovars = apply(op, *ivars)
assert len(ovars) == len(ohandles) assert len(ovars) == len(ohandles)


+1 -1  imperative/python/megengine/logger.py

@@ -12,7 +12,7 @@ import os
import sys import sys


_all_loggers = [] _all_loggers = []
_default_level_name = os.getenv("MEGENGINE_LOGGING_LEVEL", "ERROR")
_default_level_name = os.getenv("MEGENGINE_LOGGING_LEVEL", "INFO")
_default_level = logging.getLevelName(_default_level_name.upper()) _default_level = logging.getLevelName(_default_level_name.upper())






+1 -0  imperative/python/megengine/module/__init__.py

@@ -8,6 +8,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.


from .activation import LeakyReLU, PReLU, ReLU, Sigmoid, Softmax from .activation import LeakyReLU, PReLU, ReLU, Sigmoid, Softmax
from .adaptive_pooling import AdaptiveAvgPool2d, AdaptiveMaxPool2d
from .batchnorm import BatchNorm1d, BatchNorm2d, SyncBatchNorm from .batchnorm import BatchNorm1d, BatchNorm2d, SyncBatchNorm
from .concat import Concat from .concat import Concat
from .conv import Conv2d, ConvRelu2d, ConvTranspose2d, LocalConv2d from .conv import Conv2d, ConvRelu2d, ConvTranspose2d, LocalConv2d


+7 -5  imperative/python/megengine/module/activation.py

@@ -20,10 +20,10 @@ class Softmax(Module):
.. math:: .. math::
\text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)} \text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)}


It is applied to an n-dimensional input Tensor and rescaling them so that the elements of the
n-dimensional output Tensor lie in the range of `[0, 1]` and sum to 1.
It is applied to all elements along the given axis, and rescales them so that
they stay in the range `[0, 1]` and sum to 1.


:param axis: An axis along which softmax will be applied. By default,
:param axis: the axis along which softmax will be applied. By default,
softmax will apply along the highest ranked axis. softmax will apply along the highest ranked axis.


Examples: Examples:
@@ -55,6 +55,9 @@ class Softmax(Module):
def forward(self, inputs): def forward(self, inputs):
return softmax(inputs, self.axis) return softmax(inputs, self.axis)


def _module_info_string(self) -> str:
return "axis={axis}".format(axis=self.axis)



class Sigmoid(Module): class Sigmoid(Module):
r""" r"""
@@ -138,8 +141,7 @@ class PReLU(Module):
\end{cases} \end{cases}


Here :math:`a` is a learnable parameter. When called without arguments, `PReLU()` uses Here :math:`a` is a learnable parameter. When called without arguments, `PReLU()` uses
a single paramter :math:`a` across all input channel. If called with `PReLU(num_of_channels)`,
a seperate :math:`a` is used for each input channle.
a single parameter :math:`a` across all input channels. If called with `PReLU(num_of_channels)`, each input channel will have its own :math:`a`.


:param num_parameters: number of :math:`a` to learn, there is only two :param num_parameters: number of :math:`a` to learn, there is only two
values are legitimate: 1, or the number of channels at input. Default: 1 values are legitimate: 1, or the number of channels at input. Default: 1


+114 -0  imperative/python/megengine/module/adaptive_pooling.py

@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from abc import abstractmethod
from typing import Tuple, Union

from ..functional import adaptive_avg_pool2d, adaptive_max_pool2d
from ..tensor import Parameter, Tensor
from .module import Module


class _AdaptivePoolNd(Module):
def __init__(
self, oshp: Union[Tuple[int, int], int, Tensor],
):
super(_AdaptivePoolNd, self).__init__()
self.oshp = oshp

@abstractmethod
def forward(self, inp):
pass


class AdaptiveMaxPool2d(_AdaptivePoolNd):
r"""Applies a 2D max adaptive pooling over an input.

For instance, given an input of the size :math:`(N, C, H, W)` and
an output shape :math:`(OH, OW)`, this layer generates the output of
the size :math:`(N, C, OH, OW)` through a process described as:

.. math::
\begin{aligned}
out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1}
\text{input}(N_i, C_j, \text{stride[0]} \times h + m,
\text{stride[1]} \times w + n)
\end{aligned}

``kernel_size`` and ``stride`` are inferred from the input and output shapes:
padding: (0, 0)
stride: (floor(IH / OH), floor(IW / OW))
kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w)

Examples:

.. testcode::

import numpy as np
import megengine as mge
import megengine.module as M

m = M.AdaptiveMaxPool2d((2, 2))
inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4))
oup = m(inp)
print(oup.numpy())

Outputs:

.. testoutput::

[[[[5. 7.]
[13. 15.]]]]

"""

def forward(self, inp):
return adaptive_max_pool2d(inp, self.oshp)


class AdaptiveAvgPool2d(_AdaptivePoolNd):
r"""Applies a 2D average pooling over an input.

For instance, given an input of the size :math:`(N, C, H, W)` and
an output shape :math:`(OH, OW)`, this layer generates the output of
the size :math:`(N, C, OH, OW)` through a process described as:

.. math::

out(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)

``kernel_size`` and ``stride`` are inferred from the input and output shapes:
padding: (0, 0)
stride: (floor(IH / OH), floor(IW / OW))
kernel_size: (IH - (OH - 1) * stride_h, IW - (OW - 1) * stride_w)

Examples:

.. testcode::

import numpy as np
import megengine as mge
import megengine.module as M

m = M.AdaptiveAvgPool2d((2, 2))
inp = mge.tensor(np.arange(0, 16).astype("float32").reshape(1, 1, 4, 4))
oup = m(inp)
print(oup.numpy())

Outputs:

.. testoutput::

[[[[2.5 4.5]
[10.5 12.5]]]]

"""

def forward(self, inp):
return adaptive_avg_pool2d(inp, self.oshp)

+14 -8  imperative/python/megengine/module/batchnorm.py

@@ -11,7 +11,7 @@ from typing import Optional
import numpy as np import numpy as np


from ..distributed.group import WORLD, Group from ..distributed.group import WORLD, Group
from ..functional import batch_norm2d, sync_batch_norm
from ..functional.nn import batch_norm, sync_batch_norm
from ..tensor import Parameter, Tensor from ..tensor import Parameter, Tensor
from . import init from . import init
from .module import Module from .module import Module
@@ -96,7 +96,7 @@ class _BatchNorm(Module):
else: else:
exponential_average_factor = 0.0 # useless exponential_average_factor = 0.0 # useless


output = batch_norm2d(
output = batch_norm(
inp, inp,
self.running_mean if self.track_running_stats else None, self.running_mean if self.track_running_stats else None,
self.running_var if self.track_running_stats else None, self.running_var if self.track_running_stats else None,
@@ -113,6 +113,13 @@ class _BatchNorm(Module):


return output return output


def _module_info_string(self) -> str:
s = (
"{num_features}, eps={eps}, momentum={momentum}, affine={affine}, "
"track_running_stats={track_running_stats}"
)
return s.format(**self.__dict__)



class SyncBatchNorm(_BatchNorm): class SyncBatchNorm(_BatchNorm):
r""" r"""
@@ -213,8 +220,8 @@ class BatchNorm2d(_BatchNorm):
of 0.9. of 0.9.


If :attr:`track_running_stats` is set to ``False``, this layer will not If :attr:`track_running_stats` is set to ``False``, this layer will not
keep running estimates, and batch statistics are instead used during
evaluation time.
keep running estimates; batch statistics are used during
evaluation time instead.


.. note:: .. note::
This :attr:`momentum` argument is different from one used in optimizer This :attr:`momentum` argument is different from one used in optimizer
@@ -229,15 +236,14 @@ class BatchNorm2d(_BatchNorm):
Spatial Batch Normalization. Spatial Batch Normalization.


:type num_features: int :type num_features: int
:param num_features: usually the :math:`C` from an input of size
:math:`(N, C, H, W)` or the highest ranked dimension of an input with
:param num_features: usually :math:`C` from an input of shape
:math:`(N, C, H, W)` or the highest ranked dimension of an input with
less than 4D. less than 4D.
:type eps: float :type eps: float
:param eps: a value added to the denominator for numerical stability. :param eps: a value added to the denominator for numerical stability.
Default: 1e-5 Default: 1e-5
:type momentum: float :type momentum: float
:param momentum: the value used for the `running_mean` and `running_var`
computation.
:param momentum: the value used for the ``running_mean`` and ``running_var`` computation.
Default: 0.9 Default: 0.9
:type affine: bool :type affine: bool
:param affine: a boolean value that when set to True, this module has :param affine: a boolean value that when set to True, this module has


+31 -18  imperative/python/megengine/module/conv.py

@@ -70,6 +70,21 @@ class _ConvNd(Module):
def _infer_bias_shape(self): def _infer_bias_shape(self):
pass pass


def _module_info_string(self):
s = "{in_channels}, {out_channels}, kernel_size={kernel_size}"

if self.stride != (1,) * len(self.stride):
s += ", stride={stride}"
if self.padding != (0,) * len(self.padding):
s += ", padding={padding}"
if self.dilation != (1,) * len(self.dilation):
s += ", dilation={dilation}"
if self.groups != 1:
s += ", groups={groups}"
if self.bias is None:
s += ", bias=False"
return s.format(**self.__dict__)



class Conv2d(_ConvNd): class Conv2d(_ConvNd):
r"""Applies a 2D convolution over an input tensor. r"""Applies a 2D convolution over an input tensor.
@@ -84,8 +99,8 @@ class Conv2d(_ConvNd):
\sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k) \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)


where :math:`\star` is the valid 2D cross-correlation operator, where :math:`\star` is the valid 2D cross-correlation operator,
:math:`N` is a batch size, :math:`C` denotes a number of channels,
:math:`H` is a height of input planes in pixels, and :math:`W` is
:math:`N` is batch size, :math:`C` denotes number of channels,
:math:`H` is height of input planes in pixels, and :math:`W` is
width in pixels. width in pixels.


When `groups == in_channels` and `out_channels == K * in_channels`, When `groups == in_channels` and `out_channels == K * in_channels`,
@@ -105,9 +120,8 @@ class Conv2d(_ConvNd):
:param padding: size of the paddings added to the input on both sides of its :param padding: size of the paddings added to the input on both sides of its
spatial dimensions. Only zero-padding is supported. Default: 0 spatial dimensions. Only zero-padding is supported. Default: 0
:param dilation: dilation of the 2D convolution operation. Default: 1 :param dilation: dilation of the 2D convolution operation. Default: 1
:param groups: number of groups to divide input and output channels into,
so as to perform a "grouped convolution". When groups is not 1,
in_channels and out_channels must be divisible by groups,
:param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1,
``in_channels`` and ``out_channels`` must be divisible by ``groups``,
and there would be an extra dimension at the beginning of the weight's and there would be an extra dimension at the beginning of the weight's
shape. Specifically, the shape of weight would be `(groups, shape. Specifically, the shape of weight would be `(groups,
out_channel // groups, in_channels // groups, *kernel_size)`. out_channel // groups, in_channels // groups, *kernel_size)`.
@@ -115,9 +129,9 @@ class Conv2d(_ConvNd):
True True
:param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default: :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default:
`CROSS_CORRELATION` `CROSS_CORRELATION`
:param compute_mode: When set to `DEFAULT`, no special requirements will be
placed on the precision of intermediate results. When set to `FLOAT32`,
float32 would be used for accumulator and intermediate result, but only
:param compute_mode: When set to "DEFAULT", no special requirements will be
placed on the precision of intermediate results. When set to "FLOAT32",
"Float32" would be used for accumulator and intermediate result, but only
effective when input and output are of float16 dtype. effective when input and output are of float16 dtype.


Examples: Examples:
@@ -221,7 +235,7 @@ class ConvTranspose2d(_ConvNd):
r"""Applies a 2D transposed convolution over an input tensor. r"""Applies a 2D transposed convolution over an input tensor.


This module is also known as a deconvolution or a fractionally-strided convolution. This module is also known as a deconvolution or a fractionally-strided convolution.
:class:`ConvTranspose2d` can ben seen as the gradient of :class:`Conv2d` operation
:class:`ConvTranspose2d` can be seen as the gradient of :class:`Conv2d` operation
with respect to its input. with respect to its input.


Convolution usually reduces the size of input, while transposed convolution works Convolution usually reduces the size of input, while transposed convolution works
@@ -237,8 +251,7 @@ class ConvTranspose2d(_ConvNd):
:param padding: size of the paddings added to the input on both sides of its :param padding: size of the paddings added to the input on both sides of its
spatial dimensions. Only zero-padding is supported. Default: 0 spatial dimensions. Only zero-padding is supported. Default: 0
:param dilation: dilation of the 2D convolution operation. Default: 1 :param dilation: dilation of the 2D convolution operation. Default: 1
:param groups: number of groups to divide input and output channels into,
so as to perform a "grouped convolution". When ``groups`` is not 1,
:param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1,
``in_channels`` and ``out_channels`` must be divisible by ``groups``, ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
and there would be an extra dimension at the beginning of the weight's and there would be an extra dimension at the beginning of the weight's
shape. Specifically, the shape of weight would be ``(groups, shape. Specifically, the shape of weight would be ``(groups,
@@ -247,9 +260,9 @@ class ConvTranspose2d(_ConvNd):
True True
:param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default: :param conv_mode: Supports `CROSS_CORRELATION` or `CONVOLUTION`. Default:
`CROSS_CORRELATION` `CROSS_CORRELATION`
:param compute_mode: When set to `DEFAULT`, no special requirements will be
placed on the precision of intermediate results. When set to `FLOAT32`,
float32 would be used for accumulator and intermediate result, but only
:param compute_mode: When set to "DEFAULT", no special requirements will be
placed on the precision of intermediate results. When set to "FLOAT32",
"Float32" would be used for accumulator and intermediate result, but only
effective when input and output are of float16 dtype. effective when input and output are of float16 dtype.
""" """


@@ -327,7 +340,7 @@ class ConvTranspose2d(_ConvNd):




class LocalConv2d(Conv2d): class LocalConv2d(Conv2d):
r"""Applies a spatial convolution with untied kernels over an input 4D tensor.
r"""Applies a spatial convolution with untied kernels over an groupped channeled input 4D tensor.
It is also known as the locally connected layer. It is also known as the locally connected layer.


:param in_channels: number of input channels. :param in_channels: number of input channels.
@@ -340,9 +353,9 @@ class LocalConv2d(Conv2d):
:param stride: stride of the 2D convolution operation. Default: 1 :param stride: stride of the 2D convolution operation. Default: 1
:param padding: size of the paddings added to the input on both sides of its :param padding: size of the paddings added to the input on both sides of its
spatial dimensions. Only zero-padding is supported. Default: 0 spatial dimensions. Only zero-padding is supported. Default: 0
:param groups: number of groups to divide input and output channels into,
so as to perform a "grouped convolution". When groups is not 1,
in_channels and out_channels must be divisible by groups.
:param groups: number of groups into which the input and output channels are divided,
so as to perform a "grouped convolution". When ``groups`` is not 1,
``in_channels`` and ``out_channels`` must be divisible by ``groups``.
The shape of weight is `(groups, output_height, output_width, The shape of weight is `(groups, output_height, output_width,
in_channels // groups, *kernel_size, out_channels // groups)`. in_channels // groups, *kernel_size, out_channels // groups)`.
""" """


+4 -1  imperative/python/megengine/module/dropout.py

@@ -11,7 +11,7 @@ from .module import Module




class Dropout(Module): class Dropout(Module):
r"""Randomly set input elements to zeros with the probability :math:`drop\_prob` during training.
r"""Randomly sets input elements to zeros with the probability :math:`drop\_prob` during training.
Commonly used in large networks to prevent overfitting. Commonly used in large networks to prevent overfitting.
Note that we perform dropout only during training, we also rescale(multiply) the output tensor Note that we perform dropout only during training, we also rescale(multiply) the output tensor
by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`. by :math:`\frac{1}{1 - drop\_prob}`. During inference :class:`~.Dropout` is equal to :class:`~.Identity`.
@@ -28,3 +28,6 @@ class Dropout(Module):
return dropout(inputs, self.drop_prob, training=True) return dropout(inputs, self.drop_prob, training=True)
else: else:
return inputs return inputs

def _module_info_string(self) -> str:
return "drop_prob={drop_prob}".format(drop_prob=self.drop_prob)

+3 -3  imperative/python/megengine/module/elemwise.py

@@ -34,7 +34,7 @@ class Elemwise(Module):
* "EXP": exp(x) * "EXP": exp(x)
* "TANH": tanh(x) * "TANH": tanh(x)
* "FUSE_MUL_ADD3": x * y + z * "FUSE_MUL_ADD3": x * y + z
* "FAST_TANH": fast_tanh(x)
* "FAST_TANH": x * (27. + x * x) / (27. + 9. * x * x)
* "NEGATE": -x * "NEGATE": -x
* "ACOS": acos(x) * "ACOS": acos(x)
* "ASIN": asin(x) * "ASIN": asin(x)
@@ -56,9 +56,9 @@ class Elemwise(Module):
* "SIGMOID_GRAD": sigmoid_grad * "SIGMOID_GRAD": sigmoid_grad
* "SWITCH_GT0": switch_gt0 * "SWITCH_GT0": switch_gt0
* "TANH_GRAD": tanh_grad * "TANH_GRAD": tanh_grad
* "LT": lt
* "LT": less
* "LEQ": leq * "LEQ": leq
* "EQ": eq
* "EQ": equal
* "POW": pow * "POW": pow
* "LOG_SUM_EXP": log_sum_exp * "LOG_SUM_EXP": log_sum_exp
* "FAST_TANH_GRAD": fast_tanh_grad * "FAST_TANH_GRAD": fast_tanh_grad


+6 -6  imperative/python/megengine/module/embedding.py

@@ -10,7 +10,7 @@ from typing import Optional


import numpy as np import numpy as np


from ..functional import embedding as embedding_func
from ..functional.nn import embedding as embedding_func
from ..tensor import Parameter from ..tensor import Parameter
from . import init from . import init
from .module import Module from .module import Module
@@ -26,9 +26,9 @@ class Embedding(Module):


:param num_embeddings: size of embedding dictionary. :param num_embeddings: size of embedding dictionary.
:param embedding_dim: size of each embedding vector. :param embedding_dim: size of each embedding vector.
:param padding_idx: should be set to None, not support now.
:param max_norm: should be set to None, not support now.
:param norm_type: should be set to None, not support now.
:param padding_idx: should be set to None, not supported now.
:param max_norm: should be set to None, not supported now.
:param norm_type: should be set to None, not supported now.
:param initial_weight: the learnable weights of the module of shape (num_embeddings, embedding_dim). :param initial_weight: the learnable weights of the module of shape (num_embeddings, embedding_dim).


Examples: Examples:
@@ -121,8 +121,8 @@ class Embedding(Module):
r""" r"""
Creates Embedding instance from given 2-dimensional FloatTensor. Creates Embedding instance from given 2-dimensional FloatTensor.


:param embeddings: Tensor contained weight for the embedding.
:param freeze: If ``True``, the weight does not get updated during the learning process. Default: ``True``.
:param embeddings: tensor containing weights for the embedding.
:param freeze: if ``True``, the weight does not get updated during the learning process. Default: True.
:param padding_idx: should be set to None, not supported now. :param padding_idx: should be set to None, not supported now.
:param max_norm: should be set to None, not supported now. :param max_norm: should be set to None, not supported now.
:param norm_type: should be set to None, not supported now. :param norm_type: should be set to None, not supported now.


+2 -2  imperative/python/megengine/module/identity.py

@@ -6,7 +6,7 @@
# Unless required by applicable law or agreed to in writing, # Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an # software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from ..functional import identity
from ..functional import copy
from .module import Module from .module import Module




@@ -14,4 +14,4 @@ class Identity(Module):
r"""A placeholder identity operator that will ignore any argument.""" r"""A placeholder identity operator that will ignore any argument."""


def forward(self, x): def forward(self, x):
return identity(x)
return copy(x)

+40 -40  imperative/python/megengine/module/init.py

@@ -18,48 +18,48 @@ from ..tensor import Tensor




def fill_(tensor: Tensor, val: Union[float, int]) -> None: def fill_(tensor: Tensor, val: Union[float, int]) -> None:
"""Fill the given ``tensor`` with value ``val``.
"""Fills the given ``tensor`` with value ``val``.


:param tensor: An n-dimentional tensor to be initialized
:param val: The value to be filled throughout the tensor
:param tensor: tensor to be initialized.
:param val: value to be filled throughout the tensor.
""" """
tensor._reset(full(shape=tensor.shape, value=val, dtype=tensor.dtype)) tensor._reset(full(shape=tensor.shape, value=val, dtype=tensor.dtype))




def zeros_(tensor: Tensor) -> None: def zeros_(tensor: Tensor) -> None:
"""Fill the given ``tensor`` with scalar value `0`.
"""Fills the given ``tensor`` with scalar value `0`.


:param tensor: An n-dimentional tensor to be initialized
:param tensor: tensor to be initialized.
""" """
fill_(tensor, 0) fill_(tensor, 0)




def ones_(tensor: Tensor) -> None: def ones_(tensor: Tensor) -> None:
"""Fill the given ``tensor`` with the scalar value `1`.
"""Fills the given ``tensor`` with the scalar value `1`.


:param tensor: An n-dimentional tensor to be initialized
:param tensor: tensor to be initialized.
""" """
fill_(tensor, 1) fill_(tensor, 1)




def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None: def uniform_(tensor: Tensor, a: float = 0.0, b: float = 1.0) -> None:
r"""Fill the given ``tensor`` with random value sampled from uniform distribution
r"""Fills the given ``tensor`` with random value sampled from uniform distribution
:math:`\mathcal{U}(\text{a}, \text{b})`. :math:`\mathcal{U}(\text{a}, \text{b})`.


:param tensor: An n-dimentional tensor to be initialized
:param a: Lower bound of the sampling interval
:param b: Upper bound of the sampling interval
:param tensor: tensor to be initialized.
:param a: lower bound of the sampling interval.
:param b: upper bound of the sampling interval.
""" """
tensor._reset(uniform(size=tensor.shape, low=a, high=b).astype(tensor.dtype)) tensor._reset(uniform(size=tensor.shape, low=a, high=b).astype(tensor.dtype))




def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None: def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None:
r"""Fill the given ``tensor`` with random value sampled from normal distribution
r"""Fills the given ``tensor`` with random value sampled from normal distribution
:math:`\mathcal{N}(\text{mean}, \text{std}^2)`. :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.


:param tensor: An n-dimentional tensor to be initialized
:param mean: The mean of the normal distribution
:param std: The standard deviation of the normal distribution
:param tensor: tensor to be initialized.
:param mean: mean of the normal distribution.
:param std: standard deviation of the normal distribution.
""" """
tensor._reset(normal(size=tensor.shape, mean=mean, std=std).astype(tensor.dtype)) tensor._reset(normal(size=tensor.shape, mean=mean, std=std).astype(tensor.dtype))
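These initializers update a tensor in place through ``_reset``; a typical (illustrative) use on module parameters might look like:

    import megengine.module as M
    import megengine.module.init as init

    linear = M.Linear(4, 8)
    init.zeros_(linear.bias)
    init.uniform_(linear.weight, a=-0.1, b=0.1)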


@@ -67,7 +67,7 @@ def normal_(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> None:
def calculate_gain( def calculate_gain(
nonlinearity: str, param: Optional[Union[int, float]] = None nonlinearity: str, param: Optional[Union[int, float]] = None
) -> float: ) -> float:
r"""Return a recommended gain value (see the table below) for the given nonlinearity
r"""Returns a recommended gain value (see the table below) for the given nonlinearity
function. function.


================= ==================================================== ================= ====================================================
@@ -81,8 +81,8 @@ def calculate_gain(
Leaky Relu :math:`\sqrt{\frac{2}{1 + {\text{negative}_\text{slope}}^2}}` Leaky Relu :math:`\sqrt{\frac{2}{1 + {\text{negative}_\text{slope}}^2}}`
================= ==================================================== ================= ====================================================


:param nonlinearity: Name of the non-linear function
:param param: Optional parameter for leaky_relu. Only effective when
:param nonlinearity: name of the non-linear function.
:param param: optional parameter for leaky_relu. Only effective when
``nonlinearity`` is "leaky_relu". ``nonlinearity`` is "leaky_relu".


""" """
@@ -119,10 +119,10 @@ def calculate_gain(


def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]: def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]:
""" """
Calculate fan_in / fan_out value for given weight tensor. This function assumes
input tensor is stored in NCHW format.
Calculates fan_in / fan_out values for the given weight tensor. This function assumes
the input tensor is stored in ``NCHW`` format.


:param tensor: Weight tensor in NCHW format
:param tensor: weight tensor in ``NCHW`` format.
""" """
shape = tensor.shape shape = tensor.shape
ndim = len(shape) ndim = len(shape)
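For a convolution weight stored as ``(out_channels, in_channels, kh, kw)`` in ``NCHW`` convention, the usual computation (and the assumption behind this sketch) is:

    # hypothetical 3x3 conv weight mapping 3 channels to 16 channels
    out_channels, in_channels, kh, kw = 16, 3, 3, 3
    receptive_field = kh * kw
    fan_in = in_channels * receptive_field    # 3 * 9 = 27
    fan_out = out_channels * receptive_field  # 16 * 9 = 144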
@@ -148,13 +148,13 @@ def calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[float, float]:


def calculate_correct_fan(tensor: Tensor, mode: str) -> float: def calculate_correct_fan(tensor: Tensor, mode: str) -> float:
""" """
Calculate fan_in or fan_out value for given weight tensor, depending on given
Calculates the fan_in or fan_out value for the given weight tensor, depending on the given
``mode``. ``mode``.


See :func:`calculate_fan_in_and_fan_out` for details. See :func:`calculate_fan_in_and_fan_out` for details.


:param tensor: Weight tensor in NCHW format
:param mode: ``'fan_in'`` or ``'fan_out'``
:param tensor: weight tensor in ``NCHW`` format.
:param mode: "fan_in" or "fan_out".
""" """
mode = mode.lower() mode = mode.lower()
valid_modes = ["fan_in", "fan_out"] valid_modes = ["fan_in", "fan_out"]
@@ -168,7 +168,7 @@ def calculate_correct_fan(tensor: Tensor, mode: str) -> float:




def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None: def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None:
r"""Fill ``tensor`` with random values sampled from :math:`\mathcal{U}(-a, a)`
r"""Fills tensor with random values sampled from :math:`\mathcal{U}(-a, a)`
where where


.. math:: .. math::
@@ -178,8 +178,8 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None:
`Understanding the difficulty of training deep feedforward neural networks` - `Understanding the difficulty of training deep feedforward neural networks` -
Glorot, X. & Bengio, Y. (2010). Glorot, X. & Bengio, Y. (2010).


:param tensor: An n-dimentional tensor to be initialized
:param gain: Scaling factor for :math:`a`.
:param tensor: tensor to be initialized.
:param gain: scaling factor for :math:`a`.
""" """
fan_in, fan_out = calculate_fan_in_and_fan_out(tensor) fan_in, fan_out = calculate_fan_in_and_fan_out(tensor)
std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
@@ -188,7 +188,7 @@ def xavier_uniform_(tensor: Tensor, gain: float = 1.0) -> None:




def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None: def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None:
r"""Fill ``tensor`` with random values sampled from
r"""Fills tensor with random values sampled from
:math:`\mathcal{N}(0, \text{std}^2)` where :math:`\mathcal{N}(0, \text{std}^2)` where


.. math:: .. math::
@@ -198,8 +198,8 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None:
`Understanding the difficulty of training deep feedforward neural networks` - `Understanding the difficulty of training deep feedforward neural networks` -
Glorot, X. & Bengio, Y. (2010). Glorot, X. & Bengio, Y. (2010).


:param tensor: An n-dimentional tensor to be initialized
:param gain: Scaling factor for :math:`std`.
:param tensor: tensor to be initialized.
:param gain: scaling factor for :math:`std`.
""" """
fan_in, fan_out = calculate_fan_in_and_fan_out(tensor) fan_in, fan_out = calculate_fan_in_and_fan_out(tensor)
std = gain * math.sqrt(2.0 / float(fan_in + fan_out)) std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
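A sketch of what the two Glorot initializers derive from ``fan_in``/``fan_out``, assuming the uniform bound follows the common :math:`a = \text{gain}\sqrt{6 / (fan\_in + fan\_out)} = \sqrt{3}\,\text{std}` convention (the fan values are illustrative):

    import math

    fan_in, fan_out = 27, 144   # e.g. from calculate_fan_in_and_fan_out
    gain = 1.0
    std = gain * math.sqrt(2.0 / (fan_in + fan_out))  # xavier_normal_ samples N(0, std**2)
    bound = math.sqrt(3.0) * std                      # xavier_uniform_ is assumed to sample U(-bound, bound)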
@@ -209,7 +209,7 @@ def xavier_normal_(tensor: Tensor, gain: float = 1.0) -> None:
def msra_uniform_( def msra_uniform_(
tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu"
) -> None: ) -> None:
r"""Fill ``tensor`` wilth random values sampled from
r"""Fills tensor wilth random values sampled from
:math:`\mathcal{U}(-\text{bound}, \text{bound})` where :math:`\mathcal{U}(-\text{bound}, \text{bound})` where


.. math:: .. math::
@@ -219,13 +219,13 @@ def msra_uniform_(
`Delving deep into rectifiers: Surpassing human-level performance on ImageNet `Delving deep into rectifiers: Surpassing human-level performance on ImageNet
classification` classification`


:param tensor: An n-dimentional tensor to be initialized
:param a: Optional parameter for calculating gain for leaky_relu. See
:param tensor: tensor to be initialized.
:param a: optional parameter for calculating gain for leaky_relu. See
:func:`calculate_gain` for details. :func:`calculate_gain` for details.
:param mode: ``'fan_in'`` or ``'fan_out'``, used to calculate :math:`gain`, the
:param mode: "fan_in" or "fan_out", used to calculate :math:`gain`, the
scaling factor for :math:`bound`. See :func:`calculate_fan_in_and_fan_out` for scaling factor for :math:`bound`. See :func:`calculate_fan_in_and_fan_out` for
details. details.
:param nonlinearity: Name of the non-linear function used to calculate :math:`gain`.
:param nonlinearity: name of the non-linear function used to calculate :math:`gain`.
See :func:`calculate_gain` for details. See :func:`calculate_gain` for details.
""" """
fan = calculate_correct_fan(tensor, mode) fan = calculate_correct_fan(tensor, mode)
@@ -238,7 +238,7 @@ def msra_uniform_(
def msra_normal_( def msra_normal_(
tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu" tensor: Tensor, a: float = 0, mode: str = "fan_in", nonlinearity: str = "leaky_relu"
) -> None: ) -> None:
r"""Fill ``tensor`` wilth random values sampled from
r"""Fills tensor wilth random values sampled from
:math:`\mathcal{N}(0, \text{std}^2)` where :math:`\mathcal{N}(0, \text{std}^2)` where


.. math:: .. math::
@@ -248,13 +248,13 @@ def msra_normal_(
`Delving deep into rectifiers: Surpassing human-level performance on ImageNet `Delving deep into rectifiers: Surpassing human-level performance on ImageNet
classification` classification`


:param tensor: An n-dimentional tensor to be initialized
:param a: Optional parameter for calculating gain for leaky_relu. See
:param tensor: tensor to be initialized
:param a: optional parameter for calculating gain for leaky_relu. See
:func:`calculate_gain` for details. :func:`calculate_gain` for details.
:param mode: ``'fan_in'`` or ``'fan_out'``, used to calculate :math:`gain`, the
:param mode: "fan_in" or "fan_out", used to calculate :math:`gain`, the
scaling factor for :math:`std`. See :func:`calculate_fan_in_and_fan_out` for scaling factor for :math:`std`. See :func:`calculate_fan_in_and_fan_out` for
details. details.
:param nonlinearity: Name of the non-linear function used to calculate :math:`gain`.
:param nonlinearity: name of the non-linear function used to calculate :math:`gain`.
See :func:`calculate_gain` for details. See :func:`calculate_gain` for details.
""" """
fan = calculate_correct_fan(tensor, mode) fan = calculate_correct_fan(tensor, mode)
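A sketch of the He/MSRA statistics computed from ``fan`` and ``gain``, assuming the usual :math:`\text{std} = \text{gain}/\sqrt{fan}` and :math:`\text{bound} = \text{gain}\sqrt{3 / fan}` conventions (the fan value is illustrative):

    import math

    fan = 27                             # e.g. fan_in of a 3x3 conv with 3 input channels
    a = 0.0                              # leaky_relu negative slope
    gain = math.sqrt(2.0 / (1 + a ** 2))
    std = gain / math.sqrt(fan)          # msra_normal_ is assumed to sample N(0, std**2)
    bound = gain * math.sqrt(3.0 / fan)  # msra_uniform_ is assumed to sample U(-bound, bound)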


+ 7
- 2
imperative/python/megengine/module/linear.py View File

@@ -7,7 +7,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import numpy as np import numpy as np


from ..functional import linear
from ..functional.nn import linear
from ..tensor import Parameter from ..tensor import Parameter
from . import init from . import init
from .module import Module from .module import Module
@@ -25,7 +25,7 @@ class Linear(Module):


:param in_features: size of each input sample. :param in_features: size of each input sample.
:param out_features: size of each output sample. :param out_features: size of each output sample.
:param bias: If set to ``False``, the layer will not learn an additive bias.
:param bias: if it's ``False``, the layer will not learn an additive bias.
Default: ``True`` Default: ``True``


Examples: Examples:
@@ -78,3 +78,8 @@ class Linear(Module):


def forward(self, x): def forward(self, x):
return self._calc_linear(x, self.weight, self.bias) return self._calc_linear(x, self.weight, self.bias)

def _module_info_string(self) -> str:
return "in_features={}, out_features={}, bias={}".format(
self.in_features, self.out_features, self.bias is not None
)
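With the new ``_module_info_string`` hook, ``repr`` of a ``Linear`` module should look roughly like the following (a sketch; the exact formatting comes from ``Module.__repr__`` added further below):

    import megengine.module as M

    fc = M.Linear(28 * 28, 10, bias=True)
    print(fc)
    # expected output (approximately):
    # Linear(in_features=784, out_features=10, bias=True)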

+ 79
- 25
imperative/python/megengine/module/module.py View File

@@ -69,14 +69,14 @@ class Module(metaclass=ABCMeta):
self._forward_pre_hooks = OrderedDict() self._forward_pre_hooks = OrderedDict()
self._forward_hooks = OrderedDict() self._forward_hooks = OrderedDict()


self._modules = []

@abstractmethod @abstractmethod
def forward(self, inputs): def forward(self, inputs):
pass pass


def register_forward_pre_hook(self, hook: Callable) -> HookHandler: def register_forward_pre_hook(self, hook: Callable) -> HookHandler:
"""Register a hook to handle forward inputs. `hook` should be a function

Note that `inputs` keyword inputs
"""Registers a hook to handle forward inputs. `hook` should be a function.


:param hook: a function that receives `module` and `inputs`, then returns :param hook: a function that receives `module` and `inputs`, then returns
a modified `inputs` or `None`. a modified `inputs` or `None`.
@@ -85,7 +85,7 @@ class Module(metaclass=ABCMeta):
return HookHandler(self._forward_pre_hooks, hook) return HookHandler(self._forward_pre_hooks, hook)


def register_forward_hook(self, hook: Callable) -> HookHandler: def register_forward_hook(self, hook: Callable) -> HookHandler:
"""Register a hook to handle forward results. `hook` should be a function that
"""Registers a hook to handle forward results. `hook` should be a function that
receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`. receives `module`, `inputs` and `outputs`, then returns a modified `outputs` or `None`.


This method returns a handler with a :meth:`~.HookHandler.remove` interface to delete the hook. This method returns a handler with a :meth:`~.HookHandler.remove` interface to delete the hook.
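A hedged sketch of how the two hooks might be used; ``log_inputs`` and ``double_outputs`` are made-up names, and ``inputs`` is assumed to be a tuple of tensors:

    import numpy as np
    import megengine as mge
    import megengine.module as M

    net = M.Linear(4, 2)

    def log_inputs(module, inputs):
        print("forward inputs:", [i.shape for i in inputs])
        return inputs               # returning None would keep inputs unchanged

    def double_outputs(module, inputs, outputs):
        return outputs * 2          # returning None would keep outputs unchanged

    h1 = net.register_forward_pre_hook(log_inputs)
    h2 = net.register_forward_hook(double_outputs)
    out = net(mge.tensor(np.ones((1, 4), dtype=np.float32)))
    h1.remove()                     # HookHandler.remove() deletes the hook
    h2.remove()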
@@ -124,12 +124,12 @@ class Module(metaclass=ABCMeta):
returned iterable is guaranteed to be identical, as long as all the involved returned iterable is guaranteed to be identical, as long as all the involved
module objects' ``__dict__`` does not change throughout those calls. module objects' ``__dict__`` does not change throughout those calls.


:param recursive: Whether to recursively scan all the submodules.
:param with_key: Whether to yield keys along with yielded objects.
:param with_parent: Whether to yield ``self`` along with yielded objects.
:param prefix: The prefix appended to the yielded keys.
:param predicate: The predicate function applied to scanned objects.
:param seen: A dict that records whether a module has been traversed yet.
:param recursive: whether to recursively scan all the submodules.
:param with_key: whether to yield keys along with yielded objects.
:param with_parent: whether to yield ``self`` along with yielded objects.
:param prefix: prefix appended to the yielded keys.
:param predicate: the predicate function applied to scanned objects.
:param seen: a dict that records whether a module has been traversed yet.
""" """
if seen is None: if seen is None:
seen = set([id(self)]) seen = set([id(self)])
@@ -191,10 +191,10 @@ class Module(metaclass=ABCMeta):
self, prefix: Optional[str] = None, recursive: bool = True, **kwargs self, prefix: Optional[str] = None, recursive: bool = True, **kwargs
) -> Iterable[Tuple[str, Parameter]]: ) -> Iterable[Tuple[str, Parameter]]:
"""Returns an iterable for key :class:`~.Parameter` pairs of the module, where """Returns an iterable for key :class:`~.Parameter` pairs of the module, where
``key`` is the dotted path from this module to the :class:`~.Parameter` .
``key`` is the dotted path from this module to the :class:`~.Parameter`.


:param prefix: The prefix prepended to the keys.
:param recursive: If ``True``, returns all :class:`~.Parameter` within this
:param prefix: prefix prepended to the keys.
:param recursive: if ``True``, returns all :class:`~.Parameter` within this
module, else only returns :class:`~.Parameter` that are direct attributes module, else only returns :class:`~.Parameter` that are direct attributes
of this module. of this module.
""" """
@@ -223,7 +223,7 @@ class Module(metaclass=ABCMeta):


Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`.


:param recursive: If ``True``, returns all buffers within this
:param recursive: if ``True``, returns all buffers within this
module, else only returns buffers that are direct attributes module, else only returns buffers that are direct attributes
of this module. of this module.
""" """
@@ -239,8 +239,8 @@ class Module(metaclass=ABCMeta):


Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`. Buffer is defined to be :class:`~.Tensor` excluding :class:`~.Parameter`.


:param prefix: The prefix prepended to the keys.
:param recursive: If ``True``, returns all buffers within this
:param prefix: prefix prepended to the keys.
:param recursive: if ``True``, returns all buffers within this
module, else only returns buffers that are direct attributes module, else only returns buffers that are direct attributes
of this module. of this module.
""" """
@@ -285,7 +285,7 @@ class Module(metaclass=ABCMeta):
module, including itself, where 'key' is the dotted path from this module to the module, including itself, where 'key' is the dotted path from this module to the
submodules. submodules.


:param prefix: The prefix prepended to the path.
:param prefix: prefix prepended to the path.
""" """
if "with_parent" in kwargs and kwargs["with_parent"]: if "with_parent" in kwargs and kwargs["with_parent"]:
yield ("" if prefix is None else prefix), self, None yield ("" if prefix is None else prefix), self, None
@@ -296,24 +296,24 @@ class Module(metaclass=ABCMeta):
) )


def apply(self, fn: "Callable[[Module], Any]") -> None: def apply(self, fn: "Callable[[Module], Any]") -> None:
"""Apply function ``fn`` to all the modules within this module, including
"""Applies function ``fn`` to all the modules within this module, including
itself. itself.


:param fn: The function to be applied on modules.
:param fn: the function to be applied on modules.
""" """
for it in self.modules(): for it in self.modules():
fn(it) fn(it)


@deprecated(version="1.0") @deprecated(version="1.0")
def zero_grad(self) -> None: def zero_grad(self) -> None:
"""Set all parameters' grads to zero
"""Sets all parameters' grads to zero
""" """
for param in self.parameters(): for param in self.parameters():
if param.grad is not None: if param.grad is not None:
param.grad.reset_zero() param.grad.reset_zero()


def train(self, mode: bool = True, recursive: bool = True) -> None: def train(self, mode: bool = True, recursive: bool = True) -> None:
"""Set training mode of all the modules within this module (including itself) to
"""Sets training mode of all the modules within this module (including itself) to
``mode``. This effectively sets the ``training`` attributes of those modules ``mode``. This effectively sets the ``training`` attributes of those modules
to ``mode``, but only has effect on certain modules (e.g. to ``mode``, but only has effect on certain modules (e.g.
:class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`) :class:`~.BatchNorm2d`, :class:`~.Dropout`, :class:`~.Observer`)
@@ -331,14 +331,14 @@ class Module(metaclass=ABCMeta):
self.apply(fn) self.apply(fn)


def eval(self) -> None: def eval(self) -> None:
"""Set training mode of all the modules within this module (including itself) to
"""Sets training mode of all the modules within this module (including itself) to
``False``. See :meth:`~.Module.train` for details. ``False``. See :meth:`~.Module.train` for details.
""" """
self.train(False) self.train(False)
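A brief sketch of switching modes; only modules such as ``BatchNorm2d`` or ``Dropout`` actually change behaviour:

    import megengine.module as M

    net = M.Sequential(M.Linear(8, 8), M.Dropout(0.5))
    net.eval()    # sets the training attribute of every submodule to False
    net.train()   # and back to True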


def disable_quantize(self, value=True): def disable_quantize(self, value=True):
r""" r"""
Set ``module``'s ``quantize_disabled`` attribute and return ``module``.
Sets ``module``'s ``quantize_disabled`` attribute and returns ``module``.
Could be used as a decorator. Could be used as a decorator.
""" """


@@ -351,7 +351,7 @@ class Module(metaclass=ABCMeta):
def replace_param( def replace_param(
self, params: dict, start_pos: int, seen: Optional[Set[int]] = None self, params: dict, start_pos: int, seen: Optional[Set[int]] = None
): ):
"""Replace module's parameters with `params`, used by :class:`~.ParamPack` to
"""Replaces module's parameters with `params`, used by :class:`~.ParamPack` to
speed up multi-machine training. speed up multi-machine training.
""" """
offset = 0 offset = 0
@@ -407,7 +407,7 @@ class Module(metaclass=ABCMeta):
state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]], state_dict: Union[dict, Callable[[str, Tensor], Optional[np.ndarray]]],
strict=True, strict=True,
): ):
r"""Load a given dictionary created by :func:`state_dict` into this module.
r"""Loads a given dictionary created by :func:`state_dict` into this module.
If ``strict`` is ``True``, the keys of :func:`state_dict` must exactly match the keys If ``strict`` is ``True``, the keys of :func:`state_dict` must exactly match the keys
returned by :func:`state_dict`. returned by :func:`state_dict`.
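A minimal sketch of strict versus non-strict loading; ``"checkpoint.pkl"`` is a hypothetical path produced earlier with ``mge.save(net.state_dict(), "checkpoint.pkl")``:

    import megengine as mge
    import megengine.module as M

    net = M.Linear(784, 10)
    state = mge.load("checkpoint.pkl")
    net.load_state_dict(state)                 # strict=True: keys must match exactly
    net.load_state_dict(state, strict=False)   # tolerates missing or unexpected keys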


@@ -518,3 +518,57 @@ class Module(metaclass=ABCMeta):
loaded.append(k) loaded.append(k)


return set(loaded), set(skipped) return set(loaded), set(skipped)

def __setattr__(self, name: str, value):
if _is_module(value):
modules = self.__dict__.get("_modules")
if modules is None:
raise AttributeError(
"cannot assign module before Module.__init__() call"
)
if name not in self.__dict__:
modules.append(name)
super().__setattr__(name, value)

def __delattr__(self, name: str):
if name in self.__dict__ and _is_module(self.__dict__[name]):
modules = self.__dict__.get("_modules")
modules.remove(name)
super().__delattr__(name)

def _module_info_string(self) -> str:
r"""Set the extra representation of the module.
"""
return ""

def __repr__(self):
def add_indent(repr_str, num_spaces):
s = repr_str.split("\n")
# don't do anything for single-line stuff
if len(s) == 1:
return repr_str
first = s.pop(0)
s = [(num_spaces * " ") + line for line in s]
s = "\n".join(s)
s = first + "\n" + s
return s

extra_lines = []
extra_repr = self._module_info_string()
if extra_repr:
extra_lines = extra_repr.split("\n")
child_lines = [
"(" + name + "): " + add_indent(repr(self.__dict__[name]), 2)
for name in self._modules
]
lines = extra_lines + child_lines
main_str = self.__class__.__name__ + "("
if lines:
# simple one-liner info, which most builtin Modules will use
if len(extra_lines) == 1 and not child_lines:
main_str += extra_lines[0]
else:
main_str += "\n " + "\n ".join(lines) + "\n"

main_str += ")"
return main_str
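Together with the ``_modules`` bookkeeping in ``__setattr__``, the new ``__repr__`` nests child representations; a hypothetical two-layer module would print roughly like this:

    import megengine.module as M

    class TinyNet(M.Module):
        def __init__(self):
            super().__init__()
            self.fc0 = M.Linear(784, 320)
            self.fc1 = M.Linear(320, 10)

        def forward(self, x):
            return self.fc1(self.fc0(x))

    print(TinyNet())
    # expected output (approximately):
    # TinyNet(
    #   (fc0): Linear(in_features=784, out_features=320, bias=True)
    #   (fc1): Linear(in_features=320, out_features=10, bias=True)
    # )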

+ 5
- 0
imperative/python/megengine/module/pooling.py View File

@@ -29,6 +29,11 @@ class _PoolNd(Module):
def forward(self, inp): def forward(self, inp):
pass pass


def _module_info_string(self) -> str:
return "kernel_size={kernel_size}, stride={stride}, padding={padding}".format(
**self.__dict__
)
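Analogously to ``Linear``, the pooling modules now expose their hyper-parameters through ``repr`` (a sketch assuming the usual constructor arguments):

    import megengine.module as M

    print(M.MaxPool2d(kernel_size=2, stride=2, padding=0))
    # expected output (approximately):
    # MaxPool2d(kernel_size=2, stride=2, padding=0)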



class MaxPool2d(_PoolNd): class MaxPool2d(_PoolNd):
r"""Applies a 2D max pooling over an input. r"""Applies a 2D max pooling over an input.


+ 5
- 13
imperative/python/megengine/module/qat/conv_bn.py View File

@@ -5,7 +5,7 @@
# Unless required by applicable law or agreed to in writing, # Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an # software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from ...functional import add_update, ones, relu, sqrt, sum, zeros
from ...functional import ones, relu, sqrt, sum, zeros
from ...quantization.utils import fake_quant_bias from ...quantization.utils import fake_quant_bias
from .. import conv_bn as Float from .. import conv_bn as Float
from .module import QATModule from .module import QATModule
@@ -76,18 +76,10 @@ class _ConvBnActivation2d(Float._ConvBnActivation2d, QATModule):
bn_var.detach() * num_elements_per_channel / (num_elements_per_channel - 1) bn_var.detach() * num_elements_per_channel / (num_elements_per_channel - 1)
) )
exponential_average_factor = 1 - self.bn.momentum exponential_average_factor = 1 - self.bn.momentum
add_update(
self.bn.running_mean,
delta=bn_mean,
alpha=1 - exponential_average_factor,
beta=exponential_average_factor,
)
add_update(
self.bn.running_var,
delta=bn_var,
alpha=1 - exponential_average_factor,
beta=exponential_average_factor,
)
self.bn.running_mean *= self.bn.momentum
self.bn.running_mean += exponential_average_factor * bn_mean
self.bn.running_var *= self.bn.momentum
self.bn.running_var += exponential_average_factor * bn_var
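The removed ``add_update`` calls are replaced by a plain in-place exponential moving average; a standalone sketch of the same arithmetic with NumPy standing in for the tensors:

    import numpy as np

    momentum = 0.9
    exponential_average_factor = 1 - momentum

    running_mean = np.zeros(8, dtype=np.float32)
    bn_mean = np.random.randn(8).astype(np.float32)

    # equivalent to: running_mean = momentum * running_mean + (1 - momentum) * bn_mean
    running_mean *= momentum
    running_mean += exponential_average_factor * bn_mean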


def calc_conv_bn_qat(self, inp, approx=True): def calc_conv_bn_qat(self, inp, approx=True):
if self.training and not approx: if self.training and not approx:


+ 1
- 1
imperative/python/megengine/module/qat/linear.py View File

@@ -18,7 +18,7 @@ class Linear(Float.Linear, QATModule):
:param in_features: size of each input sample. :param in_features: size of each input sample.
:param out_features: size of each output sample. :param out_features: size of each output sample.
:param bias: If set to ``False``, the layer will not learn an additive bias. :param bias: If set to ``False``, the layer will not learn an additive bias.
Default: ``True``
Default: True


""" """




+ 1
- 1
imperative/python/megengine/module/qat/module.py View File

@@ -52,7 +52,7 @@ class QATModule(Module):
self.weight_fake_quant = safe_call(qconfig.weight_fake_quant) self.weight_fake_quant = safe_call(qconfig.weight_fake_quant)


def _enable_exec(self, with_module, func, enable): def _enable_exec(self, with_module, func, enable):
if not with_module:
if not with_module or not func:
return return
if enable: if enable:
func.enable() func.enable()


+ 2
- 2
imperative/python/megengine/module/quantized/concat.py View File

@@ -15,7 +15,7 @@ from .module import QuantizedModule


class Concat(QuantizedModule): class Concat(QuantizedModule):
r""" r"""
A :class:`~.QuantizedModule` to do quantized concat, inference only.
A :class:`~.QuantizedModule` to do quantized concat, used for inference only.
""" """


def __init__(self, dtype=None): def __init__(self, dtype=None):
@@ -29,7 +29,7 @@ class Concat(QuantizedModule):
@classmethod @classmethod
def from_qat_module(cls, qat_module: QAT.Concat): def from_qat_module(cls, qat_module: QAT.Concat):
r""" r"""
return a :class:`~.QuantizedModule` instance converted from a
Return a :class:`~.QuantizedModule` instance converted from a
:class:`~.QATModule` instance. :class:`~.QATModule` instance.
""" """
return cls(qat_module.get_activation_dtype()) return cls(qat_module.get_activation_dtype())

+ 5
- 5
imperative/python/megengine/module/quantized/conv.py View File

@@ -11,17 +11,17 @@ import numpy as np


from ... import module as Float from ... import module as Float
from ...core.tensor import dtype from ...core.tensor import dtype
from ...functional import conv_bias_activation
from ...functional.nn import conv_bias_activation
from ...tensor import Parameter from ...tensor import Parameter
from ..qat import conv as QAT from ..qat import conv as QAT
from .module import QuantizedModule from .module import QuantizedModule




class Conv2d(Float.Conv2d, QuantizedModule): class Conv2d(Float.Conv2d, QuantizedModule):
r"""quantized version of :class:`~.qat.conv.Conv2d`."""
r"""Applies a 2D convolution over an quantized input tensor, inference only.
r"""Quantized version of :class:`~.qat.conv.Conv2d`."""
r"""Applies a 2D convolution over a quantized input tensor, used for inference only.


The parameter is same with :class: `~.Conv2d`
The parameters are the same as :class:`~.Conv2d`.
""" """


def __init__( def __init__(
@@ -101,7 +101,7 @@ class Conv2d(Float.Conv2d, QuantizedModule):




class ConvRelu2d(Conv2d): class ConvRelu2d(Conv2d):
r"""quantized version of :class:`~.qat.conv.ConvRelu2d`."""
r"""Quantized version of :class:`~.qat.conv.ConvRelu2d`."""


def forward(self, inp): def forward(self, inp):
return self.calc_conv_quantized(inp, nonlinear_mode="RELU") return self.calc_conv_quantized(inp, nonlinear_mode="RELU")

+ 5
- 5
imperative/python/megengine/module/quantized/conv_bn.py View File

@@ -11,15 +11,15 @@ from .conv import Conv2d




class _ConvBnActivation2d(Conv2d): class _ConvBnActivation2d(Conv2d):
r"""Applies a 2D convolution over an quantized input tensor, inference only.
r"""Applies a 2D convolution over a quantized input tensor, used for inference only.


The parameter is same with :class: `~.Conv2d`
The parameters are the same as :class:`~.Conv2d`.
""" """


@classmethod @classmethod
def from_qat_module(cls, qat_module: QAT._ConvBnActivation2d): def from_qat_module(cls, qat_module: QAT._ConvBnActivation2d):
r""" r"""
return a :class:`~.QuantizedModule` instance converted from a
Return a :class:`~.QuantizedModule` instance converted from a
:class:`~.QATModule` instance. :class:`~.QATModule` instance.
""" """
output_dtype = qat_module.get_activation_dtype() output_dtype = qat_module.get_activation_dtype()
@@ -43,14 +43,14 @@ class _ConvBnActivation2d(Conv2d):




class ConvBn2d(_ConvBnActivation2d): class ConvBn2d(_ConvBnActivation2d):
r"""quantized version of :class:`~.qat.conv_bn.ConvBn2d`."""
r"""Quantized version of :class:`~.qat.conv_bn.ConvBn2d`."""


def forward(self, inp): def forward(self, inp):
return self.calc_conv_quantized(inp, nonlinear_mode="IDENTITY") return self.calc_conv_quantized(inp, nonlinear_mode="IDENTITY")




class ConvBnRelu2d(_ConvBnActivation2d): class ConvBnRelu2d(_ConvBnActivation2d):
r"""quantized version of :class:`~.qat.conv_bn.ConvBnRelu2d`."""
r"""Quantized version of :class:`~.qat.conv_bn.ConvBnRelu2d`."""


def forward(self, inp): def forward(self, inp):
return self.calc_conv_quantized(inp, nonlinear_mode="RELU") return self.calc_conv_quantized(inp, nonlinear_mode="RELU")

+ 2
- 2
imperative/python/megengine/module/quantized/elemwise.py View File

@@ -13,7 +13,7 @@ from .module import QuantizedModule




class Elemwise(QuantizedModule): class Elemwise(QuantizedModule):
r"""quantized version of :class:`~.qat.elemwise.Elemwise`."""
r"""Quantized version of :class:`~.qat.elemwise.Elemwise`."""


_elemwise_multi_type_mode = P.ElemwiseMultiType.Mode _elemwise_multi_type_mode = P.ElemwiseMultiType.Mode


@@ -30,7 +30,7 @@ class Elemwise(QuantizedModule):
@classmethod @classmethod
def from_qat_module(cls, qat_module: QAT.Elemwise): def from_qat_module(cls, qat_module: QAT.Elemwise):
r""" r"""
return a :class:`~.QuantizedModule` instance converted from a
Return a :class:`~.QuantizedModule` instance converted from a
:class:`~.QATModule` instance. :class:`~.QATModule` instance.
""" """
return cls(qat_module.method.name, qat_module.get_activation_dtype()) return cls(qat_module.method.name, qat_module.get_activation_dtype())

+ 3
- 3
imperative/python/megengine/module/quantized/linear.py View File

@@ -15,7 +15,7 @@ from .module import QuantizedModule




class Linear(QuantizedModule): class Linear(QuantizedModule):
r"""quantized version of :class:`~.qat.linear.Linear`."""
r"""Quantized version of :class:`~.qat.linear.Linear`."""


def __init__( def __init__(
self, dtype: np.dtype = None, self, dtype: np.dtype = None,
@@ -31,7 +31,7 @@ class Linear(QuantizedModule):
inp_scale = dtype.get_scale(inp.dtype) inp_scale = dtype.get_scale(inp.dtype)
w_scale = dtype.get_scale(self.weight.dtype) w_scale = dtype.get_scale(self.weight.dtype)
bias_dtype = dtype.qint32(inp_scale * w_scale) bias_dtype = dtype.qint32(inp_scale * w_scale)
return F.linear(
return F.nn.linear(
inp, inp,
self.weight, self.weight,
None if self.bias is None else self.bias.astype(bias_dtype), None if self.bias is None else self.bias.astype(bias_dtype),
@@ -40,7 +40,7 @@ class Linear(QuantizedModule):
@classmethod @classmethod
def from_qat_module(cls, qat_module: QAT.Linear): def from_qat_module(cls, qat_module: QAT.Linear):
r""" r"""
return a :class:`~.QuantizedModule` instance converted from a
Return a :class:`~.QuantizedModule` instance converted from a
:class:`~.QATModule` instance. :class:`~.QATModule` instance.
""" """
output_dtype = qat_module.get_activation_dtype() output_dtype = qat_module.get_activation_dtype()


+ 1
- 1
imperative/python/megengine/module/quantized/module.py View File

@@ -26,6 +26,6 @@ class QuantizedModule(Module):
@abstractmethod @abstractmethod
def from_qat_module(cls, qat_module: QATModule): def from_qat_module(cls, qat_module: QATModule):
r""" r"""
return a :class:`~.QuantizedModule` instance converted from a
Return a :class:`~.QuantizedModule` instance converted from a
:class:`~.QATModule` instance. :class:`~.QATModule` instance.
""" """

+ 4
- 4
imperative/python/megengine/module/quantized/quant_dequant.py View File

@@ -11,7 +11,7 @@ from .module import QuantizedModule


class QuantStub(QuantizedModule): class QuantStub(QuantizedModule):
r""" r"""
quantized version of :class:`~.qat.quant_dequant.QuantStub`,
Quantized version of :class:`~.qat.quant_dequant.QuantStub`,
will convert input to quantized dtype. will convert input to quantized dtype.
""" """


@@ -25,7 +25,7 @@ class QuantStub(QuantizedModule):
@classmethod @classmethod
def from_qat_module(cls, qat_module: QAT.QuantStub): def from_qat_module(cls, qat_module: QAT.QuantStub):
r""" r"""
return a :class:`~.QuantizedModule` instance converted from a
Return a :class:`~.QuantizedModule` instance converted from a
:class:`~.QATModule` instance. :class:`~.QATModule` instance.
""" """
return cls(qat_module.get_activation_dtype()) return cls(qat_module.get_activation_dtype())
@@ -33,7 +33,7 @@ class QuantStub(QuantizedModule):


class DequantStub(QuantizedModule): class DequantStub(QuantizedModule):
r""" r"""
quantized version of :class:`~.qat.quant_dequant.DequantStub`,
Quantized version of :class:`~.qat.quant_dequant.DequantStub`,
will restore quantized input to float32 dtype. will restore quantized input to float32 dtype.
""" """


@@ -43,7 +43,7 @@ class DequantStub(QuantizedModule):
@classmethod @classmethod
def from_qat_module(cls, qat_module: QAT.DequantStub): def from_qat_module(cls, qat_module: QAT.DequantStub):
r""" r"""
return a :class:`~.QuantizedModule` instance converted from a
Return a :class:`~.QuantizedModule` instance converted from a
:class:`~.QATModule` instance. :class:`~.QATModule` instance.
""" """
return cls() return cls()

+ 13
- 12
imperative/python/megengine/module/sequential.py View File

@@ -26,40 +26,40 @@ class Sequential(Module):
import megengine as mge import megengine as mge
import megengine.module as M import megengine.module as M
import megengine.functional as F import megengine.functional as F
from collections import OrderedDict


batch_size = 64 batch_size = 64
data = mge.tensor(np.zeros((batch_size, 1, 28, 28)), dtype=np.float32) data = mge.tensor(np.zeros((batch_size, 1, 28, 28)), dtype=np.float32)
label = mge.tensor(np.zeros(batch_size,), dtype=np.int32) label = mge.tensor(np.zeros(batch_size,), dtype=np.int32)


data = data.reshape(batch_size, -1) data = data.reshape(batch_size, -1)
net = M.Sequential(
net0 = M.Sequential(
M.Linear(28 * 28, 320), M.Linear(28 * 28, 320),
M.Linear(320, 500),
M.Linear(500, 320),
M.Linear(320, 10) M.Linear(320, 10)
) )
pred = net(data)
pred0 = net0(data)


loss = F.cross_entropy_with_softmax(pred, label)
modules = OrderedDict()
modules["fc0"] = nn.Linear(28 * 28, 320)
modules["fc1"] = nn.Linear(320, 10)
net1 = nn.Sequential(modules)


pred1 = net1(data)
""" """


def __init__(self, *args): def __init__(self, *args):
super().__init__() super().__init__()
self.layer_keys = [] self.layer_keys = []
self.layer_values = []
if len(args) == 1 and isinstance(args[0], OrderedDict): if len(args) == 1 and isinstance(args[0], OrderedDict):
for key, module in args[0].items(): for key, module in args[0].items():
# self.add_module(key, module) # self.add_module(key, module)
setattr(self, key, module) setattr(self, key, module)
self.layer_keys.append(key) self.layer_keys.append(key)
self.layer_values.append(module)
else: else:
for idx, module in enumerate(args): for idx, module in enumerate(args):
# self.add_module(str(idx), module) # self.add_module(str(idx), module)
setattr(self, str(idx), module) setattr(self, str(idx), module)
self.layer_keys.append(str(idx)) self.layer_keys.append(str(idx))
self.layer_values.append(module)


def __getitem__(self, idx): def __getitem__(self, idx):
if isinstance(idx, slice): if isinstance(idx, slice):
@@ -67,11 +67,10 @@ class Sequential(Module):
OrderedDict(zip(self.layer_keys[idx], self.layer_values[idx])) OrderedDict(zip(self.layer_keys[idx], self.layer_values[idx]))
) )
else: else:
return self.layer_values[idx]
return getattr(self, self.layer_keys[idx])


def __setitem__(self, idx, module): def __setitem__(self, idx, module):
key = self.layer_keys[idx] key = self.layer_keys[idx]
self.layer_values[idx] = module
return setattr(self, key, module) return setattr(self, key, module)


def __delitem__(self, idx): def __delitem__(self, idx):
@@ -79,11 +78,9 @@ class Sequential(Module):
for key in self.layer_keys[idx]: for key in self.layer_keys[idx]:
delattr(self, key) delattr(self, key)
del self.layer_keys[idx] del self.layer_keys[idx]
del self.layer_values[idx]
else: else:
delattr(self, self.layer_keys[idx]) delattr(self, self.layer_keys[idx])
del self.layer_keys[idx] del self.layer_keys[idx]
del self.layer_values[idx]


def __len__(self): def __len__(self):
return len(self.layer_keys) return len(self.layer_keys)
@@ -91,6 +88,10 @@ class Sequential(Module):
def __iter__(self): def __iter__(self):
return iter(self.layer_values) return iter(self.layer_values)


@property
def layer_values(self):
return [getattr(self, key) for key in self.layer_keys]

def forward(self, inp): def forward(self, inp):
for layer in self.layer_values: for layer in self.layer_values:
inp = layer(inp) inp = layer(inp)
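Because ``layer_values`` is now a property derived from ``layer_keys`` via ``getattr``, item assignment and attribute access stay consistent; a short sketch:

    import megengine.module as M

    net = M.Sequential(M.Linear(8, 4), M.Linear(4, 2))
    net[0] = M.Linear(8, 4)                 # __setitem__ routes through setattr
    assert net[0] is getattr(net, "0")      # indexing and attribute access agree
    print(len(net))                         # 2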


+ 5
- 5
imperative/python/megengine/optimizer/adadelta.py View File

@@ -22,13 +22,13 @@ class Adadelta(Optimizer):


:param params: iterable of parameters to optimize or dicts defining :param params: iterable of parameters to optimize or dicts defining
parameter groups. parameter groups.
:param lr: coefficient that scale delta before it is applied
to the parameters (default: 1.0).
:param lr: coefficient that scales delta before it is applied
to the parameters. Default: 1.0
:param rho: coefficient used for computing a running average :param rho: coefficient used for computing a running average
of squared gradients (default: 0.9).
of squared gradients. Default: 0.9
:param eps: term added to the denominator to improve :param eps: term added to the denominator to improve
numerical stability (default: 1e-6).
:param weight_decay: weight decay (L2 penalty) (default: 0).
numerical stability. Default: 1e-6
:param weight_decay: weight decay (L2 penalty). Default: 0
""" """


def __init__( def __init__(


+ 5
- 5
imperative/python/megengine/optimizer/adagrad.py View File

@@ -23,12 +23,12 @@ class Adagrad(Optimizer):


:param params: iterable of parameters to optimize or dicts defining :param params: iterable of parameters to optimize or dicts defining
parameter groups. parameter groups.
:param lr: coefficient that scale delta before it is applied
to the parameters (default: 1e-2).
:param lr_decay: learning rate decay (default: 0)
:param lr: coefficient that scales delta before it is applied
to the parameters. Default: 1e-2
:param lr_decay: learning rate decay. Default: 0
:param eps: term added to the denominator to improve :param eps: term added to the denominator to improve
numerical stability (default: 1e-10).
:param weight_decay: weight decay (L2 penalty) (default: 0).
numerical stability. Default: 1e-10
:param weight_decay: weight decay (L2 penalty). Default: 0
""" """


def __init__( def __init__(
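A hedged usage sketch for the two optimizers, assuming both are exported from ``megengine.optimizer``; the hyper-parameters are simply the documented defaults and the training loop is omitted:

    import megengine.module as M
    import megengine.optimizer as optim

    net = M.Linear(784, 10)
    opt_adadelta = optim.Adadelta(net.parameters(), lr=1.0, rho=0.9, eps=1e-6, weight_decay=0)
    opt_adagrad = optim.Adagrad(net.parameters(), lr=1e-2, lr_decay=0, eps=1e-10, weight_decay=0)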


Some files were not shown because too many files changed in this diff
