From 87355c6b1eed39a65ff376a54c97c9ba6ba3a172 Mon Sep 17 00:00:00 2001 From: buxue Date: Mon, 12 Apr 2021 15:32:27 +0800 Subject: [PATCH] fix dynamic_mem free double in ReduceAll and ReduceAny in CPU --- .../kernel_compiler/cpu/reduce_cpu_kernel.cc | 43 ++-- .../kernel_compiler/cpu/reduce_cpu_kernel.h | 10 +- .../cpu/reduce_logic_cpu_kernel.cc | 185 ------------------ .../cpu/reduce_logic_cpu_kernel.h | 53 ----- .../device/ascend/ascend_device_address.cc | 3 +- .../runtime/device/cpu/cpu_device_address.cc | 2 +- .../runtime/device/cpu/cpu_kernel_runtime.cc | 6 +- 7 files changed, 43 insertions(+), 259 deletions(-) delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_logic_cpu_kernel.cc delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_logic_cpu_kernel.h diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc index 2665c0d2ee..e54e64a731 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc @@ -43,20 +43,33 @@ void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) { auto last = std::unique(axis_.begin(), axis_.end()); axis_.erase(last, axis_.end()); auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); - if (kernel_name == "ReduceMax") { - reduce_type_ = 1; - reduce_func_ = [](const T *input, size_t pos, T *out) { *out = std::max(input[pos], *out); }; - } else if (kernel_name == "ReduceMin") { - reduce_type_ = 2; - reduce_func_ = [](const T *input, size_t pos, T *out) { *out = std::min(input[pos], *out); }; - } else if (kernel_name == "ReduceSum") { - reduce_type_ = 3; - reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; }; - } else if (kernel_name == "ReduceMean") { - reduce_type_ = 4; - reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; }; + + if constexpr (std::is_same::value) { + if (kernel_name == "ReduceAll") { + reduce_type_ = ReduceType::ReduceAll; + reduce_func_ = [](const T *input, size_t pos, T *out) { *out &= input[pos]; }; + } else if (kernel_name == "ReduceAny") { + reduce_type_ = ReduceType::ReduceAny; + reduce_func_ = [](const T *input, size_t pos, T *out) { *out |= input[pos]; }; + } else { + MS_LOG(EXCEPTION) << "Unsupported reduce operation: " << kernel_name_ << " for bool."; + } } else { - MS_LOG(EXCEPTION) << "unsupported reduce type: " << reduce_type_; + if (kernel_name == "ReduceMax") { + reduce_type_ = ReduceType::ReduceMax; + reduce_func_ = [](const T *input, size_t pos, T *out) { *out = std::max(input[pos], *out); }; + } else if (kernel_name == "ReduceMin") { + reduce_type_ = ReduceType::ReduceMin; + reduce_func_ = [](const T *input, size_t pos, T *out) { *out = std::min(input[pos], *out); }; + } else if (kernel_name == "ReduceSum") { + reduce_type_ = ReduceType::ReduceSum; + reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; }; + } else if (kernel_name == "ReduceMean") { + reduce_type_ = ReduceType::ReduceMean; + reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; }; + } else { + MS_LOG(EXCEPTION) << "Unsupported reduce operation: " << kernel_name; + } } } @@ -73,7 +86,7 @@ bool ReduceCPUKernel::Launch(const std::vector &inputs, for (size_t i = 1; i < input_size; ++i) { reduce_func_(input_addr, i, output_addr); } - if (reduce_type_ == 4) { // 4 is reduce mean + if (reduce_type_ == ReduceType::ReduceMean) { *output_addr /= input_size; } } else { @@ -113,7 +126,7 @@ bool ReduceCPUKernel::Launch(const std::vector &inputs, reduce_func_(input_addr, iter.GetPos(), &output_addr[i]); iter.GenNextPos(); } - if (reduce_type_ == 4) { // 4 is reduce mean + if (reduce_type_ == ReduceType::ReduceMean) { output_addr[i] /= stride; } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h index f6d1e5353d..60bda39fde 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h @@ -24,6 +24,8 @@ namespace mindspore { namespace kernel { +enum class ReduceType { ReduceAll, ReduceAny, ReduceMax, ReduceMin, ReduceSum, ReduceMean }; + template class ReduceCPUKernel : public CPUKernel { public: @@ -36,7 +38,7 @@ class ReduceCPUKernel : public CPUKernel { private: std::vector input_shape_; std::vector axis_; - int reduce_type_{0}; + ReduceType reduce_type_; std::function reduce_func_; }; @@ -75,6 +77,12 @@ MS_REG_CPU_KERNEL_T(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOu ReduceCPUKernel, int32_t); MS_REG_CPU_KERNEL_T(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), ReduceCPUKernel, int64_t); + +MS_REG_CPU_KERNEL_T(ReduceAll, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), + ReduceCPUKernel, bool); + +MS_REG_CPU_KERNEL_T(ReduceAny, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), + ReduceCPUKernel, bool); } // namespace kernel } // namespace mindspore #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_logic_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_logic_cpu_kernel.cc deleted file mode 100644 index 3a93b97aec..0000000000 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_logic_cpu_kernel.cc +++ /dev/null @@ -1,185 +0,0 @@ -/** - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include "backend/kernel_compiler/cpu/reduce_logic_cpu_kernel.h" -#include "runtime/device/cpu/cpu_device_address.h" - -namespace mindspore { -namespace kernel { -const size_t kReduceTypeAll = 1; -const size_t kReduceTypeAny = 2; -const size_t kMaxDim = 100; -static std::map reduce_types_map_ = {{"ReduceAll", 1}, {"ReduceAny", 2}}; - -template -void ReduceLogicCPUKernel::InitKernel(const CNodePtr &kernel_node) { - MS_EXCEPTION_IF_NULL(kernel_node); - std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); - - reduce_type_ = reduce_types_map_[kernel_name]; - if (reduce_type_ == 0) { - MS_LOG(EXCEPTION) << "Array reduce kernel type " << kernel_name << " is not supported."; - } - shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - CheckAxis(kernel_node); - if (shape_.empty()) { - shape_.push_back(1); - } - for (size_t i = 0; i < shape_.size(); ++i) { - if (shape_[i] <= 0) { - MS_LOG(EXCEPTION) << "shape value is invalid."; - } - left_dims_ *= shape_[i]; - } - for (size_t i = 0; i < axis_.size(); ++i) { - stride_ *= shape_[axis_[i]]; - } - if (stride_ <= 0) { - MS_LOG(EXCEPTION) << "stride_ must greater than zero."; - } - left_dims_ = left_dims_ / stride_; -} - -template -bool ReduceLogicCPUKernel::Launch(const std::vector &inputs, - const std::vector & /*workspaces*/, - const std::vector &outputs) { - size_t out_size = left_dims_ * sizeof(T); - size_t in_size = stride_ * out_size; - if (inputs[0]->size != in_size || outputs[0]->size != out_size) { - MS_LOG(EXCEPTION) << "invalid input or output data size!"; - } - auto input = reinterpret_cast(inputs[0]->addr); - auto output = reinterpret_cast(outputs[0]->addr); - int size = inputs[0]->size / sizeof(T); - std::deque new_inputs(IntToSize(size), false); - std::vector transpose_axis; - for (size_t i = 0; i < shape_.size(); ++i) { - bool insert = true; - for (size_t j = 0; j < axis_.size(); ++j) { - if (axis_[j] == i) { - insert = false; - break; - } - } - if (insert) { - transpose_axis.push_back(i); - } - } - (void)transpose_axis.insert(transpose_axis.end(), axis_.begin(), axis_.end()); - Transpose(size, input, shape_, transpose_axis, SizeToInt(shape_.size()), &new_inputs[0]); - ConvertDataToOutput(&new_inputs[0], output); - return true; -} - -template -void ReduceLogicCPUKernel::CheckAxis(const CNodePtr &kernel_node) { - auto axis_addr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(AXIS); - if (axis_addr->isa() || axis_addr->isa()) { - std::vector attr_axis; - std::vector attr_axis_me = AnfAlgo::GetNodeAttr>(kernel_node, AXIS); - (void)std::transform(attr_axis_me.begin(), attr_axis_me.end(), std::back_inserter(attr_axis), - [](const int64_t &value) { return static_cast(value); }); - if (attr_axis.size() > shape_.size()) { - MS_LOG(EXCEPTION) << "invalid axis size: " << axis_.size(); - } else if (attr_axis.empty()) { - for (size_t i = 0; i < shape_.size(); ++i) { - axis_.push_back(i); - } - } else { - for (auto axis : attr_axis) { - while (axis < 0) { - axis += SizeToInt(shape_.size()); - } - if (IntToSize(axis) >= (shape_.size())) { - MS_LOG(EXCEPTION) << "axis value is oversize."; - } - axis_.push_back(IntToSize(axis)); - } - } - } else if (axis_addr->isa()) { - int axis = static_cast(AnfAlgo::GetNodeAttr(kernel_node, AXIS)); - while (axis < 0) { - axis += SizeToInt(shape_.size()); - } - if (IntToSize(axis) >= shape_.size()) { - MS_LOG(EXCEPTION) << "axis value is oversize."; - } - axis_.push_back(IntToSize(axis)); - } else { - MS_LOG(EXCEPTION) << "Attribute axis type is invalid."; - } -} - -template -void ReduceLogicCPUKernel::ConvertDataToOutput(const T *new_input, T *output) { - if (reduce_type_ == kReduceTypeAll) { - for (size_t i = 0; i < left_dims_; ++i) { - auto value{true}; - for (size_t k = 0; k < stride_; ++k) { - value &= new_input[i * stride_ + k]; - } - output[i] = value; - } - } else if (reduce_type_ == kReduceTypeAny) { - for (size_t i = 0; i < left_dims_; ++i) { - auto value{false}; - for (size_t k = 0; k < stride_; ++k) { - value |= new_input[i * stride_ + k]; - } - output[i] = value; - } - } else { - MS_LOG(EXCEPTION) << "Array reduce kernel type " << reduce_type_ << " is not supported."; - } -} - -template -void ReduceLogicCPUKernel::Transpose(const int size, const T *input, const std::vector &input_shape, - const std::vector &input_axis, const int shape_size, T *output) { - int size_offset[kMaxDim]; - size_offset[0] = size / SizeToInt(input_shape[0]); - for (int i = 1; i < shape_size; ++i) { - size_offset[i] = size_offset[i - 1] / SizeToInt(input_shape[i]); - } - auto task = [&](size_t start, size_t end) { - int pos_array[kMaxDim]; - for (size_t position = start; position < end; position += 1) { - size_t temp_position = position; - pos_array[0] = temp_position / size_offset[0]; - for (int i = 1; i < shape_size; ++i) { - temp_position -= pos_array[i - 1] * size_offset[i - 1]; - pos_array[i] = temp_position / size_offset[i]; - } - size_t new_position = pos_array[SizeToInt(input_axis[shape_size - 1])]; - size_t new_position_size = 1; - for (int j = shape_size - 2; j >= 0; j--) { - new_position_size *= SizeToInt(input_shape[SizeToInt(input_axis[j + 1])]); - new_position += pos_array[SizeToInt(input_axis[j])] * new_position_size; - } - output[new_position] = input[position]; - } - }; - CPUKernelUtils::ParallelFor(task, size); - return; -} -} // namespace kernel -} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_logic_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_logic_cpu_kernel.h deleted file mode 100644 index b94e52d5fc..0000000000 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_logic_cpu_kernel.h +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_LOGIC_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_LOGIC_CPU_KERNEL_H_ -#include -#include -#include -#include "backend/kernel_compiler/cpu/cpu_kernel.h" -#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" - -namespace mindspore { -namespace kernel { -template -class ReduceLogicCPUKernel : public CPUKernel { - public: - ReduceLogicCPUKernel() = default; - ~ReduceLogicCPUKernel() override = default; - void InitKernel(const CNodePtr &kernel_node) override; - bool Launch(const std::vector &inputs, const std::vector &workspace, - const std::vector &outputs) override; - - private: - void Transpose(const int size, const T *input, const std::vector &input_shape, - const std::vector &input_axis, const int shape_size, T *output); - void ConvertDataToOutput(const T *input, T *output); - void CheckAxis(const CNodePtr &kernel_node); - size_t reduce_type_ = 0; - std::vector axis_; - std::vector shape_; - size_t left_dims_ = 1; - size_t stride_ = 1; -}; - -MS_REG_CPU_KERNEL_T(ReduceAll, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), - ReduceLogicCPUKernel, bool); -MS_REG_CPU_KERNEL_T(ReduceAny, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), - ReduceLogicCPUKernel, bool); -} // namespace kernel -} // namespace mindspore -#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_LOGIC_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc index 80fc92e975..c4012b5fe8 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc @@ -666,7 +666,8 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std:: } std::string file_extension = ".bin"; if (trans_flag) { - std::string path = filepath + '_' + shape + '_' + TypeIdLabel(host_type) + '_' + host_fmt + file_extension; + std::string path = + filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + host_fmt + file_extension; MS_LOG(INFO) << "E2E Dump path is " << path; mindspore::tensor::TensorPtr out_tensor = std::make_shared(host_type, host_shape); size_t host_size = out_tensor->data().nbytes(); diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc index 3efb8d2973..1508d6de11 100644 --- a/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_device_address.cc @@ -38,7 +38,7 @@ bool CPUDeviceAddress::DumpMemToFile(const std::string &filepath, const std::str } } std::string file_extension = ".bin"; - std::string path = filepath + '_' + shape + '_' + TypeIdLabel(type_id_) + '_' + format_ + file_extension; + std::string path = filepath + '_' + shape + '_' + TypeIdToType(type_id_)->ToString() + '_' + format_ + file_extension; MS_LOG(DEBUG) << "E2E Dump path is " << path; auto host_tmp = std::vector(size_); auto ret_code = memcpy_s(host_tmp.data(), size_, ptr_, size_); diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc index 9e32675e1f..76da8e20b2 100644 --- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc @@ -378,9 +378,6 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink auto &dump_json_parser = DumpJsonParser::GetInstance(); dump_json_parser.UpdateDumpIter(); bool iter_dump_flag = dump_json_parser.GetIterDumpFlag(); - if (iter_dump_flag) { - CPUE2eDump::DumpParametersAndConst(kernel_graph); - } for (const auto &kernel : kernels) { #ifdef ENABLE_PROFILE @@ -436,6 +433,9 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink MS_LOG(INFO) << "cpu kernel: " << kernel->fullname_with_scope() << " costs " << cost_time * 1e6 << " us"; #endif } + if (iter_dump_flag) { + CPUE2eDump::DumpParametersAndConst(kernel_graph); + } return true; } } // namespace cpu