@@ -6,17 +6,17 @@
         "net_name": "ResNet50",
         "mode": 0,
         "iteration": 0,
-        "kernels": ["TensorAdd"]
+        "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
     },
     "DumpSettingsSpec": {
-        "enable": "true: dump enable false: dump disable",
-        "trans_flag": "true: trans to host format,false: not trans format",
+        "enable": "true: dump enable, false: dump disable",
+        "trans_flag": "true: trans to host format, false: not trans format",
         "path": "the dump file folder",
         "net_name": "net name eg:ResNet50",
-        "mode": "0: dump all kernels 1: dump kernels in kernels list",
-        "iteration": "0: all iteration others: specified iteration ",
-        "kernels": "kernel name list need to be dump"
+        "mode": "0: dump all kernels, 1: dump kernels in kernels list",
+        "iteration": "0: all iteration, others: specified iteration ",
+        "kernels": "op's full scope name which need to be dump"
     },
     "other": {}
 }
@@ -6,17 +6,17 @@
         "net_name": "ResNet50",
         "mode": 0,
         "iteration": 0,
-        "kernels": ["AllReduce","BiasAddGrad","Conv2DBackpropFilter","SparseSoftmaxCrossEntropyWithLogits"]
+        "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
     },
     "DumpSettingsSpec": {
-        "enable": "true: dump enable false: dump disable",
-        "trans_flag": "true: trans to host format,false: not trans format",
+        "enable": "true: dump enable, false: dump disable",
+        "trans_flag": "true: trans to host format, false: not trans format",
        "path": "the dump file folder",
         "net_name": "net name eg:ResNet50",
-        "mode": "0: dump all kernels 1: dump kernels in kernels list",
-        "iteration": "0: all iteration others: specified iteration ",
-        "kernels": "kernel name list need to be dump"
+        "mode": "0: dump all kernels, 1: dump kernels in kernels list",
+        "iteration": "0: all iteration, others: specified iteration ",
+        "kernels": "op's full scope name which need to be dump"
     },
     "other": {}
 }
@@ -6,17 +6,17 @@
         "net_name": "ResNet50",
         "mode": 0,
         "iteration": 0,
-        "kernels": ["AllReduce","BiasAddGrad","Conv2DBackpropFilter","SparseSoftmaxCrossEntropyWithLogits"]
+        "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
     },
     "DumpSettingsSpec": {
-        "enable": "true: dump enable false: dump disable",
-        "trans_flag": "true: trans to host format,false: not trans format",
+        "enable": "true: dump enable, false: dump disable",
+        "trans_flag": "true: trans to host format, false: not trans format",
         "path": "the dump file folder",
         "net_name": "net name eg:ResNet50",
-        "mode": "0: dump all kernels 1: dump kernels in kernels list",
-        "iteration": "0: all iteration others: specified iteration ",
-        "kernels": "kernel name list need to be dump"
+        "mode": "0: dump all kernels, 1: dump kernels in kernels list",
+        "iteration": "0: all iteration, others: specified iteration ",
+        "kernels": "op's full scope name which need to be dump"
     },
     "other": {}
 }
@@ -53,6 +53,7 @@ enum DataTypeTransMode {
   FROM_INT8_TO_FLOAT,
   FROM_INT8_TO_INT32,
   FROM_INT64_TO_INT32,
+  FROM_UINT16_TO_INT32,
 };
 
 const std::map<std::pair<TypeId, TypeId>, DataTypeTransMode> mode_map{
@@ -68,7 +69,8 @@ const std::map<std::pair<TypeId, TypeId>, DataTypeTransMode> mode_map{
   {std::pair<TypeId, TypeId>(kNumberTypeUInt8, kNumberTypeInt32), FROM_UINT8_TO_INT32},
   {std::pair<TypeId, TypeId>(kNumberTypeInt8, kNumberTypeFloat32), FROM_INT8_TO_FLOAT},
   {std::pair<TypeId, TypeId>(kNumberTypeInt8, kNumberTypeInt32), FROM_INT8_TO_INT32},
-  {std::pair<TypeId, TypeId>(kNumberTypeInt64, kNumberTypeInt32), FROM_INT64_TO_INT32}};
+  {std::pair<TypeId, TypeId>(kNumberTypeInt64, kNumberTypeInt32), FROM_INT64_TO_INT32},
+  {std::pair<TypeId, TypeId>(kNumberTypeUInt16, kNumberTypeInt32), FROM_UINT16_TO_INT32}};
 
 template <typename SrcT, typename DstT>
 void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) {
@@ -116,6 +118,9 @@ bool CastKernel(const TypeIdArgs &args, void *dst, const size_t data_size, const
     case FROM_INT64_TO_INT32:
       TransDataSrc2Dst<int64_t, int32_t>(args, dst, data_size);
       break;
+    case FROM_UINT16_TO_INT32:
+      TransDataSrc2Dst<uint16_t, int32_t>(args, dst, data_size);
+      break;
     default:
       MS_LOG(ERROR) << "unsupported datatype trans";
       return false;
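
Note: the three hunks above add a uint16 -> int32 cast path end to end: a new DataTypeTransMode value, its (kNumberTypeUInt16, kNumberTypeInt32) entry in mode_map, and the matching dispatch case in CastKernel. As a rough illustration of what the TransDataSrc2Dst<uint16_t, int32_t> instantiation boils down to, here is a minimal standalone sketch; the function name and raw-pointer parameters are illustrative assumptions, not MindSpore code.

    #include <cstddef>
    #include <cstdint>

    // Sketch only: element-wise uint16_t -> int32_t widening, mirroring the
    // kind of loop a TransDataSrc2Dst<uint16_t, int32_t> instantiation performs.
    void CastUint16ToInt32(const uint16_t *src, int32_t *dst, std::size_t elem_count) {
      for (std::size_t i = 0; i < elem_count; ++i) {
        // Every uint16_t value fits in int32_t, so the conversion is lossless.
        dst[i] = static_cast<int32_t>(src[i]);
      }
    }
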
@@ -106,13 +106,13 @@ bool AscendDeviceAddress::SyncDeviceToHost(const std::vector<int> &shape, size_t
     } else {
       auto shape_size = trans::ShapeSize(host_shape);
       auto host = std::vector<uint8_t>(size_);
-      const trans::TypeIdArgs type_args{ptr_, shape_size, type_id_, type};
-      sync_ok = trans::TransDataType(type_args, host.data());
+      SyncMemory(host.data(), ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST);
+      const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type};
+      sync_ok = trans::TransDataType(type_args, host_ptr);
       if (!sync_ok) {
         MS_LOG(ERROR) << "trans data type failed.";
         return false;
       }
-      SyncMemory(host_ptr, host.data(), size, RT_MEMCPY_DEVICE_TO_HOST);
     }
   } else if (format_ == kOpFormat_NC1HWC0 || format_ == kOpFormat_FRAC_Z || format_ == kOpFormat_FRAC_NZ) {
     sync_ok = SyncDeviceToHostAndConvertFormat(shape, size, type, host_ptr);
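
Note: this hunk reorders the dtype-conversion branch of SyncDeviceToHost. The removed lines built TypeIdArgs from the raw device pointer ptr_ and only issued the device-to-host copy afterwards, so TransDataType was effectively reading device memory from host code; the added lines stage the device buffer into the temporary host vector first and then convert the data type directly into host_ptr. A minimal sketch of that copy-then-convert order follows; std::memcpy stands in for the runtime's SyncMemory(..., RT_MEMCPY_DEVICE_TO_HOST) call, and all names here are illustrative assumptions.

    #include <cstddef>
    #include <cstring>
    #include <vector>

    // Sketch of the fixed ordering: stage the (device-side) buffer on the host,
    // then run the element-type conversion purely on host memory.
    template <typename SrcT, typename DstT>
    void CopyThenConvert(const void *device_src, std::size_t elem_count, DstT *host_dst) {
      // 1) Device -> host staging copy (the diff uses SyncMemory with
      //    RT_MEMCPY_DEVICE_TO_HOST; memcpy is only a stand-in here).
      std::vector<SrcT> staging(elem_count);
      std::memcpy(staging.data(), device_src, elem_count * sizeof(SrcT));
      // 2) Type conversion into the caller's buffer (the diff uses
      //    trans::TransDataType writing into host_ptr).
      for (std::size_t i = 0; i < elem_count; ++i) {
        host_dst[i] = static_cast<DstT>(staging[i]);
      }
    }
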
@@ -150,9 +150,9 @@ void DumpOutput(mindspore::session::KernelGraph *graph, const string &dump_path,
     auto output_size = AnfAlgo::GetOutputTensorNum(node);
     for (size_t j = 0; j < output_size; ++j) {
       auto addr = AnfAlgo::GetOutputAddr(node, j);
-      auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
-      auto type = AnfAlgo::GetOutputDeviceDataType(node, j);
-      auto format = AnfAlgo::GetOutputFormat(node, j);
+      auto shape = AnfAlgo::GetOutputInferShape(node, j);
+      auto type = AnfAlgo::GetOutputInferDataType(node, j);
+      auto format = kOpFormat_DEFAULT;
       string filepath = dump_path + '/' + kernel_name + '_' + "output_" + std::to_string(j);
       auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
       std::vector<int> int_shapes;
@@ -181,9 +181,9 @@ void DumpParameters(mindspore::session::KernelGraph *graph, const string &dump_p
       continue;
     }
     auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX);
-    auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX);
-    auto type = AnfAlgo::GetOutputDeviceDataType(item, PRAMATER_OUTPUT_INDEX);
-    auto format = AnfAlgo::GetOutputFormat(item, PRAMATER_OUTPUT_INDEX);
+    auto shape = AnfAlgo::GetOutputInferShape(item, PRAMATER_OUTPUT_INDEX);
+    auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX);
+    auto format = kOpFormat_DEFAULT;
     string filepath = dump_path + '/' + parameter_name + '_' + "output_0";
     auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
     std::vector<int> int_shapes;
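
Note: the last two hunks make DumpOutput and DumpParameters describe dumped tensors by their inferred (host-side) shape and data type together with kOpFormat_DEFAULT, instead of the device shape, device type, and device format. This matches the SyncDeviceToHost fix above: once the dump path syncs data back with format and type translation, the bytes on disk are in host layout, so the recorded metadata should be the host view. Below is a small sketch of filling the int_shapes vector from an inferred shape, assuming the inferred shape arrives as std::vector<std::size_t>; only the int_shapes variable itself appears in the diff, the helper and conversion are illustrative.

    #include <algorithm>
    #include <cstddef>
    #include <iterator>
    #include <vector>

    // Sketch: narrow an inferred shape into the vector<int> the dump path uses.
    // Dimensions are assumed to fit in int; a real implementation would check.
    std::vector<int> ToIntShape(const std::vector<std::size_t> &infer_shape) {
      std::vector<int> int_shapes;
      int_shapes.reserve(infer_shape.size());
      std::transform(infer_shape.begin(), infer_shape.end(), std::back_inserter(int_shapes),
                     [](std::size_t dim) { return static_cast<int>(dim); });
      return int_shapes;
    }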