@@ -76,7 +76,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_) {
+    if (use_pad_) {
       T *padded_addr = GetDeviceAddress<T>(workspace, 1);
       if (data_format_ == kOpFormat_NHWC) {
         CalPadNHWC(padded_size_ / sizeof(T), input_addr, n_, old_height_, old_width_, c_, old_height_ + pad_height_,
@@ -133,23 +133,18 @@ class Conv2dGpuFwdKernel : public GpuKernel {
                          [](const int64_t &value) { return static_cast<int>(value); });
     pad_height_ = pad_list[0];
     pad_width_ = pad_list[2];
-    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
+    use_pad_ = !((pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]));
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t input_descriptor_real = nullptr;
     int padA[2];
     int strideA[2] = {stride_[2], stride_[3]};
     int dilaA[2] = {dilation_[2], dilation_[3]};
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
+    if (use_pad_) {
       pad_height_ = pad_list[0] + pad_list[1];
       pad_width_ = pad_list[2] + pad_list[3];
       pad_top_ = pad_list[0];
       pad_left_ = pad_list[2];
-      // if use_pad_ == true, using zero padding in advance, else using the default cudnn pad.
-      if (pad_height_ % 2 == 0 && pad_width_ % 2 == 0) {
-        use_pad_ = false;
-      }
       int dimA[4];
       int strideApadded[4];
       if (data_format_ == kOpFormat_NCHW || data_format_ == kOpFormat_DEFAULT) {
@@ -165,18 +160,12 @@ class Conv2dGpuFwdKernel : public GpuKernel {
       }
       CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(padded_desc_, cudnn_data_type_, 4, dimA, strideApadded),
                                   "cudnnSetTensor4dDescriptor failed");
-      if (use_pad_) {
-        padA[0] = 0;
-        padA[1] = 0;
-      } else {
-        padA[0] = pad_top_;
-        padA[1] = pad_left_;
-      }
+      padA[0] = 0;
+      padA[1] = 0;
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetConvolutionNdDescriptor(conv_desc_, 2, padA, strideA, dilaA, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
         "cudnnSetConvolutionNdDescriptor failed");
-      input_descriptor_real = use_pad_ ? padded_desc_ : input_desc_;
+      input_descriptor_real = padded_desc_;
     } else {
       if (pad_mode_ == kValidPadModeUpperCase || pad_mode_ == kValidPadModeLowerCase) {
         pad_height_ = 0;
@@ -232,7 +221,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     input_size_list_.push_back(input_size_);
     input_size_list_.push_back(filter_size_);
     output_size_list_.push_back(output_size_);
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
+    if (use_pad_ && !is_null_input_) {
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle_, padded_desc_, filter_desc_, conv_desc_, output_desc_,
                                                 conv_algorithm_, &workspace_size_),
@@ -78,7 +78,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_) {
+    if (use_pad_) {
       T *padded = GetDeviceAddress<T>(workspace, 1);
       if (data_format_ == kOpFormat_NHWC) {
         CalPadNHWC(padded_size_ / sizeof(T), x, n_, old_height_, old_width_, c_, old_height_ + pad_height_,
@@ -136,14 +136,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
                          [](const int64_t &value) { return static_cast<int>(value); });
     pad_height_ = pad_list[0];
     pad_width_ = pad_list[2];
-    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
+    use_pad_ = !((pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]));
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t x_desc_real = nullptr;
     int padA[2];
     int strideA[2] = {stride_[0], stride_[1]};
     int dilaA[2] = {dilation_[2], dilation_[3]};
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
+    if (use_pad_) {
       pad_height_ = pad_list[0] + pad_list[1];
       pad_width_ = pad_list[2] + pad_list[3];
       pad_top_ = pad_list[0];
@@ -167,17 +167,12 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetTensorNdDescriptor(padded_descriptor_, cudnn_data_type_, 4, dimA, strideApadded),
         "cudnnSetTensor4dDescriptor failed");
-      if (use_pad_) {
-        padA[0] = 0;
-        padA[1] = 0;
-      } else {
-        padA[0] = pad_top_;
-        padA[1] = pad_left_;
-      }
+      padA[0] = 0;
+      padA[1] = 0;
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetConvolutionNdDescriptor(conv_desc_, 2, padA, strideA, dilaA, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
         "cudnnSetConvolutionNdDescriptor failed");
-      x_desc_real = use_pad_ ? padded_descriptor_ : x_desc_;
+      x_desc_real = padded_descriptor_;
     } else {
       if (pad_mode_ == kValidPadModeUpperCase || pad_mode_ == kValidPadModeLowerCase) {
         pad_height_ = 0;
@@ -231,7 +226,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     input_size_list_.push_back(input_size_);
     output_size_list_.push_back(output_size_);
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
+    if (use_pad_ && !is_null_input_) {
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnGetTensorSizeInBytes(padded_descriptor_, reinterpret_cast<size_t *>(&padded_size_)),
         "cudnnGetTensorSizeInBytes failed");
@@ -77,7 +77,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     }
     const float alpha = 1;
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_) {
+    if (use_pad_) {
       T *padded = GetDeviceAddress<T>(workspace, 1);
       CHECK_CUDNN_RET_WITH_EXCEPT(
@@ -139,14 +139,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
                          [](const int64_t &value) { return static_cast<int>(value); });
     pad_height_ = pad_list[0];
     pad_width_ = pad_list[2];
-    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
+    use_pad_ = !((pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]));
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t dx_desc_real = nullptr;
     int padA[2];
     int strideA[2] = {stride_[0], stride_[1]};
     int dilaA[2] = {dilation_[2], dilation_[3]};
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
+    if (use_pad_) {
       pad_height_ = pad_list[0] + pad_list[1];
       pad_width_ = pad_list[2] + pad_list[3];
       pad_top_ = pad_list[0];
@@ -170,17 +170,12 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetTensorNdDescriptor(padded_descriptor_, cudnn_data_type_, 4, dimA, strideApadded),
         "cudnnSetTensor4dDescriptor failed");
-      if (use_pad_) {
-        padA[0] = 0;
-        padA[1] = 0;
-      } else {
-        padA[0] = pad_top_;
-        padA[1] = pad_left_;
-      }
+      padA[0] = 0;
+      padA[1] = 0;
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetConvolutionNdDescriptor(conv_desc_, 2, padA, strideA, dilaA, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
         "cudnnSetConvolutionNdDescriptor failed");
-      dx_desc_real = use_pad_ ? padded_descriptor_ : dx_desc_;
+      dx_desc_real = padded_descriptor_;
     } else {
       if (pad_mode_ == kValidPadModeUpperCase || pad_mode_ == kValidPadModeLowerCase) {
         pad_height_ = 0;
@@ -233,7 +228,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     input_size_list_.push_back(w_size_);
     output_size_list_.push_back(output_size_);
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
+    if (use_pad_ && !is_null_input_) {
      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(padded_descriptor_, &padded_size_),
                                  "cudnnGetTensorSizeInBytes failed");
@@ -662,8 +662,9 @@ void Pipeline::Run() {
       auto manager = func_graph->manager();
       size_t graph_nums = manager->func_graphs().size();
       if (graph_nums == 1) {
-        resource_->set_gpu_loopsink_flag(true);
-        MS_LOG(INFO) << "Change gpu_loopsink_flag_ to true.";
+        int64_t sinksize = ConfigManager::GetInstance().iter_num();
+        resource_->set_gpu_loopsink(true, sinksize);
+        MS_LOG(INFO) << "Change gpu_loopsink_flag_ to true, set loopsink size to " << sinksize;
       }
     }
   }
@@ -834,7 +835,7 @@ py::object ExecutorPy::Run(const py::tuple &args, const py::object &phase) {
   }
   // Set loopsink size for each phase.
   bool is_loopsink = info_[phase_s]->resource->gpu_loopsink_flag();
-  int64_t sinksize = ConfigManager::GetInstance().iter_num();
+  int64_t sinksize = info_[phase_s]->resource->gpu_loopsink_size();
   ConfigManager::GetInstance().set_gpu_loopsink_size(is_loopsink ? sinksize : 1);
   // If target is not gpu or is loopsink, keep vmloop 1.
   bool g = (MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice);
@@ -74,8 +74,12 @@ class Resource : public ResourceBase {
   const abstract::AbstractBasePtrList &args_spec() const { return args_spec_; }
   void set_args_spec(const abstract::AbstractBasePtrList &args_spec) { args_spec_ = args_spec; }
-  void set_gpu_loopsink_flag(const bool &flag) { gpu_loopsink_flag_ = flag; }
+  void set_gpu_loopsink(const bool &flag, const int64_t size) {
+    gpu_loopsink_flag_ = flag;
+    gpu_loopsink_size_ = size;
+  }
   bool gpu_loopsink_flag() { return gpu_loopsink_flag_; }
+  int64_t gpu_loopsink_size() { return gpu_loopsink_size_; }

   // Reclaim resource and clear the cache.
   // ExecutorPy::Compile() can be called multiple times, so cache
@@ -89,6 +93,7 @@ class Resource : public ResourceBase {
   py::object input_;
   bool is_cleaned_;
   bool gpu_loopsink_flag_{false};
+  int64_t gpu_loopsink_size_{1};
 };

 using ResourcePtr = std::shared_ptr<pipeline::Resource>;
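On the pipeline side, the loopsink size is now captured once at compile time in the `Resource` (from `ConfigManager::GetInstance().iter_num()`) and read back per phase in `ExecutorPy::Run()`, rather than re-queried from the config manager at run time, so each compiled phase keeps the sink size it was compiled with. A minimal sketch of that set/get flow, using a simplified stand-in for `pipeline::Resource` (the surrounding pipeline classes are omitted):

```cpp
#include <cstdint>
#include <iostream>

// Simplified stand-in for pipeline::Resource: flag and size are stored
// together so the run step sees the value captured at compile time.
class Resource {
 public:
  void set_gpu_loopsink(const bool &flag, const int64_t size) {
    gpu_loopsink_flag_ = flag;
    gpu_loopsink_size_ = size;
  }
  bool gpu_loopsink_flag() { return gpu_loopsink_flag_; }
  int64_t gpu_loopsink_size() { return gpu_loopsink_size_; }

 private:
  bool gpu_loopsink_flag_{false};
  int64_t gpu_loopsink_size_{1};
};

int main() {
  Resource resource;
  // Compile time (Pipeline::Run): capture the current iteration count.
  int64_t sinksize = 100;  // stands in for ConfigManager::GetInstance().iter_num()
  resource.set_gpu_loopsink(true, sinksize);

  // Run time (ExecutorPy::Run): use the size stored with this phase,
  // falling back to 1 when loopsink is off.
  int64_t effective = resource.gpu_loopsink_flag() ? resource.gpu_loopsink_size() : 1;
  std::cout << effective << "\n";  // prints "100"
  return 0;
}
```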
@@ -129,6 +129,8 @@ class DatasetHelper:
         Validator.check_is_int(sink_size)
         if sink_size < -1 or sink_size == 0:
             raise ValueError("The sink_size must be -1 or positive, but got sink_size {}.".format(sink_size))
+        if sink_size == -1:
+            sink_size = dataset.get_dataset_size()
         if dataset_sink_mode:
             if context.get_context("enable_ge"):
@@ -14,15 +14,14 @@
 # limitations under the License.
 # ============================================================================
 # a simple tutorial follows; more parameters can be set
-if [ $# != 3 ]
+if [ $# != 2 ]
 then
-    echo "Usage: sh run_standalone_train_gpu.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID]"
+    echo "Usage: sh run_standalone_train_gpu.sh [cifar10|imagenet] [DATA_PATH]"
     exit 1
 fi
 export DATASET_NAME=$1
 export DATA_PATH=$2
-export DEVICE_ID=$3
 python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
-    --device_id=$DEVICE_ID --device_target="GPU" > log 2>&1 &
+    --device_target="GPU" > log 2>&1 &
@@ -345,11 +345,11 @@ epoch: 5 step: 5004, loss is 3.3501816
 ```
 # ========START RESNET50 GPU BENCHMARK========
-step time: 12416.098 ms, fps: 412 img/sec. epoch: 1 step: 20, loss is 6.940182
-step time: 3472.037 ms, fps: 1474 img/sec. epoch: 2 step: 20, loss is 7.078993
-step time: 3469.523 ms, fps: 1475 img/sec. epoch: 3 step: 20, loss is 7.559594
-step time: 3460.311 ms, fps: 1479 img/sec. epoch: 4 step: 20, loss is 6.920937
-step time: 3460.543 ms, fps: 1479 img/sec. epoch: 5 step: 20, loss is 6.814013
+Epoch time: 12416.098 ms, fps: 412 img/sec. epoch: 1 step: 20, loss is 6.940182
+Epoch time: 3472.037 ms, fps: 1474 img/sec. epoch: 2 step: 20, loss is 7.078993
+Epoch time: 3469.523 ms, fps: 1475 img/sec. epoch: 3 step: 20, loss is 7.559594
+Epoch time: 3460.311 ms, fps: 1479 img/sec. epoch: 4 step: 20, loss is 6.920937
+Epoch time: 3460.543 ms, fps: 1479 img/sec. epoch: 5 step: 20, loss is 6.814013
 ...
 ```
 ## [Evaluation Process](#contents)
@@ -53,7 +53,7 @@ class MyTimeMonitor(Callback):
     def step_end(self, run_context):
         step_mseconds = (time.time() - self.step_time) * 1000
         fps = self.batch_size / step_mseconds * 1000 * self.size
-        print("step time: {:5.3f} ms, fps: {:d} img/sec.".format(step_mseconds, int(fps)), flush=True, end=" ")
+        print("Epoch time: {:5.3f} ms, fps: {:d} img/sec.".format(step_mseconds, int(fps)), flush=True, end=" ")

 def pad(image):
     zeros = np.zeros([224, 224, 1], dtype=np.uint8)