@@ -227,8 +227,10 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
 #endif
         mgb_assert(exec_success, "TensorRTOpr failed in execution.");
     } else {
+#if MGB_ENABLE_JSON
         TensorRTProfiler trt_profiler;
         m_context->setProfiler(&trt_profiler);
+#endif // MGB_ENABLE_JSON
         // TensorRT documentation stated that IExecutionContext->execute
         // "Synchronously execute inference on a batch", and it does not take a
         // cudaStream_t, we expect it do a device synchronize. But it seems like
@@ -245,8 +247,10 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
         exec_success = m_context->execute(batch, m_trt_iobuf.data());
 #endif
         mgb_assert(exec_success, "trt execution failed: opr=%s", opr->cname());
+#if MGB_ENABLE_JSON
         printf("TRT profile info of opr %s:\n", opr->name().c_str());
         trt_profiler.print_layer_times();
+#endif // MGB_ENABLE_JSON
     }
 }
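For context on what the guarded `TensorRTProfiler` object is doing, below is a minimal sketch of a TensorRT profiler built on the standard `nvinfer1::IProfiler` interface. This is an illustrative assumption, not MegEngine's actual `TensorRTProfiler` implementation: the class and member names here (`SimpleTRTProfiler`, `m_records`) are made up, and the `noexcept` qualifier on `reportLayerTime` follows the TensorRT 8.x headers (older TensorRT versions declare it without `noexcept`).

```cpp
// Sketch only: a minimal nvinfer1::IProfiler, analogous in spirit to the
// TensorRTProfiler guarded by MGB_ENABLE_JSON in the diff above.
#include <NvInfer.h>

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

class SimpleTRTProfiler final : public nvinfer1::IProfiler {
    // (layer name, elapsed milliseconds), in the order TensorRT reports them.
    std::vector<std::pair<std::string, float>> m_records;

public:
    // Once installed via IExecutionContext::setProfiler(), TensorRT invokes
    // this callback for every layer after each execute()/enqueue() call.
    void reportLayerTime(const char* layer_name, float ms) noexcept override {
        m_records.emplace_back(layer_name, ms);
    }

    // Plays the same role as trt_profiler.print_layer_times() in the diff:
    // dump per-layer timings plus a total.
    void print_layer_times() const {
        float total = 0.f;
        for (auto&& rec : m_records) {
            printf("%s: %.3f ms\n", rec.first.c_str(), rec.second);
            total += rec.second;
        }
        printf("total: %.3f ms\n", total);
    }
};
```

This also makes clear why the diff needs two `#if MGB_ENABLE_JSON` blocks rather than one: the profiler's declaration, its registration via `setProfiler`, and the later `print_layer_times()` call all reference `trt_profiler`, so every use site must be compiled out together when `MGB_ENABLE_JSON` is disabled.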