From 97a0087976ee2bb28c2069354d1dc691d512344e Mon Sep 17 00:00:00 2001 From: "bin.xue" Date: Mon, 20 Jun 2022 20:30:27 +0800 Subject: [PATCH] =?UTF-8?q?[to=20#42322933]=E8=AF=AD=E9=9F=B3=E4=BD=BF?= =?UTF-8?q?=E7=94=A8local=20import=E9=81=BF=E5=85=8D=E5=85=B6=E4=BB=96?= =?UTF-8?q?=E4=BB=BB=E5=8A=A1=E5=8A=A0=E8=BD=BDtorchaudio?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit feat: local import torchaudio Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9097950 * feat: local import torchaudio --- modelscope/preprocessors/audio.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modelscope/preprocessors/audio.py b/modelscope/preprocessors/audio.py index a2c15714..bb10c89c 100644 --- a/modelscope/preprocessors/audio.py +++ b/modelscope/preprocessors/audio.py @@ -5,7 +5,6 @@ from typing import Any, Dict import numpy as np import scipy.io.wavfile as wav import torch -import torchaudio.compliance.kaldi as kaldi from numpy.ctypeslib import ndpointer from modelscope.utils.constant import Fields @@ -123,6 +122,8 @@ class Feature: if self.feat_type == 'raw': return utt elif self.feat_type == 'fbank': + # have to use local import before modelscope framework supports lazy loading + import torchaudio.compliance.kaldi as kaldi if len(utt.shape) == 1: utt = utt.unsqueeze(0) feat = kaldi.fbank(utt, **self.fbank_config)