|
|
@@ -16,16 +16,11 @@ from modelscope.utils.test_utils import download_and_untar, test_level |
|
|
logger = get_logger() |
|
|
logger = get_logger() |
|
|
|
|
|
|
|
|
WAV_FILE = 'data/test/audios/asr_example.wav' |
|
|
WAV_FILE = 'data/test/audios/asr_example.wav' |
|
|
|
|
|
URL_FILE = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example.wav' |
|
|
|
|
|
|
|
|
LITTLE_TESTSETS_FILE = 'data_aishell.tar.gz' |
|
|
LITTLE_TESTSETS_FILE = 'data_aishell.tar.gz' |
|
|
LITTLE_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/data_aishell.tar.gz' |
|
|
LITTLE_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/data_aishell.tar.gz' |
|
|
|
|
|
|
|
|
AISHELL1_TESTSETS_FILE = 'aishell1.tar.gz' |
|
|
|
|
|
AISHELL1_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/aishell1.tar.gz' |
|
|
|
|
|
|
|
|
|
|
|
TFRECORD_TESTSETS_FILE = 'tfrecord.tar.gz' |
|
|
|
|
|
TFRECORD_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/tfrecord.tar.gz' |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
action_info = { |
|
|
action_info = { |
|
|
@@ -45,6 +40,10 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
'checking_item': OutputKeys.TEXT, |
|
|
'checking_item': OutputKeys.TEXT, |
|
|
'example': 'wav_example' |
|
|
'example': 'wav_example' |
|
|
}, |
|
|
}, |
|
|
|
|
|
'test_run_with_url_tf': { |
|
|
|
|
|
'checking_item': OutputKeys.TEXT, |
|
|
|
|
|
'example': 'wav_example' |
|
|
|
|
|
}, |
|
|
'test_run_with_wav_dataset_pytorch': { |
|
|
'test_run_with_wav_dataset_pytorch': { |
|
|
'checking_item': OutputKeys.TEXT, |
|
|
'checking_item': OutputKeys.TEXT, |
|
|
'example': 'dataset_example' |
|
|
'example': 'dataset_example' |
|
|
@@ -132,8 +131,8 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
|
|
|
|
|
|
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') |
|
|
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') |
|
|
def test_run_with_wav_pytorch(self): |
|
|
def test_run_with_wav_pytorch(self): |
|
|
'''run with single waveform file |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
"""run with single waveform file |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
logger.info('Run ASR test with waveform file (pytorch)...') |
|
|
logger.info('Run ASR test with waveform file (pytorch)...') |
|
|
|
|
|
|
|
|
@@ -145,8 +144,8 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
|
|
|
|
|
|
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') |
|
|
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') |
|
|
def test_run_with_pcm_pytorch(self): |
|
|
def test_run_with_pcm_pytorch(self): |
|
|
'''run with wav data |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
"""run with wav data |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
logger.info('Run ASR test with wav data (pytorch)...') |
|
|
logger.info('Run ASR test with wav data (pytorch)...') |
|
|
|
|
|
|
|
|
@@ -158,8 +157,8 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
|
|
|
|
|
|
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') |
|
|
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') |
|
|
def test_run_with_wav_tf(self): |
|
|
def test_run_with_wav_tf(self): |
|
|
'''run with single waveform file |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
"""run with single waveform file |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
logger.info('Run ASR test with waveform file (tensorflow)...') |
|
|
logger.info('Run ASR test with waveform file (tensorflow)...') |
|
|
|
|
|
|
|
|
@@ -171,8 +170,8 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
|
|
|
|
|
|
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') |
|
|
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') |
|
|
def test_run_with_pcm_tf(self): |
|
|
def test_run_with_pcm_tf(self): |
|
|
'''run with wav data |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
"""run with wav data |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
logger.info('Run ASR test with wav data (tensorflow)...') |
|
|
logger.info('Run ASR test with wav data (tensorflow)...') |
|
|
|
|
|
|
|
|
@@ -182,9 +181,20 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
model_id=self.am_tf_model_id, audio_in=audio, sr=sr) |
|
|
model_id=self.am_tf_model_id, audio_in=audio, sr=sr) |
|
|
self.check_result('test_run_with_pcm_tf', rec_result) |
|
|
self.check_result('test_run_with_pcm_tf', rec_result) |
|
|
|
|
|
|
|
|
|
|
|
@unittest.skipUnless(test_level() >= 0, 'skip test in current test level') |
|
|
|
|
|
def test_run_with_url_tf(self): |
|
|
|
|
|
"""run with single url file |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
logger.info('Run ASR test with url file (tensorflow)...') |
|
|
|
|
|
|
|
|
|
|
|
rec_result = self.run_pipeline( |
|
|
|
|
|
model_id=self.am_tf_model_id, audio_in=URL_FILE) |
|
|
|
|
|
self.check_result('test_run_with_url_tf', rec_result) |
|
|
|
|
|
|
|
|
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') |
|
|
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') |
|
|
def test_run_with_wav_dataset_pytorch(self): |
|
|
def test_run_with_wav_dataset_pytorch(self): |
|
|
'''run with datasets, and audio format is waveform |
|
|
|
|
|
|
|
|
"""run with datasets, and audio format is waveform |
|
|
datasets directory: |
|
|
datasets directory: |
|
|
<dataset_path> |
|
|
<dataset_path> |
|
|
wav |
|
|
wav |
|
|
@@ -199,7 +209,7 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
... |
|
|
... |
|
|
transcript |
|
|
transcript |
|
|
data.text # hypothesis text |
|
|
data.text # hypothesis text |
|
|
''' |
|
|
|
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
logger.info('Run ASR test with waveform dataset (pytorch)...') |
|
|
logger.info('Run ASR test with waveform dataset (pytorch)...') |
|
|
logger.info('Downloading waveform testsets file ...') |
|
|
logger.info('Downloading waveform testsets file ...') |
|
|
@@ -215,7 +225,7 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
|
|
|
|
|
|
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') |
|
|
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') |
|
|
def test_run_with_wav_dataset_tf(self): |
|
|
def test_run_with_wav_dataset_tf(self): |
|
|
'''run with datasets, and audio format is waveform |
|
|
|
|
|
|
|
|
"""run with datasets, and audio format is waveform |
|
|
datasets directory: |
|
|
datasets directory: |
|
|
<dataset_path> |
|
|
<dataset_path> |
|
|
wav |
|
|
wav |
|
|
@@ -230,7 +240,7 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase): |
|
|
... |
|
|
... |
|
|
transcript |
|
|
transcript |
|
|
data.text # hypothesis text |
|
|
data.text # hypothesis text |
|
|
''' |
|
|
|
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
logger.info('Run ASR test with waveform dataset (tensorflow)...') |
|
|
logger.info('Run ASR test with waveform dataset (tensorflow)...') |
|
|
logger.info('Downloading waveform testsets file ...') |
|
|
logger.info('Downloading waveform testsets file ...') |
|
|
|