# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import tarfile
import unittest

import requests

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level

logger = get_logger()

WAV_FILE = 'data/test/audios/asr_example.wav'

LITTLE_TESTSETS_FILE = 'data_aishell.tar.gz'
LITTLE_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/data_aishell.tar.gz'

AISHELL1_TESTSETS_FILE = 'aishell1.tar.gz'
AISHELL1_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/aishell1.tar.gz'
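
# Remote test fixtures: LITTLE_TESTSETS_* points at a small waveform-format
# AISHELL-1 subset (128 utterances in the example results below), while
# AISHELL1_TESTSETS_* points at what appears to be the full AISHELL-1 test set
# in Kaldi ark format (7176 utterances). Both archives are downloaded into the
# temporary .tmp workspace on first use and reused on later runs.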


def un_tar_gz(fname, dirs):
    '''Extract a .tar.gz archive into the directory `dirs`.'''
    with tarfile.open(fname) as t:
        t.extractall(path=dirs)
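# For example, un_tar_gz('.tmp/data_aishell.tar.gz', '.tmp') leaves the
# extracted dataset under .tmp/data_aishell/ (hypothetical paths; the tests
# below build the real paths from the workspace and the archive names).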


class AutomaticSpeechRecognitionTest(unittest.TestCase):

    def setUp(self) -> None:
        # Paraformer ASR model trained on AISHELL-1; the pipeline downloads
        # it from the ModelScope hub by this id.
        self._am_model_id = 'damo/speech_paraformer_asr_nat-aishell1-pytorch'
        # temporary workspace for downloaded archives and extracted datasets
        self._workspace = os.path.join(os.getcwd(), '.tmp')
        os.makedirs(self._workspace, exist_ok=True)

    @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
    def test_run_with_wav(self):
        '''run with a single waveform file'''

        logger.info('Run ASR test with waveform file ...')

        wav_file_path = os.path.join(os.getcwd(), WAV_FILE)

        inference_16k_pipeline = pipeline(
            task=Tasks.auto_speech_recognition, model=[self._am_model_id])
        self.assertIsNotNone(inference_16k_pipeline)

        rec_result = inference_16k_pipeline(wav_file_path)
        self.assertGreater(len(rec_result['rec_result']), 0)
        self.assertNotEqual(rec_result['rec_result'], 'None')
        '''
        result structure:
        {
            'rec_result': '每一天都要快乐喔'  # i.e. "be happy every day"
        }
        or
        {
            'rec_result': 'None'
        }
        '''
        logger.info('test_run_with_wav rec result: '
                    + rec_result['rec_result'])

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_run_with_wav_dataset(self):
        '''run with a dataset whose audio is in waveform format
        dataset directory layout:
            <dataset_path>
                wav
                    test        # test sets
                        xx.wav
                        ...
                    dev         # dev sets
                        yy.wav
                        ...
                    train       # train sets
                        zz.wav
                        ...
                transcript
                    data.text   # reference transcription text
        '''
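        # data.text is assumed to follow the AISHELL-style transcript format,
        # one utterance per line: '<utterance_id> <reference text>'.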

        logger.info('Run ASR test with waveform dataset ...')
        logger.info('Downloading waveform testsets file ...')

        # download the testsets archive unless it is already cached
        testsets_file_path = os.path.join(self._workspace,
                                          LITTLE_TESTSETS_FILE)
        if not os.path.exists(testsets_file_path):
            r = requests.get(LITTLE_TESTSETS_URL)
            with open(testsets_file_path, 'wb') as f:
                f.write(r.content)

        # strip both extensions: 'data_aishell.tar.gz' -> 'data_aishell'
        testsets_dir_name = os.path.splitext(
            os.path.splitext(os.path.basename(LITTLE_TESTSETS_FILE))[0])[0]
        # dataset_path = <cwd>/.tmp/data_aishell/wav/test
        dataset_path = os.path.join(self._workspace, testsets_dir_name, 'wav',
                                    'test')

        # extract the archive if the dataset directory does not exist yet
        if not os.path.exists(dataset_path):
            un_tar_gz(testsets_file_path, self._workspace)

        inference_16k_pipeline = pipeline(
            task=Tasks.auto_speech_recognition, model=[self._am_model_id])
        self.assertIsNotNone(inference_16k_pipeline)

        rec_result = inference_16k_pipeline(wav_path=dataset_path)
        self.assertGreater(len(rec_result['datasets_result']), 0)
        self.assertGreater(rec_result['datasets_result']['Wrd'], 0)
        '''
        result structure:
        {
            'rec_result': 'None',
            'datasets_result':
            {
                'Wrd': 1654,            # the number of words
                'Snt': 128,             # the number of sentences
                'Corr': 1573,           # the number of correct words
                'Ins': 1,               # the number of inserted words
                'Del': 1,               # the number of deleted words
                'Sub': 80,              # the number of substituted words
                'wrong_words': 82,      # the number of wrong words
                'wrong_sentences': 47,  # the number of wrong sentences
                'Err': 4.96,            # WER/CER (%)
                'S.Err': 36.72          # SER (%)
            }
        }
        '''
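        # Sanity check of the example numbers above, assuming the usual
        # scoring definitions:
        #   wrong_words = Ins + Del + Sub = 1 + 1 + 80 = 82
        #   Err   (CER) ≈ 100 * wrong_words / Wrd     = 100 * 82 / 1654 ≈ 4.96
        #   S.Err (SER) ≈ 100 * wrong_sentences / Snt = 100 * 47 / 128 ≈ 36.72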
        logger.info('test_run_with_wav_dataset datasets result: ')
        logger.info(rec_result['datasets_result'])

    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
    def test_run_with_ark_dataset(self):
        '''run with a dataset whose audio is in Kaldi ark format
        dataset directory layout:
            <dataset_path>
                test    # test sets
                    data.ark
                    data.scp
                    data.text
                dev     # dev sets
                    data.ark
                    data.scp
                    data.text
                train   # train sets
                    data.ark
                    data.scp
                    data.text
        '''
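        # In the Kaldi data layout assumed here, data.ark stores the audio
        # (or feature) matrices, data.scp indexes entries in data.ark by
        # utterance id, and data.text holds the reference transcripts.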

        logger.info('Run ASR test with ark dataset ...')
        logger.info('Downloading ark testsets file ...')

        # download the testsets archive unless it is already cached
        testsets_file_path = os.path.join(self._workspace,
                                          AISHELL1_TESTSETS_FILE)
        if not os.path.exists(testsets_file_path):
            r = requests.get(AISHELL1_TESTSETS_URL)
            with open(testsets_file_path, 'wb') as f:
                f.write(r.content)

        # strip both extensions: 'aishell1.tar.gz' -> 'aishell1'
        testsets_dir_name = os.path.splitext(
            os.path.splitext(os.path.basename(AISHELL1_TESTSETS_FILE))[0])[0]
        # dataset_path = <cwd>/.tmp/aishell1/test
        dataset_path = os.path.join(self._workspace, testsets_dir_name, 'test')

        # extract the archive if the dataset directory does not exist yet
        if not os.path.exists(dataset_path):
            un_tar_gz(testsets_file_path, self._workspace)

        inference_16k_pipeline = pipeline(
            task=Tasks.auto_speech_recognition, model=[self._am_model_id])
        self.assertIsNotNone(inference_16k_pipeline)

        rec_result = inference_16k_pipeline(wav_path=dataset_path)
        self.assertGreater(len(rec_result['datasets_result']), 0)
        self.assertGreater(rec_result['datasets_result']['Wrd'], 0)
        '''
        result structure:
        {
            'rec_result': 'None',
            'datasets_result':
            {
                'Wrd': 104816,            # the number of words
                'Snt': 7176,              # the number of sentences
                'Corr': 99327,            # the number of correct words
                'Ins': 104,               # the number of inserted words
                'Del': 155,               # the number of deleted words
                'Sub': 5334,              # the number of substituted words
                'wrong_words': 5593,      # the number of wrong words
                'wrong_sentences': 2898,  # the number of wrong sentences
                'Err': 5.34,              # WER/CER (%)
                'S.Err': 40.38            # SER (%)
            }
        }
        '''
        logger.info('test_run_with_ark_dataset datasets result: ')
        logger.info(rec_result['datasets_result'])


if __name__ == '__main__':
    unittest.main()