lingcai.wl 3 years ago
parent
commit
0548d92de8
5 changed files with 9 additions and 5 deletions
  1. +2
    -2
      modelscope/models/multi_modal/ofa_for_all_tasks.py
  2. +1
    -1
      modelscope/pipelines/audio/linear_aec_pipeline.py
  3. +2
    -1
      modelscope/preprocessors/multi_modal.py
  4. +1
    -1
      modelscope/utils/demo_utils.py
  5. +3
    -0
      tests/pipelines/test_automatic_speech_recognition.py

+ 2
- 2
modelscope/models/multi_modal/ofa_for_all_tasks.py View File

@@ -152,8 +152,8 @@ class OfaForAllTasks(TorchModel):
region_tensor[:, ::2] /= input['w_resize_ratios']
region_tensor[:, 1::2] /= input['h_resize_ratios']
return {
OutputKeys.BOXES: move_to_device(region_tensor,
torch.device('cpu')),
OutputKeys.BOXES:
move_to_device(region_tensor, torch.device('cpu')).tolist(),
OutputKeys.SCORES: [1.0] * region_tensor.shape[0]
}



+ 1
- 1
modelscope/pipelines/audio/linear_aec_pipeline.py View File

@@ -51,7 +51,7 @@ class LinearAECPipeline(Pipeline):

When invoking the class with pipeline.__call__(), you should provide two params:
Dict[str, Any]
the path of wav fileseg:{
the path of wav files, eg:{
"nearend_mic": "/your/data/near_end_mic_audio.wav",
"farend_speech": "/your/data/far_end_speech_audio.wav"}
output_path (str, optional): "/your/output/audio_after_aec.wav"


+ 2
- 1
modelscope/preprocessors/multi_modal.py View File

@@ -8,6 +8,7 @@ from PIL import Image
from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Preprocessors
from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image
from modelscope.utils.config import Config
from modelscope.utils.constant import Fields, ModeKeys, ModelFile, Tasks
from .base import Preprocessor
@@ -137,7 +138,7 @@ class MPlugPreprocessor(Preprocessor):
def image_open(self, path: str) -> Tuple[Image.Image, int]:
if path not in self._image_map:
index = len(self._image_map)
self._image_map[path] = (Image.open(path), index)
self._image_map[path] = (load_image(path), index)
return self._image_map[path]

def __call__(


+ 1
- 1
modelscope/utils/demo_utils.py View File

@@ -236,7 +236,7 @@ def postprocess(req, resp):
_, img_encode = cv2.imencode('.' + file_type, content)
img_bytes = img_encode.tobytes()
return type(img_bytes)
elif file_type == 'wav':
else:
out_mem_file = io.BytesIO()
out_mem_file.write(new_resp.get(output_key))
return type(out_mem_file)


+ 3
- 0
tests/pipelines/test_automatic_speech_recognition.py View File

@@ -22,6 +22,9 @@ URL_FILE = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audi
LITTLE_TESTSETS_FILE = 'data_aishell.tar.gz'
LITTLE_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/data_aishell.tar.gz'

TFRECORD_TESTSETS_FILE = 'tfrecord.tar.gz'
TFRECORD_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/tfrecord.tar.gz'


class AutomaticSpeechRecognitionTest(unittest.TestCase,
DemoCompatibilityCheck):


Loading…
Cancel
Save