lingcai.wl 3 years ago
parent
commit
0548d92de8
5 changed files with 9 additions and 5 deletions
  1. +2
    -2
      modelscope/models/multi_modal/ofa_for_all_tasks.py
  2. +1
    -1
      modelscope/pipelines/audio/linear_aec_pipeline.py
  3. +2
    -1
      modelscope/preprocessors/multi_modal.py
  4. +1
    -1
      modelscope/utils/demo_utils.py
  5. +3
    -0
      tests/pipelines/test_automatic_speech_recognition.py

+ 2
- 2
modelscope/models/multi_modal/ofa_for_all_tasks.py View File

@@ -152,8 +152,8 @@ class OfaForAllTasks(TorchModel):
region_tensor[:, ::2] /= input['w_resize_ratios'] region_tensor[:, ::2] /= input['w_resize_ratios']
region_tensor[:, 1::2] /= input['h_resize_ratios'] region_tensor[:, 1::2] /= input['h_resize_ratios']
return { return {
OutputKeys.BOXES: move_to_device(region_tensor,
torch.device('cpu')),
OutputKeys.BOXES:
move_to_device(region_tensor, torch.device('cpu')).tolist(),
OutputKeys.SCORES: [1.0] * region_tensor.shape[0] OutputKeys.SCORES: [1.0] * region_tensor.shape[0]
} }




+ 1
- 1
modelscope/pipelines/audio/linear_aec_pipeline.py View File

@@ -51,7 +51,7 @@ class LinearAECPipeline(Pipeline):


When invoke the class with pipeline.__call__(), you should provide two params: When invoke the class with pipeline.__call__(), you should provide two params:
Dict[str, Any] Dict[str, Any]
the path of wav fileseg:{
the path of wav files, eg:{
"nearend_mic": "/your/data/near_end_mic_audio.wav", "nearend_mic": "/your/data/near_end_mic_audio.wav",
"farend_speech": "/your/data/far_end_speech_audio.wav"} "farend_speech": "/your/data/far_end_speech_audio.wav"}
output_path (str, optional): "/your/output/audio_after_aec.wav" output_path (str, optional): "/your/output/audio_after_aec.wav"


+ 2
- 1
modelscope/preprocessors/multi_modal.py View File

@@ -8,6 +8,7 @@ from PIL import Image
from modelscope.hub.snapshot_download import snapshot_download from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Preprocessors from modelscope.metainfo import Preprocessors
from modelscope.pipelines.base import Input from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image
from modelscope.utils.config import Config from modelscope.utils.config import Config
from modelscope.utils.constant import Fields, ModeKeys, ModelFile, Tasks from modelscope.utils.constant import Fields, ModeKeys, ModelFile, Tasks
from .base import Preprocessor from .base import Preprocessor
@@ -137,7 +138,7 @@ class MPlugPreprocessor(Preprocessor):
def image_open(self, path: str) -> Tuple[Image.Image, int]: def image_open(self, path: str) -> Tuple[Image.Image, int]:
if path not in self._image_map: if path not in self._image_map:
index = len(self._image_map) index = len(self._image_map)
self._image_map[path] = (Image.open(path), index)
self._image_map[path] = (load_image(path), index)
return self._image_map[path] return self._image_map[path]


def __call__( def __call__(


+ 1
- 1
modelscope/utils/demo_utils.py View File

@@ -236,7 +236,7 @@ def postprocess(req, resp):
_, img_encode = cv2.imencode('.' + file_type, content) _, img_encode = cv2.imencode('.' + file_type, content)
img_bytes = img_encode.tobytes() img_bytes = img_encode.tobytes()
return type(img_bytes) return type(img_bytes)
elif file_type == 'wav':
else:
out_mem_file = io.BytesIO() out_mem_file = io.BytesIO()
out_mem_file.write(new_resp.get(output_key)) out_mem_file.write(new_resp.get(output_key))
return type(out_mem_file) return type(out_mem_file)


+ 3
- 0
tests/pipelines/test_automatic_speech_recognition.py View File

@@ -22,6 +22,9 @@ URL_FILE = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audi
LITTLE_TESTSETS_FILE = 'data_aishell.tar.gz' LITTLE_TESTSETS_FILE = 'data_aishell.tar.gz'
LITTLE_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/data_aishell.tar.gz' LITTLE_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/data_aishell.tar.gz'


TFRECORD_TESTSETS_FILE = 'tfrecord.tar.gz'
TFRECORD_TESTSETS_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/datasets/tfrecord.tar.gz'



class AutomaticSpeechRecognitionTest(unittest.TestCase, class AutomaticSpeechRecognitionTest(unittest.TestCase,
DemoCompatibilityCheck): DemoCompatibilityCheck):


Loading…
Cancel
Save