From 3f9a5d041fad0ef244812b4385c47a56f8aee915 Mon Sep 17 00:00:00 2001 From: "bin.xue" Date: Wed, 3 Aug 2022 22:03:20 +0800 Subject: [PATCH] [to #42322933] feat: change ans&aec pipeline output type to bytes --- modelscope/pipelines/audio/ans_pipeline.py | 9 ++++++--- modelscope/pipelines/audio/linear_aec_pipeline.py | 7 ++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/modelscope/pipelines/audio/ans_pipeline.py b/modelscope/pipelines/audio/ans_pipeline.py index 38cb0043..e9cb8db3 100644 --- a/modelscope/pipelines/audio/ans_pipeline.py +++ b/modelscope/pipelines/audio/ans_pipeline.py @@ -113,10 +113,13 @@ class ANSPipeline(Pipeline): current_idx += stride else: outputs = self.model(ndarray)['wav_l2'][0].cpu().numpy() - return {OutputKeys.OUTPUT_PCM: outputs[:nsamples]} + outputs = (outputs[:nsamples] * 32768).astype(np.int16).tobytes() + return {OutputKeys.OUTPUT_PCM: outputs} def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: if 'output_path' in kwargs.keys(): - sf.write(kwargs['output_path'], inputs[OutputKeys.OUTPUT_PCM], - self.SAMPLE_RATE) + sf.write( + kwargs['output_path'], + np.frombuffer(inputs[OutputKeys.OUTPUT_PCM], dtype=np.int16), + self.SAMPLE_RATE) return inputs diff --git a/modelscope/pipelines/audio/linear_aec_pipeline.py b/modelscope/pipelines/audio/linear_aec_pipeline.py index 6047fb9f..b59bc475 100644 --- a/modelscope/pipelines/audio/linear_aec_pipeline.py +++ b/modelscope/pipelines/audio/linear_aec_pipeline.py @@ -126,6 +126,7 @@ class LinearAECPipeline(Pipeline): } """ output_data = self._process(inputs['feature'], inputs['base']) + output_data = output_data.astype(np.int16).tobytes() return {OutputKeys.OUTPUT_PCM: output_data} def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: @@ -145,9 +146,9 @@ class LinearAECPipeline(Pipeline): } """ if 'output_path' in kwargs.keys(): - wav.write(kwargs['output_path'], self.preprocessor.SAMPLE_RATE, - inputs[OutputKeys.OUTPUT_PCM].astype(np.int16)) - inputs[OutputKeys.OUTPUT_PCM] = inputs[OutputKeys.OUTPUT_PCM] / 32768.0 + wav.write( + kwargs['output_path'], self.preprocessor.SAMPLE_RATE, + np.frombuffer(inputs[OutputKeys.OUTPUT_PCM], dtype=np.int16)) return inputs def _process(self, fbanks, mixture):