Browse Source

Update transforms.py

pull/1/head
eptq002345 2 years ago
parent
commit
de1c0861ba
1 changed files with 946 additions and 2 deletions
  1. +946
    -2
      mindspore/dataset/audio/transforms.py

+ 946
- 2
mindspore/dataset/audio/transforms.py View File

@@ -1,4 +1,4 @@
# Copyright 2021 Huawei Technologies Co., Ltd
# Copyright 2021-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -11,6 +11,950 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
The module audio.transforms is inherited from _c_dataengine and is
implemented based on C++. It is a high-performance module for processing
audio. Users can apply suitable augmentations to audio data to improve
their training models.
"""

import numpy as np

import mindspore._c_dataengine as cde
from ..transforms.c_transforms import TensorOperation
from .utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, MelType, Modulation, NormType, \
ScaleType, WindowType
from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \
check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_compute_deltas, \
check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \
check_dither, check_equalizer_biquad, check_fade, check_flanger, check_gain, check_griffin_lim, \
check_highpass_biquad, check_lfilter, check_lowpass_biquad, check_magphase, check_mask_along_axis, \
check_mask_along_axis_iid, check_masking, check_mel_scale, check_mu_law_coding, check_overdrive, \
check_phase_vocoder, check_phaser, check_riaa_biquad, check_sliding_window_cmn, check_spectral_centroid, \
check_spectrogram, check_time_stretch, check_treble_biquad, check_vol


class AudioTensorOperation(TensorOperation):
    """
    Base class of Audio Tensor Ops.

    Calling an instance checks that every positional input is a
    ``numpy.ndarray`` and then delegates to ``TensorOperation.__call__``.
    Concrete operations are expected to override :meth:`parse`.
    """

    def __call__(self, *input_tensor_list):
        """
        Type-check the inputs and forward them to the parent ``__call__``.

        Raises:
            TypeError: If any input is not a ``numpy.ndarray``.
        """
        for candidate in input_tensor_list:
            if isinstance(candidate, np.ndarray):
                continue
            raise TypeError("Input should be NumPy audio, got {}.".format(type(candidate)))
        return super().__call__(*input_tensor_list)

    def parse(self):
        """Placeholder; the base class always raises ``NotImplementedError``."""
        raise NotImplementedError("AudioTensorOperation has to implement parse() method.")

# 给音频波形施加双极点全通滤波器,其中心频率和带宽由入参指定。
class AllpassBiquad(AudioTensorOperation):
    """
    Apply a two-pole all-pass filter to an audio waveform.

    The central frequency and bandwidth of the filter are determined by the
    constructor arguments.

    Args:
        sample_rate: Sampling rate of the waveform.
        central_freq: Central frequency of the filter.
        Q: Quality factor of the filter. Default: 0.707.
    """

    @check_allpass_biquad
    def __init__(self, sample_rate, central_freq, Q=0.707):
        self.sample_rate, self.central_freq, self.Q = sample_rate, central_freq, Q

    def parse(self):
        """Build the backend ``cde.AllpassBiquadOperation`` from the stored arguments."""
        backend_args = (self.sample_rate, self.central_freq, self.Q)
        return cde.AllpassBiquadOperation(*backend_args)


# Maps each Python-side ScaleType member to the matching backend (cde) enum value.
DE_C_SCALE_TYPE = {
    ScaleType.POWER: cde.ScaleType.DE_SCALE_TYPE_POWER,
    ScaleType.MAGNITUDE: cde.ScaleType.DE_SCALE_TYPE_MAGNITUDE,
}

# 将输入音频从振幅/功率标度转换为分贝标度。
class AmplitudeToDB(AudioTensorOperation):
    """
    Convert input audio from the amplitude/power scale to the decibel scale.

    Args:
        stype: Original scale of the input audio, a ``ScaleType`` member.
            Default: ``ScaleType.POWER``.
        ref_value: Reference value used to compute the decibel coefficient.
            Default: 1.0.
        amin: Lower bound for the waveform; values below it are clipped.
            Default: 1e-10.
        top_db: Minimum cut-off decibel value. Default: 80.0.
    """

    @check_amplitude_to_db
    def __init__(self, stype=ScaleType.POWER, ref_value=1.0, amin=1e-10, top_db=80.0):
        self.stype, self.ref_value = stype, ref_value
        self.amin, self.top_db = amin, top_db

    def parse(self):
        """Build the backend ``cde.AmplitudeToDBOperation`` instance."""
        # Translate the Python enum into the backend enum before the call.
        backend_scale = DE_C_SCALE_TYPE[self.stype]
        return cde.AmplitudeToDBOperation(backend_scale, self.ref_value, self.amin, self.top_db)

# 计算复数序列的角度。
class Angle(AudioTensorOperation):
    """
    Compute the angle of a complex sequence.

    The operation takes no constructor arguments.
    """

    def parse(self):
        """Build the backend ``cde.AngleOperation`` instance."""
        operation = cde.AngleOperation()
        return operation

class BandBiquad(AudioTensorOperation):
    """
    Audio operation backed by ``cde.BandBiquadOperation``.

    NOTE(review): presumably applies a two-pole band filter to the waveform;
    confirm the exact filter semantics against the MindSpore API docs.

    Args:
        sample_rate: Sampling rate of the waveform.
        central_freq: Central frequency of the filter.
        Q: Quality factor of the filter. Default: 0.707.
        noise: Boolean flag forwarded unchanged to the backend.
            Default: False.
    """

    @check_band_biquad
    def __init__(self, sample_rate, central_freq, Q=0.707, noise=False):
        self.sample_rate, self.central_freq = sample_rate, central_freq
        self.Q, self.noise = Q, noise

    def parse(self):
        """Build the backend ``cde.BandBiquadOperation`` from the stored arguments."""
        backend_args = (self.sample_rate, self.central_freq, self.Q, self.noise)
        return cde.BandBiquadOperation(*backend_args)

# 给音频波形施加双极点巴特沃斯(Butterworth)带通滤波器
class BandpassBiquad(AudioTensorOperation):
    """
    Apply a two-pole Butterworth band-pass filter to an audio waveform.

    Args:
        sample_rate: Sampling rate of the waveform.
        central_freq: Central frequency of the filter.
        Q: Quality factor of the filter. Default: 0.707.
        const_skirt_gain: Boolean flag forwarded unchanged to the backend.
            Default: False.
    """

    @check_bandpass_biquad
    def __init__(self, sample_rate, central_freq, Q=0.707, const_skirt_gain=False):
        self.const_skirt_gain = const_skirt_gain
        self.Q = Q
        self.central_freq = central_freq
        self.sample_rate = sample_rate

    def parse(self):
        """Build the backend ``cde.BandpassBiquadOperation`` from the stored arguments."""
        backend_args = (self.sample_rate, self.central_freq, self.Q, self.const_skirt_gain)
        return cde.BandpassBiquadOperation(*backend_args)

# Apply a two-pole Butterworth band-reject filter to the audio waveform.
class BandrejectBiquad(AudioTensorOperation):
    """
    Apply a band-reject biquad filter to an audio waveform.

    Wraps ``cde.BandrejectBiquadOperation``.

    Args:
        sample_rate: Sampling rate of the waveform.
        central_freq: Central frequency of the filter.
        Q: Quality factor of the filter. Default: 0.707.
    """

    @check_bandreject_biquad
    def __init__(self, sample_rate, central_freq, Q=0.707):
        self.sample_rate, self.central_freq, self.Q = sample_rate, central_freq, Q

    def parse(self):
        """Build the backend ``cde.BandrejectBiquadOperation`` from the stored arguments."""
        backend_args = (self.sample_rate, self.central_freq, self.Q)
        return cde.BandrejectBiquadOperation(*backend_args)

# 给音频波形施加低音控制效果,即双极点低频搁架滤波器
class BassBiquad(AudioTensorOperation):
    """
    Apply a bass tone-control effect (a two-pole low-shelf filter) to an
    audio waveform.

    Args:
        sample_rate: Sampling rate of the waveform.
        gain: Gain applied by the filter.
        central_freq: Central frequency of the filter. Default: 100.0.
        Q: Quality factor of the filter. Default: 0.707.
    """

    @check_bass_biquad
    def __init__(self, sample_rate, gain, central_freq=100.0, Q=0.707):
        self.sample_rate, self.gain = sample_rate, gain
        self.central_freq, self.Q = central_freq, Q

    def parse(self):
        """Build the backend ``cde.BassBiquadOperation`` from the stored arguments."""
        backend_args = (self.sample_rate, self.gain, self.central_freq, self.Q)
        return cde.BassBiquadOperation(*backend_args)

# 给音频波形施加双二阶滤波器
# NOTE(review): unlike most sibling operations this class derives directly
# from TensorOperation, so the ndarray type check in
# AudioTensorOperation.__call__ does not apply here - confirm this is intended.
class Biquad(TensorOperation):
    """
    Apply a biquad filter to an audio waveform.

    Args:
        b0: Filter coefficient ``b0``.
        b1: Filter coefficient ``b1``.
        b2: Filter coefficient ``b2``.
        a0: Filter coefficient ``a0``.
        a1: Filter coefficient ``a1``.
        a2: Filter coefficient ``a2``.
    """

    @check_biquad
    def __init__(self, b0, b1, b2, a0, a1, a2):
        self.b0, self.b1, self.b2 = b0, b1, b2
        self.a0, self.a1, self.a2 = a0, a1, a2

    def parse(self):
        """Build the backend ``cde.BiquadOperation`` from the six coefficients."""
        coefficients = (self.b0, self.b1, self.b2, self.a0, self.a1, self.a2)
        return cde.BiquadOperation(*coefficients)

# 计算复数序列的范数
class ComplexNorm(AudioTensorOperation):
    """
    Compute the norm of a complex sequence.

    Args:
        power: Exponent of the norm. Default: 1.0.
    """

    @check_complex_norm
    def __init__(self, power=1.0):
        self.power = power

    def parse(self):
        """Build the backend ``cde.ComplexNormOperation`` instance."""
        exponent = self.power
        return cde.ComplexNormOperation(exponent)


# Maps each Python-side BorderType member to the matching backend (cde) enum
# value; used by operations that take a padding mode.
DE_C_BORDER_TYPE = {
    BorderType.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
    BorderType.EDGE: cde.BorderType.DE_BORDER_EDGE,
    BorderType.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
    BorderType.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC,
}

# 计算频谱的delta系数,也叫差分系数
class ComputeDeltas(AudioTensorOperation):
    """
    Compute the delta coefficients (also called differential coefficients)
    of a spectrogram.

    Args:
        win_length: Window length; stored on the instance as ``win_len``.
            Default: 5.
        pad_mode: Padding mode, a ``BorderType`` member.
            Default: ``BorderType.EDGE``.
    """

    @check_compute_deltas
    def __init__(self, win_length=5, pad_mode=BorderType.EDGE):
        self.win_len, self.pad_mode = win_length, pad_mode

    def parse(self):
        """Build the backend ``cde.ComputeDeltasOperation`` instance."""
        # Translate the Python enum into the backend enum before the call.
        backend_pad_mode = DE_C_BORDER_TYPE[self.pad_mode]
        return cde.ComputeDeltasOperation(self.win_len, backend_pad_mode)

# 给音频波形施加对比度增强效果
class Contrast(AudioTensorOperation):
    """
    Apply a contrast-enhancement effect to an audio waveform.

    Args:
        enhancement_amount: Amount of enhancement to apply. Default: 75.0.
    """

    @check_contrast
    def __init__(self, enhancement_amount=75.0):
        self.enhancement_amount = enhancement_amount

    def parse(self):
        """Build the backend ``cde.ContrastOperation`` instance."""
        amount = self.enhancement_amount
        return cde.ContrastOperation(amount)

# 将音频波形从分贝转换为功率或振幅
class DBToAmplitude(AudioTensorOperation):
    """
    Convert an audio waveform from the decibel scale to power or amplitude.

    Args:
        ref: Reference value.
        power: Forwarded unchanged to the backend.
            NOTE(review): presumably the exponent that selects between the
            power and amplitude result - confirm against the MindSpore docs.
    """

    @check_db_to_amplitude
    def __init__(self, ref, power):
        self.ref, self.power = ref, power

    def parse(self):
        """Build the backend ``cde.DBToAmplitudeOperation`` instance."""
        backend_args = (self.ref, self.power)
        return cde.DBToAmplitudeOperation(*backend_args)

# 对输入音频波形施加直流移位
class DCShift(AudioTensorOperation):
    """
    Apply a DC shift to the input audio waveform.

    Args:
        shift: Amount of DC shift to apply.
        limiter_gain: Gain of the limiter. When it is falsy (``None`` or 0)
            the value of ``shift`` is stored instead. Default: None.
    """

    @check_dc_shift
    def __init__(self, shift, limiter_gain=None):
        self.shift = shift
        # NOTE(review): this is a truthiness test, so an explicit 0 is also
        # replaced by `shift` - confirm that is the intended sentinel.
        if limiter_gain:
            self.limiter_gain = limiter_gain
        else:
            self.limiter_gain = shift

    def parse(self):
        """Build the backend ``cde.DCShiftOperation`` instance."""
        backend_args = (self.shift, self.limiter_gain)
        return cde.DCShiftOperation(*backend_args)

# 给音频波形施加CD(IEC 60908)去重音(一种高音衰减搁置滤波器)效果
class DeemphBiquad(AudioTensorOperation):
    """
    Apply a Compact Disc (IEC 60908) de-emphasis effect, a treble-attenuation
    shelving filter, to an audio waveform.

    Args:
        sample_rate: Sampling rate of the waveform.
    """

    @check_deemph_biquad
    def __init__(self, sample_rate):
        self.sample_rate = sample_rate

    def parse(self):
        """Build the backend ``cde.DeemphBiquadOperation`` instance."""
        rate = self.sample_rate
        return cde.DeemphBiquadOperation(rate)

# 检测音调频率
class DetectPitchFrequency(AudioTensorOperation):
    """
    Detect the pitch frequency of an audio waveform.

    Args:
        sample_rate: Sampling rate of the waveform.
        frame_time: Frame duration parameter. Default: 0.01.
        win_length: Window length. Default: 30.
        freq_low: Lowest frequency considered. Default: 85.
        freq_high: Highest frequency considered. Default: 3400.
    """

    @check_detect_pitch_frequency
    def __init__(self, sample_rate, frame_time=0.01, win_length=30, freq_low=85, freq_high=3400):
        self.sample_rate, self.frame_time = sample_rate, frame_time
        self.win_length = win_length
        self.freq_low, self.freq_high = freq_low, freq_high

    def parse(self):
        """Build the backend ``cde.DetectPitchFrequencyOperation`` instance."""
        backend_args = (
            self.sample_rate,
            self.frame_time,
            self.win_length,
            self.freq_low,
            self.freq_high,
        )
        return cde.DetectPitchFrequencyOperation(*backend_args)


# Maps each Python-side DensityFunction member to the matching backend (cde)
# enum value.
DE_C_DENSITY_FUNCTION = {
    DensityFunction.TPDF: cde.DensityFunction.DE_DENSITY_FUNCTION_TPDF,
    DensityFunction.RPDF: cde.DensityFunction.DE_DENSITY_FUNCTION_RPDF,
    DensityFunction.GPDF: cde.DensityFunction.DE_DENSITY_FUNCTION_GPDF,
}

# 通过消除非线性截断失真,来抖动增加存储在特定位深的音频的动态感知范围
class Dither(AudioTensorOperation):
    """
    Dither an audio waveform.

    Dithering increases the perceived dynamic range of audio stored at a
    particular bit depth by eliminating nonlinear truncation distortion.

    Args:
        density_function: A ``DensityFunction`` member.
            Default: ``DensityFunction.TPDF``.
        noise_shaping: Whether to use noise shaping. Default: False.
    """

    @check_dither
    def __init__(self, density_function=DensityFunction.TPDF, noise_shaping=False):
        self.density_function, self.noise_shaping = density_function, noise_shaping

    def parse(self):
        """Build the backend ``cde.DitherOperation`` instance."""
        # Translate the Python enum into the backend enum before the call.
        backend_density = DE_C_DENSITY_FUNCTION[self.density_function]
        return cde.DitherOperation(backend_density, self.noise_shaping)

# 给音频波形施加双二次均衡器滤波器
class EqualizerBiquad(AudioTensorOperation):
    """
    Apply a biquad equalizer filter to an audio waveform.

    Args:
        sample_rate: Sampling rate of the waveform.
        center_freq: Central frequency of the filter.
        gain: Gain applied by the filter.
        Q: Quality factor of the filter. Default: 0.707.
    """

    @check_equalizer_biquad
    def __init__(self, sample_rate, center_freq, gain, Q=0.707):
        self.sample_rate, self.center_freq = sample_rate, center_freq
        self.gain, self.Q = gain, Q

    def parse(self):
        """Build the backend ``cde.EqualizerBiquadOperation`` instance."""
        backend_args = (self.sample_rate, self.center_freq, self.gain, self.Q)
        return cde.EqualizerBiquadOperation(*backend_args)


# Maps each Python-side FadeShape member to the matching backend (cde) enum value.
DE_C_FADE_SHAPE = {
    FadeShape.QUARTER_SINE: cde.FadeShape.DE_FADE_SHAPE_QUARTER_SINE,
    FadeShape.HALF_SINE: cde.FadeShape.DE_FADE_SHAPE_HALF_SINE,
    FadeShape.LINEAR: cde.FadeShape.DE_FADE_SHAPE_LINEAR,
    FadeShape.LOGARITHMIC: cde.FadeShape.DE_FADE_SHAPE_LOGARITHMIC,
    FadeShape.EXPONENTIAL: cde.FadeShape.DE_FADE_SHAPE_EXPONENTIAL,
}

# 向波形添加淡入和/或淡出
class Fade(AudioTensorOperation):
    """
    Add a fade-in and/or fade-out to a waveform.

    Args:
        fade_in_len: Length of the fade-in. Default: 0.
        fade_out_len: Length of the fade-out. Default: 0.
        fade_shape: Shape of the fade, a ``FadeShape`` member.
            Default: ``FadeShape.LINEAR``.
    """

    @check_fade
    def __init__(self, fade_in_len=0, fade_out_len=0, fade_shape=FadeShape.LINEAR):
        self.fade_in_len, self.fade_out_len = fade_in_len, fade_out_len
        self.fade_shape = fade_shape

    def parse(self):
        """Build the backend ``cde.FadeOperation`` instance."""
        # Translate the Python enum into the backend enum before the call.
        backend_shape = DE_C_FADE_SHAPE[self.fade_shape]
        return cde.FadeOperation(self.fade_in_len, self.fade_out_len, backend_shape)


# Maps each Python-side Modulation member to the matching backend (cde) enum value.
DE_C_MODULATION = {
    Modulation.SINUSOIDAL: cde.Modulation.DE_MODULATION_SINUSOIDAL,
    Modulation.TRIANGULAR: cde.Modulation.DE_MODULATION_TRIANGULAR,
}

# Maps each Python-side Interpolation member to the matching backend (cde)
# enum value.
DE_C_INTERPOLATION = {
    Interpolation.LINEAR: cde.Interpolation.DE_INTERPOLATION_LINEAR,
    Interpolation.QUADRATIC: cde.Interpolation.DE_INTERPOLATION_QUADRATIC,
}

# 给音频施加镶边效果
class Flanger(AudioTensorOperation):
    """
    Apply a flanger effect to audio.

    Args:
        sample_rate: Sampling rate of the waveform.
        delay: Delay parameter of the effect. Default: 0.0.
        depth: Depth parameter of the effect. Default: 2.0.
        regen: Regeneration parameter of the effect. Default: 0.0.
        width: Width parameter of the effect. Default: 71.0.
        speed: Speed parameter of the effect. Default: 0.5.
        phase: Phase parameter of the effect. Default: 25.0.
        modulation: A ``Modulation`` member.
            Default: ``Modulation.SINUSOIDAL``.
        interpolation: An ``Interpolation`` member.
            Default: ``Interpolation.LINEAR``.

    NOTE(review): units and valid ranges of the numeric parameters are not
    visible in this file - see the MindSpore API docs.
    """

    @check_flanger
    def __init__(self, sample_rate, delay=0.0, depth=2.0, regen=0.0, width=71.0, speed=0.5,
                 phase=25.0, modulation=Modulation.SINUSOIDAL, interpolation=Interpolation.LINEAR):
        self.sample_rate = sample_rate
        self.delay, self.depth, self.regen = delay, depth, regen
        self.width, self.speed, self.phase = width, speed, phase
        self.modulation, self.interpolation = modulation, interpolation

    def parse(self):
        """Build the backend ``cde.FlangerOperation`` instance."""
        # Translate both Python enums into their backend counterparts first.
        backend_modulation = DE_C_MODULATION[self.modulation]
        backend_interpolation = DE_C_INTERPOLATION[self.interpolation]
        return cde.FlangerOperation(self.sample_rate, self.delay, self.depth, self.regen, self.width,
                                    self.speed, self.phase, backend_modulation, backend_interpolation)

# 给音频波形施加频域掩码
class FrequencyMasking(AudioTensorOperation):
    """
    Apply a frequency-domain mask to an audio waveform.

    Args:
        iid_masks: Whether to use IID masks. Default: False.
        freq_mask_param: Frequency mask parameter; stored on the instance as
            ``frequency_mask_param``. Default: 0.
        mask_start: Starting position of the mask. Default: 0.
        mask_value: Value written into the masked region. Default: 0.0.
    """

    @check_masking
    def __init__(self, iid_masks=False, freq_mask_param=0, mask_start=0, mask_value=0.0):
        self.iid_masks, self.frequency_mask_param = iid_masks, freq_mask_param
        self.mask_start, self.mask_value = mask_start, mask_value

    def parse(self):
        """Build the backend ``cde.FrequencyMaskingOperation`` instance."""
        backend_args = (self.iid_masks, self.frequency_mask_param, self.mask_start, self.mask_value)
        return cde.FrequencyMaskingOperation(*backend_args)

# 放大或衰减整个音频波形
class Gain(AudioTensorOperation):
    """
    Amplify or attenuate the whole audio waveform.

    Args:
        gain_db: Gain to apply; the name indicates decibels. Default: 1.0.
    """

    @check_gain
    def __init__(self, gain_db=1.0):
        self.gain_db = gain_db

    def parse(self):
        """Build the backend ``cde.GainOperation`` instance."""
        decibels = self.gain_db
        return cde.GainOperation(decibels)

# 使用Griffin-Lim算法从线性幅度频谱图中计算信号波形
class GriffinLim(AudioTensorOperation):
    """
    Compute a waveform from a linear-scale magnitude spectrogram using the
    Griffin-Lim algorithm.

    Args:
        n_fft: Size of the FFT. Default: 400.
        n_iter: Forwarded to the backend; presumably the number of
            iterations - confirm against the MindSpore docs. Default: 32.
        win_length: Window length. When falsy (``None`` or 0), ``n_fft`` is
            used. Default: None.
        hop_length: Hop length between windows. When falsy (``None`` or 0),
            ``win_length // 2`` is used. Default: None.
        window_type: Window function, a ``WindowType`` member.
            Default: ``WindowType.HANN``.
        power: Forwarded unchanged to the backend. Default: 2.
        momentum: Forwarded unchanged to the backend. Default: 0.99.
        length: Forwarded to the backend; 0 is stored when it is falsy.
            Default: None.
        rand_init: Whether to use random initialisation. Default: True.
    """

    @check_griffin_lim
    def __init__(self, n_fft=400, n_iter=32, win_length=None, hop_length=None, window_type=WindowType.HANN,
                 power=2, momentum=0.99, length=None, rand_init=True):
        self.n_fft = n_fft
        self.n_iter = n_iter
        # Order matters: hop_length's fallback depends on the resolved win_length.
        self.win_length = win_length if win_length else self.n_fft
        self.hop_length = hop_length if hop_length else self.win_length // 2
        self.window_type = window_type
        self.power = power
        self.momentum = momentum
        self.length = length if length else 0
        self.rand_init = rand_init

    def parse(self):
        """
        Build the backend ``cde.GriffinLimOperation`` instance.

        Raises:
            KeyError: If ``window_type`` is not a key of ``DE_C_WINDOW_TYPE``.
        """
        # Index the table (instead of .get) so an unknown window type fails
        # here rather than silently handing None to the backend; this matches
        # how SpectralCentroid and Spectrogram look up the window type.
        backend_window = DE_C_WINDOW_TYPE[self.window_type]
        return cde.GriffinLimOperation(self.n_fft, self.n_iter, self.win_length, self.hop_length,
                                       backend_window, self.power, self.momentum, self.length,
                                       self.rand_init)

# 给音频波形上施加双二阶高通滤波器
class HighpassBiquad(AudioTensorOperation):
    """
    Apply a biquad high-pass filter to an audio waveform.

    Args:
        sample_rate: Sampling rate of the waveform.
        cutoff_freq: Cut-off frequency of the filter.
        Q: Quality factor of the filter. Default: 0.707.
    """

    @check_highpass_biquad
    def __init__(self, sample_rate, cutoff_freq, Q=0.707):
        self.sample_rate, self.cutoff_freq, self.Q = sample_rate, cutoff_freq, Q

    def parse(self):
        """Build the backend ``cde.HighpassBiquadOperation`` instance."""
        backend_args = (self.sample_rate, self.cutoff_freq, self.Q)
        return cde.HighpassBiquadOperation(*backend_args)

# 根据指定的差分方程施加IIR滤波器
class LFilter(AudioTensorOperation):
    """
    Apply an IIR filter defined by the given difference equation.

    Args:
        a_coeffs: The ``a`` coefficients of the difference equation.
        b_coeffs: The ``b`` coefficients of the difference equation.
        clamp: Boolean flag forwarded unchanged to the backend.
            NOTE(review): presumably enables clamping of the output -
            confirm against the MindSpore docs. Default: True.
    """

    @check_lfilter
    def __init__(self, a_coeffs, b_coeffs, clamp=True):
        self.a_coeffs, self.b_coeffs, self.clamp = a_coeffs, b_coeffs, clamp

    def parse(self):
        """Build the backend ``cde.LFilterOperation`` instance."""
        backend_args = (self.a_coeffs, self.b_coeffs, self.clamp)
        return cde.LFilterOperation(*backend_args)

# 给音频波形施加双极点低通滤波器
class LowpassBiquad(AudioTensorOperation):
    """
    Apply a two-pole low-pass filter to an audio waveform.

    Args:
        sample_rate: Sampling rate of the waveform.
        cutoff_freq: Cut-off frequency of the filter.
        Q: Quality factor of the filter. Default: 0.707.
    """

    @check_lowpass_biquad
    def __init__(self, sample_rate, cutoff_freq, Q=0.707):
        self.sample_rate, self.cutoff_freq, self.Q = sample_rate, cutoff_freq, Q

    def parse(self):
        """Build the backend ``cde.LowpassBiquadOperation`` instance."""
        backend_args = (self.sample_rate, self.cutoff_freq, self.Q)
        return cde.LowpassBiquadOperation(*backend_args)

# 将shape为(..., 2)的复值光谱图分离,输出幅度和相位
class Magphase(AudioTensorOperation):
    """
    Separate a complex-valued spectrogram of shape (..., 2) into its
    magnitude and phase.

    Args:
        power: Forwarded unchanged to the backend. Default: 1.0.
    """

    @check_magphase
    def __init__(self, power=1.0):
        self.power = power

    def parse(self):
        """Build the backend ``cde.MagphaseOperation`` instance."""
        exponent = self.power
        return cde.MagphaseOperation(exponent)

# 对音频波形应用掩码
class MaskAlongAxis(AudioTensorOperation):
    """
    Apply a mask to an audio waveform along a given axis.

    Args:
        mask_start: Starting position of the mask.
        mask_width: Width of the mask.
        mask_value: Value written into the masked region.
        axis: Axis along which the mask is applied.
    """

    @check_mask_along_axis
    def __init__(self, mask_start, mask_width, mask_value, axis):
        self.mask_start, self.mask_width = mask_start, mask_width
        self.mask_value, self.axis = mask_value, axis

    def parse(self):
        """Build the backend ``cde.MaskAlongAxisOperation`` instance."""
        backend_args = (self.mask_start, self.mask_width, self.mask_value, self.axis)
        return cde.MaskAlongAxisOperation(*backend_args)

# 对音频波形沿 axis 轴应用掩码
class MaskAlongAxisIID(AudioTensorOperation):
    """
    Apply a mask to an audio waveform along the ``axis`` axis.

    Args:
        mask_param: Mask parameter forwarded to the backend.
        mask_value: Value written into the masked region.
        axis: Axis along which the mask is applied.
    """

    @check_mask_along_axis_iid
    def __init__(self, mask_param, mask_value, axis):
        self.mask_param, self.mask_value, self.axis = mask_param, mask_value, axis

    def parse(self):
        """Build the backend ``cde.MaskAlongAxisIIDOperation`` instance."""
        backend_args = (self.mask_param, self.mask_value, self.axis)
        return cde.MaskAlongAxisIIDOperation(*backend_args)


# Maps each Python-side MelType member to the matching backend (cde) enum value.
DE_C_MEL_TYPE = {
    MelType.SLANEY: cde.MelType.DE_MEL_TYPE_SLANEY,
    MelType.HTK: cde.MelType.DE_MEL_TYPE_HTK,
}

# Maps each Python-side NormType member to the matching backend (cde) enum value.
DE_C_NORM_TYPE = {
    NormType.NONE: cde.NormType.DE_NORM_TYPE_NONE,
    NormType.SLANEY: cde.NormType.DE_NORM_TYPE_SLANEY,
}

# 将普通STFT转换为梅尔尺度的STFT
class MelScale(AudioTensorOperation):
    """
    Convert a normal STFT to an STFT on the Mel scale.

    Args:
        n_mels: Number of Mel bins. Default: 128.
        sample_rate: Sampling rate of the waveform. Default: 16000.
        f_min: Minimum frequency. Default: 0.
        f_max: Maximum frequency. When ``None``, ``sample_rate // 2`` is
            used. Default: None.
        n_stft: Forwarded unchanged to the backend. Default: 201.
        norm: Normalisation type, a ``NormType`` member.
            Default: ``NormType.NONE``.
        mel_type: Mel scale type, a ``MelType`` member.
            Default: ``MelType.HTK``.
    """

    @check_mel_scale
    def __init__(self, n_mels=128, sample_rate=16000, f_min=0, f_max=None, n_stft=201, norm=NormType.NONE,
                 mel_type=MelType.HTK):
        self.n_mels, self.sample_rate, self.f_min = n_mels, sample_rate, f_min
        # Only None triggers the fallback, so an explicit 0 is kept as given.
        if f_max is None:
            self.f_max = sample_rate // 2
        else:
            self.f_max = f_max
        self.n_stft, self.norm, self.mel_type = n_stft, norm, mel_type

    def parse(self):
        """Build the backend ``cde.MelScaleOperation`` instance."""
        # Translate both Python enums into their backend counterparts first.
        backend_norm = DE_C_NORM_TYPE[self.norm]
        backend_mel_type = DE_C_MEL_TYPE[self.mel_type]
        return cde.MelScaleOperation(self.n_mels, self.sample_rate, self.f_min, self.f_max, self.n_stft,
                                     backend_norm, backend_mel_type)

# 解码mu-law编码的信号,参考 mu-law算法
class MuLawDecoding(AudioTensorOperation):
    """
    Decode a mu-law encoded signal (see the mu-law algorithm).

    Args:
        quantization_channels: Number of quantization channels. Default: 256.
    """

    @check_mu_law_coding
    def __init__(self, quantization_channels=256):
        self.quantization_channels = quantization_channels

    def parse(self):
        """Build the backend ``cde.MuLawDecodingOperation`` instance."""
        channels = self.quantization_channels
        return cde.MuLawDecodingOperation(channels)

# 基于mu-law压缩的信号编码
class MuLawEncoding(AudioTensorOperation):
    """
    Encode a signal based on mu-law companding.

    Args:
        quantization_channels: Number of quantization channels. Default: 256.
    """

    @check_mu_law_coding
    def __init__(self, quantization_channels=256):
        self.quantization_channels = quantization_channels

    def parse(self):
        """Build the backend ``cde.MuLawEncodingOperation`` instance."""
        channels = self.quantization_channels
        return cde.MuLawEncodingOperation(channels)

# 给音频波形施加过载效果
class Overdrive(AudioTensorOperation):
    """
    Apply an overdrive effect to an audio waveform.

    Args:
        gain: Gain parameter of the effect. Default: 20.0.
        color: Colour parameter of the effect. Default: 20.0.
    """

    @check_overdrive
    def __init__(self, gain=20.0, color=20.0):
        self.gain, self.color = gain, color

    def parse(self):
        """Build the backend ``cde.OverdriveOperation`` instance."""
        backend_args = (self.gain, self.color)
        return cde.OverdriveOperation(*backend_args)

# 给音频波形施加相位效果
class Phaser(AudioTensorOperation):
    """
    Apply a phasing effect to an audio waveform.

    Args:
        sample_rate: Sampling rate of the waveform.
        gain_in: Input gain. Default: 0.4.
        gain_out: Output gain. Default: 0.74.
        delay_ms: Delay in milliseconds. Default: 3.0.
        decay: Decay coefficient. Default: 0.4.
        mod_speed: Modulation speed. Default: 0.5.
        sinusoidal: Boolean flag forwarded unchanged to the backend.
            NOTE(review): presumably selects sinusoidal modulation when
            True - confirm against the MindSpore docs. Default: True.
    """

    @check_phaser
    def __init__(self, sample_rate, gain_in=0.4, gain_out=0.74,
                 delay_ms=3.0, decay=0.4, mod_speed=0.5, sinusoidal=True):
        self.sample_rate = sample_rate
        self.gain_in, self.gain_out = gain_in, gain_out
        self.delay_ms, self.decay = delay_ms, decay
        self.mod_speed, self.sinusoidal = mod_speed, sinusoidal

    def parse(self):
        """Build the backend ``cde.PhaserOperation`` instance."""
        backend_args = (
            self.sample_rate,
            self.gain_in,
            self.gain_out,
            self.delay_ms,
            self.decay,
            self.mod_speed,
            self.sinusoidal,
        )
        return cde.PhaserOperation(*backend_args)

# 对给定的STFT频谱,在不改变音高的情况下以一定比率进行加速
class PhaseVocoder(AudioTensorOperation):
    """
    Given an STFT spectrogram, speed it up in time by a given rate without
    changing the pitch.

    Args:
        rate: Speed-up rate.
        phase_advance: Phase-advance data; wrapped in a ``cde.Tensor`` when
            the operation is constructed.
    """

    @check_phase_vocoder
    def __init__(self, rate, phase_advance):
        # The tensor conversion happens once here, not on every parse() call.
        wrapped_phase_advance = cde.Tensor(phase_advance)
        self.rate = rate
        self.phase_advance = wrapped_phase_advance

    def parse(self):
        """Build the backend ``cde.PhaseVocoderOperation`` instance."""
        backend_args = (self.rate, self.phase_advance)
        return cde.PhaseVocoderOperation(*backend_args)

# 对输入音频波形施加RIAA均衡
class RiaaBiquad(AudioTensorOperation):
    """
    Apply RIAA equalization to the input audio waveform.

    Args:
        sample_rate: Sampling rate of the waveform.
    """

    @check_riaa_biquad
    def __init__(self, sample_rate):
        self.sample_rate = sample_rate

    def parse(self):
        """Build the backend ``cde.RiaaBiquadOperation`` instance."""
        rate = self.sample_rate
        return cde.RiaaBiquadOperation(rate)

# 对每个话语应用滑动窗口倒谱均值(和可选方差)归一化
class SlidingWindowCmn(AudioTensorOperation):
    """
    Apply sliding-window cepstral mean (and optionally variance)
    normalization per utterance.

    Args:
        cmn_window: Length of the sliding window. Default: 600.
        min_cmn_window: Minimum CMN window length. Default: 100.
        center: Boolean flag forwarded unchanged to the backend.
            Default: False.
        norm_vars: Whether to normalise the variance as well.
            Default: False.
    """

    @check_sliding_window_cmn
    def __init__(self, cmn_window=600, min_cmn_window=100, center=False, norm_vars=False):
        self.cmn_window, self.min_cmn_window = cmn_window, min_cmn_window
        self.center, self.norm_vars = center, norm_vars

    def parse(self):
        """Build the backend ``cde.SlidingWindowCmnOperation`` instance."""
        backend_args = (self.cmn_window, self.min_cmn_window, self.center, self.norm_vars)
        return cde.SlidingWindowCmnOperation(*backend_args)


# Maps each Python-side WindowType member to the matching backend (cde) enum
# value.
DE_C_WINDOW_TYPE = {
    WindowType.BARTLETT: cde.WindowType.DE_WINDOW_TYPE_BARTLETT,
    WindowType.BLACKMAN: cde.WindowType.DE_WINDOW_TYPE_BLACKMAN,
    WindowType.HAMMING: cde.WindowType.DE_WINDOW_TYPE_HAMMING,
    WindowType.HANN: cde.WindowType.DE_WINDOW_TYPE_HANN,
    WindowType.KAISER: cde.WindowType.DE_WINDOW_TYPE_KAISER,
}

# 计算每个通道沿时间轴的频谱中心
# NOTE(review): unlike most sibling operations this class derives directly
# from TensorOperation, so the ndarray type check in
# AudioTensorOperation.__call__ does not apply here - confirm this is intended.
class SpectralCentroid(TensorOperation):
    """
    Compute the spectral centroid of each channel along the time axis.

    Args:
        sample_rate: Sampling rate of the waveform.
        n_fft: Size of the FFT. Default: 400.
        win_length: Window length. When falsy (``None`` or 0), ``n_fft`` is
            used. Default: None.
        hop_length: Hop length between windows. When falsy (``None`` or 0),
            ``win_length // 2`` is used. Default: None.
        pad: Amount of padding. Default: 0.
        window: Window function, a ``WindowType`` member.
            Default: ``WindowType.HANN``.
    """

    @check_spectral_centroid
    def __init__(self, sample_rate, n_fft=400, win_length=None, hop_length=None, pad=0, window=WindowType.HANN):
        self.sample_rate, self.pad, self.window, self.n_fft = sample_rate, pad, window, n_fft
        # Resolve win_length first: hop_length's fallback depends on it.
        if win_length:
            self.win_length = win_length
        else:
            self.win_length = n_fft
        if hop_length:
            self.hop_length = hop_length
        else:
            self.hop_length = self.win_length // 2

    def parse(self):
        """Build the backend ``cde.SpectralCentroidOperation`` instance."""
        # Translate the Python enum into the backend enum before the call.
        backend_window = DE_C_WINDOW_TYPE[self.window]
        return cde.SpectralCentroidOperation(self.sample_rate, self.n_fft, self.win_length, self.hop_length,
                                             self.pad, backend_window)

# 从音频信号创建其频谱
# NOTE(review): unlike most sibling operations this class derives directly
# from TensorOperation, so the ndarray type check in
# AudioTensorOperation.__call__ does not apply here - confirm this is intended.
class Spectrogram(TensorOperation):
    """
    Create a spectrogram from an audio signal.

    Args:
        n_fft: Size of the FFT. Default: 400.
        win_length: Window length. When falsy (``None`` or 0), ``n_fft`` is
            used. Default: None.
        hop_length: Hop length between windows. When falsy (``None`` or 0),
            ``win_length // 2`` is used. Default: None.
        pad: Amount of padding. Default: 0.
        window: Window function, a ``WindowType`` member.
            Default: ``WindowType.HANN``.
        power: Forwarded unchanged to the backend. Default: 2.0.
        normalized: Boolean flag forwarded unchanged to the backend.
            Default: False.
        center: Boolean flag forwarded unchanged to the backend.
            Default: True.
        pad_mode: Padding mode, a ``BorderType`` member.
            Default: ``BorderType.REFLECT``.
        onesided: Boolean flag forwarded unchanged to the backend.
            Default: True.
    """

    @check_spectrogram
    def __init__(self, n_fft=400, win_length=None, hop_length=None, pad=0, window=WindowType.HANN, power=2.0,
                 normalized=False, center=True, pad_mode=BorderType.REFLECT, onesided=True):
        self.n_fft = n_fft
        # Resolve win_length first: hop_length's fallback depends on it.
        if win_length:
            self.win_length = win_length
        else:
            self.win_length = n_fft
        if hop_length:
            self.hop_length = hop_length
        else:
            self.hop_length = self.win_length // 2
        self.pad, self.window, self.power = pad, window, power
        self.normalized, self.center = normalized, center
        self.pad_mode, self.onesided = pad_mode, onesided

    def parse(self):
        """Build the backend ``cde.SpectrogramOperation`` instance."""
        # Translate both Python enums into their backend counterparts first.
        backend_window = DE_C_WINDOW_TYPE[self.window]
        backend_pad_mode = DE_C_BORDER_TYPE[self.pad_mode]
        return cde.SpectrogramOperation(self.n_fft, self.win_length, self.hop_length, self.pad,
                                        backend_window, self.power, self.normalized,
                                        self.center, backend_pad_mode, self.onesided)

# 给音频波形施加时域掩码
class TimeMasking(AudioTensorOperation):
    """
    Apply a time-domain mask to an audio waveform.

    Args:
        iid_masks: Whether to use IID masks. Default: False.
        time_mask_param: Time mask parameter. Default: 0.
        mask_start: Starting position of the mask. Default: 0.
        mask_value: Value written into the masked region. Default: 0.0.
    """

    @check_masking
    def __init__(self, iid_masks=False, time_mask_param=0, mask_start=0, mask_value=0.0):
        self.iid_masks, self.time_mask_param = iid_masks, time_mask_param
        self.mask_start, self.mask_value = mask_start, mask_value

    def parse(self):
        """Build the backend ``cde.TimeMaskingOperation`` instance."""
        backend_args = (self.iid_masks, self.time_mask_param, self.mask_start, self.mask_value)
        return cde.TimeMaskingOperation(*backend_args)

# 以给定的比例拉伸音频短时傅里叶(Short Time Fourier Transform, STFT)频谱的时域,但不改变音频的音高
class TimeStretch(AudioTensorOperation):
    """
    Stretch the Short Time Fourier Transform (STFT) spectrogram of audio in
    time by a given rate, without changing the pitch.

    Args:
        hop_length: Hop length. When ``None``, ``(n_freq - 1) * 2 // 2`` is
            used. Default: None.
        n_freq: Number of frequency bins. Default: 201.
        fixed_rate: Stretch rate. When ``None``, 1 is used. Default: None.
    """

    @check_time_stretch
    def __init__(self, hop_length=None, n_freq=201, fixed_rate=None):
        self.n_freq = n_freq

        # Presumably the FFT size that yields n_freq one-sided bins; it is
        # only needed to derive the default hop length.
        n_fft = (n_freq - 1) * 2
        self.hop_length = hop_length if hop_length is not None else n_fft // 2
        # Single assignment: the earlier unconditional store of the raw
        # fixed_rate was dead, because it was always overwritten here.
        self.fixed_rate = fixed_rate if fixed_rate is not None else 1

    def parse(self):
        """Build the backend ``cde.TimeStretchOperation`` instance."""
        return cde.TimeStretchOperation(self.hop_length, self.n_freq, self.fixed_rate)

# 给音频波形施加高音音调控制效果
class TrebleBiquad(AudioTensorOperation):
    """
    Apply a treble tone-control effect to an audio waveform.

    Args:
        sample_rate: Sampling rate of the waveform.
        gain: Gain applied by the filter.
        central_freq: Central frequency of the filter. Default: 3000.
        Q: Quality factor of the filter. Default: 0.707.
    """

    @check_treble_biquad
    def __init__(self, sample_rate, gain, central_freq=3000, Q=0.707):
        self.sample_rate, self.gain = sample_rate, gain
        self.central_freq, self.Q = central_freq, Q

    def parse(self):
        """Build the backend ``cde.TrebleBiquadOperation`` instance."""
        backend_args = (self.sample_rate, self.gain, self.central_freq, self.Q)
        return cde.TrebleBiquadOperation(*backend_args)


# Maps each Python-side GainType member to the matching backend (cde) enum value.
DE_C_GAIN_TYPE = {
    GainType.AMPLITUDE: cde.GainType.DE_GAIN_TYPE_AMPLITUDE,
    GainType.POWER: cde.GainType.DE_GAIN_TYPE_POWER,
    GainType.DB: cde.GainType.DE_GAIN_TYPE_DB,
}

# 调整波形的音量
class Vol(AudioTensorOperation):
    """
    Adjust the volume of a waveform.

    Args:
        gain: Gain value; its interpretation depends on ``gain_type``.
        gain_type: Type of the gain, a ``GainType`` member.
            Default: ``GainType.AMPLITUDE``.
    """

    @check_vol
    def __init__(self, gain, gain_type=GainType.AMPLITUDE):
        self.gain, self.gain_type = gain, gain_type

    def parse(self):
        """Build the backend ``cde.VolOperation`` instance."""
        # Translate the Python enum into the backend enum before the call.
        backend_gain_type = DE_C_GAIN_TYPE[self.gain_type]
        return cde.VolOperation(self.gain, backend_gain_type)

Loading…
Cancel
Save