You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

audio.cc 32 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802
  1. /**
  2. * Copyright 2021-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "minddata/dataset/include/dataset/audio.h"
  17. #include "minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h"
  18. #include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h"
  19. #include "minddata/dataset/audio/ir/kernels/angle_ir.h"
  20. #include "minddata/dataset/audio/ir/kernels/band_biquad_ir.h"
  21. #include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h"
  22. #include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h"
  23. #include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h"
  24. #include "minddata/dataset/audio/ir/kernels/biquad_ir.h"
  25. #include "minddata/dataset/audio/ir/kernels/complex_norm_ir.h"
  26. #include "minddata/dataset/audio/ir/kernels/compute_deltas_ir.h"
  27. #include "minddata/dataset/audio/ir/kernels/contrast_ir.h"
  28. #include "minddata/dataset/audio/ir/kernels/db_to_amplitude_ir.h"
  29. #include "minddata/dataset/audio/ir/kernels/dc_shift_ir.h"
  30. #include "minddata/dataset/audio/ir/kernels/deemph_biquad_ir.h"
  31. #include "minddata/dataset/audio/ir/kernels/detect_pitch_frequency_ir.h"
  32. #include "minddata/dataset/audio/ir/kernels/dither_ir.h"
  33. #include "minddata/dataset/audio/ir/kernels/equalizer_biquad_ir.h"
  34. #include "minddata/dataset/audio/ir/kernels/fade_ir.h"
  35. #include "minddata/dataset/audio/ir/kernels/flanger_ir.h"
  36. #include "minddata/dataset/audio/ir/kernels/frequency_masking_ir.h"
  37. #include "minddata/dataset/audio/ir/kernels/gain_ir.h"
  38. #include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h"
  39. #include "minddata/dataset/audio/ir/kernels/lfilter_ir.h"
  40. #include "minddata/dataset/audio/ir/kernels/lowpass_biquad_ir.h"
  41. #include "minddata/dataset/audio/ir/kernels/magphase_ir.h"
  42. #include "minddata/dataset/audio/ir/kernels/mask_along_axis_iid_ir.h"
  43. #include "minddata/dataset/audio/ir/kernels/mask_along_axis_ir.h"
  44. #include "minddata/dataset/audio/ir/kernels/mel_scale_ir.h"
  45. #include "minddata/dataset/audio/ir/kernels/mu_law_decoding_ir.h"
  46. #include "minddata/dataset/audio/ir/kernels/mu_law_encoding_ir.h"
  47. #include "minddata/dataset/audio/ir/kernels/overdrive_ir.h"
  48. #include "minddata/dataset/audio/ir/kernels/phase_vocoder_ir.h"
  49. #include "minddata/dataset/audio/ir/kernels/phaser_ir.h"
  50. #include "minddata/dataset/audio/ir/kernels/riaa_biquad_ir.h"
  51. #include "minddata/dataset/audio/ir/kernels/sliding_window_cmn_ir.h"
  52. #include "minddata/dataset/audio/ir/kernels/spectral_centroid_ir.h"
  53. #include "minddata/dataset/audio/ir/kernels/spectrogram_ir.h"
  54. #include "minddata/dataset/audio/ir/kernels/time_masking_ir.h"
  55. #include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h"
  56. #include "minddata/dataset/audio/ir/kernels/treble_biquad_ir.h"
  57. #include "minddata/dataset/audio/ir/kernels/vol_ir.h"
  58. #include "minddata/dataset/audio/ir/validators.h"
  59. #include "minddata/dataset/audio/kernels/audio_utils.h"
  60. namespace mindspore {
  61. namespace dataset {
  62. namespace audio {
  63. // AllpassBiquad Transform Operation.
  64. struct AllpassBiquad::Data {
  65. Data(int32_t sample_rate, float central_freq, float Q)
  66. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {}
  67. int32_t sample_rate_;
  68. float central_freq_;
  69. float Q_;
  70. };
  71. AllpassBiquad::AllpassBiquad(int32_t sample_rate, float central_freq, float Q)
  72. : data_(std::make_shared<Data>(sample_rate, central_freq, Q)) {}
  73. std::shared_ptr<TensorOperation> AllpassBiquad::Parse() {
  74. return std::make_shared<AllpassBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_);
  75. }
  76. // AmplitudeToDB Transform Operation.
  77. struct AmplitudeToDB::Data {
  78. Data(ScaleType stype, float ref_value, float amin, float top_db)
  79. : stype_(stype), ref_value_(ref_value), amin_(amin), top_db_(top_db) {}
  80. ScaleType stype_;
  81. float ref_value_;
  82. float amin_;
  83. float top_db_;
  84. };
  85. AmplitudeToDB::AmplitudeToDB(ScaleType stype, float ref_value, float amin, float top_db)
  86. : data_(std::make_shared<Data>(stype, ref_value, amin, top_db)) {}
  87. std::shared_ptr<TensorOperation> AmplitudeToDB::Parse() {
  88. return std::make_shared<AmplitudeToDBOperation>(data_->stype_, data_->ref_value_, data_->amin_, data_->top_db_);
  89. }
  90. // Angle Transform Operation.
  91. Angle::Angle() = default;
  92. std::shared_ptr<TensorOperation> Angle::Parse() { return std::make_shared<AngleOperation>(); }
  93. // BandBiquad Transform Operation.
  94. struct BandBiquad::Data {
  95. Data(int32_t sample_rate, float central_freq, float Q, bool noise)
  96. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q), noise_(noise) {}
  97. int32_t sample_rate_;
  98. float central_freq_;
  99. float Q_;
  100. bool noise_;
  101. };
  102. BandBiquad::BandBiquad(int32_t sample_rate, float central_freq, float Q, bool noise)
  103. : data_(std::make_shared<Data>(sample_rate, central_freq, Q, noise)) {}
  104. std::shared_ptr<TensorOperation> BandBiquad::Parse() {
  105. return std::make_shared<BandBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_, data_->noise_);
  106. }
  107. // BandpassBiquad Transform Operation.
  108. struct BandpassBiquad::Data {
  109. Data(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain)
  110. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q), const_skirt_gain_(const_skirt_gain) {}
  111. int32_t sample_rate_;
  112. float central_freq_;
  113. float Q_;
  114. bool const_skirt_gain_;
  115. };
  116. BandpassBiquad::BandpassBiquad(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain)
  117. : data_(std::make_shared<Data>(sample_rate, central_freq, Q, const_skirt_gain)) {}
  118. std::shared_ptr<TensorOperation> BandpassBiquad::Parse() {
  119. return std::make_shared<BandpassBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_,
  120. data_->const_skirt_gain_);
  121. }
  122. // BandrejectBiquad Transform Operation.
  123. struct BandrejectBiquad::Data {
  124. Data(int32_t sample_rate, float central_freq, float Q)
  125. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {}
  126. int32_t sample_rate_;
  127. float central_freq_;
  128. float Q_;
  129. };
  130. BandrejectBiquad::BandrejectBiquad(int32_t sample_rate, float central_freq, float Q)
  131. : data_(std::make_shared<Data>(sample_rate, central_freq, Q)) {}
  132. std::shared_ptr<TensorOperation> BandrejectBiquad::Parse() {
  133. return std::make_shared<BandrejectBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_);
  134. }
  135. // BassBiquad Transform Operation.
  136. struct BassBiquad::Data {
  137. Data(int32_t sample_rate, float gain, float central_freq, float Q)
  138. : sample_rate_(sample_rate), gain_(gain), central_freq_(central_freq), Q_(Q) {}
  139. int32_t sample_rate_;
  140. float gain_;
  141. float central_freq_;
  142. float Q_;
  143. };
  144. BassBiquad::BassBiquad(int32_t sample_rate, float gain, float central_freq, float Q)
  145. : data_(std::make_shared<Data>(sample_rate, gain, central_freq, Q)) {}
  146. std::shared_ptr<TensorOperation> BassBiquad::Parse() {
  147. return std::make_shared<BassBiquadOperation>(data_->sample_rate_, data_->gain_, data_->central_freq_, data_->Q_);
  148. }
  149. // Biquad Transform Operation.
  150. struct Biquad::Data {
  151. Data(float b0, float b1, float b2, float a0, float a1, float a2)
  152. : b0_(b0), b1_(b1), b2_(b2), a0_(a0), a1_(a1), a2_(a2) {}
  153. float b0_;
  154. float b1_;
  155. float b2_;
  156. float a0_;
  157. float a1_;
  158. float a2_;
  159. };
  160. Biquad::Biquad(float b0, float b1, float b2, float a0, float a1, float a2)
  161. : data_(std::make_shared<Data>(b0, b1, b2, a0, a1, a2)) {}
  162. std::shared_ptr<TensorOperation> Biquad::Parse() {
  163. return std::make_shared<BiquadOperation>(data_->b0_, data_->b1_, data_->b2_, data_->a0_, data_->a1_, data_->a1_);
  164. }
  165. // ComplexNorm Transform Operation.
  166. struct ComplexNorm::Data {
  167. explicit Data(float power) : power_(power) {}
  168. float power_;
  169. };
  170. ComplexNorm::ComplexNorm(float power) : data_(std::make_shared<Data>(power)) {}
  171. std::shared_ptr<TensorOperation> ComplexNorm::Parse() { return std::make_shared<ComplexNormOperation>(data_->power_); }
  172. // ComputeDeltas Transform Operation.
  173. struct ComputeDeltas::Data {
  174. Data(int32_t win_length, BorderType pad_mode) : win_length_(win_length), pad_mode_(pad_mode) {}
  175. int32_t win_length_;
  176. BorderType pad_mode_;
  177. };
  178. ComputeDeltas::ComputeDeltas(int32_t win_length, BorderType pad_mode)
  179. : data_(std::make_shared<Data>(win_length, pad_mode)) {}
  180. std::shared_ptr<TensorOperation> ComputeDeltas::Parse() {
  181. return std::make_shared<ComputeDeltasOperation>(data_->win_length_, data_->pad_mode_);
  182. }
  183. // Contrast Transform Operation.
  184. struct Contrast::Data {
  185. explicit Data(float enhancement_amount) : enhancement_amount_(enhancement_amount) {}
  186. float enhancement_amount_;
  187. };
  188. Contrast::Contrast(float enhancement_amount) : data_(std::make_shared<Data>(enhancement_amount)) {}
  189. std::shared_ptr<TensorOperation> Contrast::Parse() {
  190. return std::make_shared<ContrastOperation>(data_->enhancement_amount_);
  191. }
  192. // DBToAmplitude Transform Operation.
  193. struct DBToAmplitude::Data {
  194. explicit Data(float ref, float power) : ref_(ref), power_(power) {}
  195. float ref_;
  196. float power_;
  197. };
  198. DBToAmplitude::DBToAmplitude(float ref, float power) : data_(std::make_shared<Data>(ref, power)) {}
  199. std::shared_ptr<TensorOperation> DBToAmplitude::Parse() {
  200. return std::make_shared<DBToAmplitudeOperation>(data_->ref_, data_->power_);
  201. }
  202. // DCShift Transform Operation.
  203. struct DCShift::Data {
  204. Data(float shift, float limiter_gain) : shift_(shift), limiter_gain_(limiter_gain) {}
  205. float shift_;
  206. float limiter_gain_;
  207. };
  208. DCShift::DCShift(float shift) : data_(std::make_shared<Data>(shift, shift)) {}
  209. DCShift::DCShift(float shift, float limiter_gain) : data_(std::make_shared<Data>(shift, limiter_gain)) {}
  210. std::shared_ptr<TensorOperation> DCShift::Parse() {
  211. return std::make_shared<DCShiftOperation>(data_->shift_, data_->limiter_gain_);
  212. }
  213. Status CreateDct(mindspore::MSTensor *output, int32_t n_mfcc, int32_t n_mels, NormMode norm) {
  214. RETURN_UNEXPECTED_IF_NULL(output);
  215. RETURN_IF_NOT_OK(ValidateIntScalarPositive("CreateDct", "n_mfcc", n_mfcc));
  216. RETURN_IF_NOT_OK(ValidateIntScalarPositive("CreateDct", "n_mels", n_mels));
  217. std::shared_ptr<dataset::Tensor> dct;
  218. RETURN_IF_NOT_OK(Dct(&dct, n_mfcc, n_mels, norm));
  219. CHECK_FAIL_RETURN_UNEXPECTED(dct->HasData(), "CreateDct: get an empty tensor with shape " + dct->shape().ToString());
  220. *output = mindspore::MSTensor(std::make_shared<DETensor>(dct));
  221. return Status::OK();
  222. }
  223. // DeemphBiquad Transform Operation.
  224. struct DeemphBiquad::Data {
  225. explicit Data(int32_t sample_rate) : sample_rate_(sample_rate) {}
  226. int32_t sample_rate_;
  227. };
  228. DeemphBiquad::DeemphBiquad(int32_t sample_rate) : data_(std::make_shared<Data>(sample_rate)) {}
  229. std::shared_ptr<TensorOperation> DeemphBiquad::Parse() {
  230. return std::make_shared<DeemphBiquadOperation>(data_->sample_rate_);
  231. }
  232. // DetectPitchFrequency Transform Operation.
  233. struct DetectPitchFrequency::Data {
  234. Data(int32_t sample_rate, float frame_time, int32_t win_length, int32_t freq_low, int32_t freq_high)
  235. : sample_rate_(sample_rate),
  236. frame_time_(frame_time),
  237. win_length_(win_length),
  238. freq_low_(freq_low),
  239. freq_high_(freq_high) {}
  240. int32_t sample_rate_;
  241. float frame_time_;
  242. int32_t win_length_;
  243. int32_t freq_low_;
  244. int32_t freq_high_;
  245. };
  246. DetectPitchFrequency::DetectPitchFrequency(int32_t sample_rate, float frame_time, int32_t win_length, int32_t freq_low,
  247. int32_t freq_high)
  248. : data_(std::make_shared<Data>(sample_rate, frame_time, win_length, freq_low, freq_high)) {}
  249. std::shared_ptr<TensorOperation> DetectPitchFrequency::Parse() {
  250. return std::make_shared<DetectPitchFrequencyOperation>(data_->sample_rate_, data_->frame_time_, data_->win_length_,
  251. data_->freq_low_, data_->freq_high_);
  252. }
  253. // Dither Transform Operation.
  254. struct Dither::Data {
  255. Data(DensityFunction density_function, bool noise_shaping)
  256. : density_function_(density_function), noise_shaping_(noise_shaping) {}
  257. DensityFunction density_function_;
  258. bool noise_shaping_;
  259. };
  260. Dither::Dither(DensityFunction density_function, bool noise_shaping)
  261. : data_(std::make_shared<Data>(density_function, noise_shaping)) {}
  262. std::shared_ptr<TensorOperation> Dither::Parse() {
  263. return std::make_shared<DitherOperation>(data_->density_function_, data_->noise_shaping_);
  264. }
  265. // EqualizerBiquad Transform Operation.
  266. struct EqualizerBiquad::Data {
  267. Data(int32_t sample_rate, float center_freq, float gain, float Q)
  268. : sample_rate_(sample_rate), center_freq_(center_freq), gain_(gain), Q_(Q) {}
  269. int32_t sample_rate_;
  270. float center_freq_;
  271. float gain_;
  272. float Q_;
  273. };
  274. EqualizerBiquad::EqualizerBiquad(int32_t sample_rate, float center_freq, float gain, float Q)
  275. : data_(std::make_shared<Data>(sample_rate, center_freq, gain, Q)) {}
  276. std::shared_ptr<TensorOperation> EqualizerBiquad::Parse() {
  277. return std::make_shared<EqualizerBiquadOperation>(data_->sample_rate_, data_->center_freq_, data_->gain_, data_->Q_);
  278. }
  279. // Fade Transform Operation.
  280. struct Fade::Data {
  281. Data(int32_t fade_in_len, int32_t fade_out_len, FadeShape fade_shape)
  282. : fade_in_len_(fade_in_len), fade_out_len_(fade_out_len), fade_shape_(fade_shape) {}
  283. int32_t fade_in_len_;
  284. int32_t fade_out_len_;
  285. FadeShape fade_shape_;
  286. };
  287. Fade::Fade(int32_t fade_in_len, int32_t fade_out_len, FadeShape fade_shape)
  288. : data_(std::make_shared<Data>(fade_in_len, fade_out_len, fade_shape)) {}
  289. std::shared_ptr<TensorOperation> Fade::Parse() {
  290. return std::make_shared<FadeOperation>(data_->fade_in_len_, data_->fade_out_len_, data_->fade_shape_);
  291. }
  292. // Flanger Transform Operation.
  293. struct Flanger::Data {
  294. Data(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, float phase,
  295. Modulation modulation, Interpolation interpolation)
  296. : sample_rate_(sample_rate),
  297. delay_(delay),
  298. depth_(depth),
  299. regen_(regen),
  300. width_(width),
  301. speed_(speed),
  302. phase_(phase),
  303. modulation_(modulation),
  304. interpolation_(interpolation) {}
  305. int32_t sample_rate_;
  306. float delay_;
  307. float depth_;
  308. float regen_;
  309. float width_;
  310. float speed_;
  311. float phase_;
  312. Modulation modulation_;
  313. Interpolation interpolation_;
  314. };
  315. Flanger::Flanger(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, float phase,
  316. Modulation modulation, Interpolation interpolation)
  317. : data_(std::make_shared<Data>(sample_rate, delay, depth, regen, width, speed, phase, modulation, interpolation)) {}
  318. std::shared_ptr<TensorOperation> Flanger::Parse() {
  319. return std::make_shared<FlangerOperation>(data_->sample_rate_, data_->delay_, data_->depth_, data_->regen_,
  320. data_->width_, data_->speed_, data_->phase_, data_->modulation_,
  321. data_->interpolation_);
  322. }
  323. // FrequencyMasking Transform Operation.
  324. struct FrequencyMasking::Data {
  325. Data(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value)
  326. : iid_masks_(iid_masks),
  327. frequency_mask_param_(frequency_mask_param),
  328. mask_start_(mask_start),
  329. mask_value_(mask_value) {}
  330. bool iid_masks_;
  331. int32_t frequency_mask_param_;
  332. int32_t mask_start_;
  333. float mask_value_;
  334. };
  335. FrequencyMasking::FrequencyMasking(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value)
  336. : data_(std::make_shared<Data>(iid_masks, frequency_mask_param, mask_start, mask_value)) {}
  337. std::shared_ptr<TensorOperation> FrequencyMasking::Parse() {
  338. return std::make_shared<FrequencyMaskingOperation>(data_->iid_masks_, data_->frequency_mask_param_,
  339. data_->mask_start_, data_->mask_value_);
  340. }
  341. // Gain Transform Operation.
  342. struct Gain::Data {
  343. explicit Data(float gain_db) : gain_db_(gain_db) {}
  344. float gain_db_;
  345. };
  346. Gain::Gain(float gain_db) : data_(std::make_shared<Data>(gain_db)) {}
  347. std::shared_ptr<TensorOperation> Gain::Parse() { return std::make_shared<GainOperation>(data_->gain_db_); }
  348. // HighpassBiquad Transform Operation.
  349. struct HighpassBiquad::Data {
  350. Data(int32_t sample_rate, float cutoff_freq, float Q) : sample_rate_(sample_rate), cutoff_freq_(cutoff_freq), Q_(Q) {}
  351. int32_t sample_rate_;
  352. float cutoff_freq_;
  353. float Q_;
  354. };
  355. HighpassBiquad::HighpassBiquad(int32_t sample_rate, float cutoff_freq, float Q)
  356. : data_(std::make_shared<Data>(sample_rate, cutoff_freq, Q)) {}
  357. std::shared_ptr<TensorOperation> HighpassBiquad::Parse() {
  358. return std::make_shared<HighpassBiquadOperation>(data_->sample_rate_, data_->cutoff_freq_, data_->Q_);
  359. }
  360. // LFilter Transform Operation.
  361. struct LFilter::Data {
  362. Data(const std::vector<float> &a_coeffs, const std::vector<float> &b_coeffs, bool clamp)
  363. : a_coeffs_(a_coeffs), b_coeffs_(b_coeffs), clamp_(clamp) {}
  364. std::vector<float> a_coeffs_;
  365. std::vector<float> b_coeffs_;
  366. bool clamp_;
  367. };
  368. LFilter::LFilter(const std::vector<float> &a_coeffs, const std::vector<float> &b_coeffs, bool clamp)
  369. : data_(std::make_shared<Data>(a_coeffs, b_coeffs, clamp)) {}
  370. std::shared_ptr<TensorOperation> LFilter::Parse() {
  371. return std::make_shared<LFilterOperation>(data_->a_coeffs_, data_->b_coeffs_, data_->clamp_);
  372. }
  373. // LowpassBiquad Transform Operation.
  374. struct LowpassBiquad::Data {
  375. Data(int32_t sample_rate, float cutoff_freq, float Q) : sample_rate_(sample_rate), cutoff_freq_(cutoff_freq), Q_(Q) {}
  376. int32_t sample_rate_;
  377. float cutoff_freq_;
  378. float Q_;
  379. };
  380. LowpassBiquad::LowpassBiquad(int32_t sample_rate, float cutoff_freq, float Q)
  381. : data_(std::make_shared<Data>(sample_rate, cutoff_freq, Q)) {}
  382. std::shared_ptr<TensorOperation> LowpassBiquad::Parse() {
  383. return std::make_shared<LowpassBiquadOperation>(data_->sample_rate_, data_->cutoff_freq_, data_->Q_);
  384. }
  385. // Magphase Transform Operation.
  386. struct Magphase::Data {
  387. explicit Data(float power) : power_(power) {}
  388. float power_;
  389. };
  390. Magphase::Magphase(float power) : data_(std::make_shared<Data>(power)) {}
  391. std::shared_ptr<TensorOperation> Magphase::Parse() { return std::make_shared<MagphaseOperation>(data_->power_); }
  392. // MaskAlongAxis Transform Operation.
  393. struct MaskAlongAxis::Data {
  394. Data(int32_t mask_start, int32_t mask_width, float mask_value, int32_t axis)
  395. : mask_start_(mask_start), mask_width_(mask_width), mask_value_(mask_value), axis_(axis) {}
  396. int32_t mask_start_;
  397. int32_t mask_width_;
  398. float mask_value_;
  399. int32_t axis_;
  400. };
  401. MaskAlongAxis::MaskAlongAxis(int32_t mask_start, int32_t mask_width, float mask_value, int32_t axis)
  402. : data_(std::make_shared<Data>(mask_start, mask_width, mask_value, axis)) {}
  403. std::shared_ptr<TensorOperation> MaskAlongAxis::Parse() {
  404. return std::make_shared<MaskAlongAxisOperation>(data_->mask_start_, data_->mask_width_, data_->mask_value_,
  405. data_->axis_);
  406. }
  407. // MaskAlongAxisIID Transform Operation.
  408. struct MaskAlongAxisIID::Data {
  409. Data(int32_t mask_param, float mask_value, int32_t axis)
  410. : mask_param_(mask_param), mask_value_(mask_value), axis_(axis) {}
  411. int32_t mask_param_;
  412. float mask_value_;
  413. int32_t axis_;
  414. };
  415. MaskAlongAxisIID::MaskAlongAxisIID(int32_t mask_param, float mask_value, int32_t axis)
  416. : data_(std::make_shared<Data>(mask_param, mask_value, axis)) {}
  417. std::shared_ptr<TensorOperation> MaskAlongAxisIID::Parse() {
  418. return std::make_shared<MaskAlongAxisIIDOperation>(data_->mask_param_, data_->mask_value_, data_->axis_);
  419. }
  420. // MelScale Transform Operation.
  421. struct MelScale::Data {
  422. Data(int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t n_stft, NormType norm, MelType mel_type)
  423. : n_mels_(n_mels),
  424. sample_rate_(sample_rate),
  425. f_min_(f_min),
  426. f_max_(f_max),
  427. n_stft_(n_stft),
  428. norm_(norm),
  429. mel_type_(mel_type) {}
  430. int32_t n_mels_;
  431. int32_t sample_rate_;
  432. float f_min_;
  433. float f_max_;
  434. int32_t n_stft_;
  435. NormType norm_;
  436. MelType mel_type_;
  437. };
  438. MelScale::MelScale(int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t n_stft, NormType norm,
  439. MelType mel_type)
  440. : data_(std::make_shared<Data>(n_mels, sample_rate, f_min, f_max, n_stft, norm, mel_type)) {}
  441. std::shared_ptr<TensorOperation> MelScale::Parse() {
  442. return std::make_shared<MelScaleOperation>(data_->n_mels_, data_->sample_rate_, data_->f_min_, data_->f_max_,
  443. data_->n_stft_, data_->norm_, data_->mel_type_);
  444. }
  445. // MelscaleFbanks Function.
  446. Status MelscaleFbanks(MSTensor *output, int32_t n_freqs, float f_min, float f_max, int32_t n_mels, int32_t sample_rate,
  447. NormType norm, MelType mel_type) {
  448. RETURN_UNEXPECTED_IF_NULL(output);
  449. CHECK_FAIL_RETURN_UNEXPECTED(n_freqs > 0,
  450. "MelscaleFbanks: n_freqs must be greater than 0, got: " + std::to_string(n_freqs));
  451. CHECK_FAIL_RETURN_UNEXPECTED(f_min >= 0, "MelscaleFbanks: f_min must be non negative, got: " + std::to_string(f_min));
  452. CHECK_FAIL_RETURN_UNEXPECTED(f_max > 0,
  453. "MelscaleFbanks: f_max must be greater than 0, got: " + std::to_string(f_max));
  454. CHECK_FAIL_RETURN_UNEXPECTED(n_mels > 0,
  455. "MelscaleFbanks: n_mels must be greater than 0, got: " + std::to_string(n_mels));
  456. CHECK_FAIL_RETURN_UNEXPECTED(
  457. sample_rate > 0, "MelscaleFbanks: sample_rate must be greater than 0, got: " + std::to_string(sample_rate));
  458. CHECK_FAIL_RETURN_UNEXPECTED(f_max > f_min, "MelscaleFbanks: f_max must be greater than f_min, got: f_min = " +
  459. std::to_string(f_min) + ", while f_max = " + std::to_string(f_max));
  460. std::shared_ptr<dataset::Tensor> fb;
  461. RETURN_IF_NOT_OK(CreateFbanks(&fb, n_freqs, f_min, f_max, n_mels, sample_rate, norm, mel_type));
  462. CHECK_FAIL_RETURN_UNEXPECTED(fb->HasData(),
  463. "MelscaleFbanks: get an empty tensor with shape " + fb->shape().ToString());
  464. *output = mindspore::MSTensor(std::make_shared<DETensor>(fb));
  465. return Status::OK();
  466. }
  467. // MuLawDecoding Transform Operation.
  468. struct MuLawDecoding::Data {
  469. explicit Data(int32_t quantization_channels) : quantization_channels_(quantization_channels) {}
  470. int32_t quantization_channels_;
  471. };
  472. MuLawDecoding::MuLawDecoding(int32_t quantization_channels) : data_(std::make_shared<Data>(quantization_channels)) {}
  473. std::shared_ptr<TensorOperation> MuLawDecoding::Parse() {
  474. return std::make_shared<MuLawDecodingOperation>(data_->quantization_channels_);
  475. }
  476. // MuLawEncoding Transform Operation.
  477. struct MuLawEncoding::Data {
  478. explicit Data(int32_t quantization_channels) : quantization_channels_(quantization_channels) {}
  479. int32_t quantization_channels_;
  480. };
  481. MuLawEncoding::MuLawEncoding(int32_t quantization_channels) : data_(std::make_shared<Data>(quantization_channels)) {}
  482. std::shared_ptr<TensorOperation> MuLawEncoding::Parse() {
  483. return std::make_shared<MuLawEncodingOperation>(data_->quantization_channels_);
  484. }
  485. // Overdrive Transform Operation.
  486. struct Overdrive::Data {
  487. Data(float gain, float color) : gain_(gain), color_(color) {}
  488. float gain_;
  489. float color_;
  490. };
  491. Overdrive::Overdrive(float gain, float color) : data_(std::make_shared<Data>(gain, color)) {}
  492. std::shared_ptr<TensorOperation> Overdrive::Parse() {
  493. return std::make_shared<OverdriveOperation>(data_->gain_, data_->color_);
  494. }
  495. // Phaser Transform Operation.
  496. struct Phaser::Data {
  497. Data(int32_t sample_rate, float gain_in, float gain_out, float delay_ms, float decay, float mod_speed,
  498. bool sinusoidal)
  499. : sample_rate_(sample_rate),
  500. gain_in_(gain_in),
  501. gain_out_(gain_out),
  502. delay_ms_(delay_ms),
  503. decay_(decay),
  504. mod_speed_(mod_speed),
  505. sinusoidal_(sinusoidal) {}
  506. int32_t sample_rate_;
  507. float gain_in_;
  508. float gain_out_;
  509. float delay_ms_;
  510. float decay_;
  511. float mod_speed_;
  512. bool sinusoidal_;
  513. };
  514. Phaser::Phaser(int32_t sample_rate, float gain_in, float gain_out, float delay_ms, float decay, float mod_speed,
  515. bool sinusoidal)
  516. : data_(std::make_shared<Data>(sample_rate, gain_in, gain_out, delay_ms, decay, mod_speed, sinusoidal)) {}
  517. std::shared_ptr<TensorOperation> Phaser::Parse() {
  518. return std::make_shared<PhaserOperation>(data_->sample_rate_, data_->gain_in_, data_->gain_out_, data_->delay_ms_,
  519. data_->decay_, data_->mod_speed_, data_->sinusoidal_);
  520. }
  521. // PhaseVocoder Transofrm Operation.
  522. struct PhaseVocoder::Data {
  523. Data(float rate, const MSTensor &phase_advance) : rate_(rate), phase_advance_(phase_advance) {}
  524. float rate_;
  525. MSTensor phase_advance_;
  526. };
  527. PhaseVocoder::PhaseVocoder(float rate, const MSTensor &phase_advance)
  528. : data_(std::make_shared<Data>(rate, phase_advance)) {}
  529. std::shared_ptr<TensorOperation> PhaseVocoder::Parse() {
  530. std::shared_ptr<Tensor> phase_advance;
  531. Status rc = Tensor::CreateFromMSTensor(data_->phase_advance_, &phase_advance);
  532. if (rc.IsError()) {
  533. MS_LOG(ERROR) << "Error creating phase_vocoder constant tensor." << rc;
  534. return nullptr;
  535. }
  536. return std::make_shared<PhaseVocoderOperation>(data_->rate_, phase_advance);
  537. }
  538. // RiaaBiquad Transform Operation.
  539. struct RiaaBiquad::Data {
  540. explicit Data(int32_t sample_rate) : sample_rate_(sample_rate) {}
  541. int32_t sample_rate_;
  542. };
  543. RiaaBiquad::RiaaBiquad(int32_t sample_rate) : data_(std::make_shared<Data>(sample_rate)) {}
  544. std::shared_ptr<TensorOperation> RiaaBiquad::Parse() {
  545. return std::make_shared<RiaaBiquadOperation>(data_->sample_rate_);
  546. }
  547. // SlidingWindowCmn Transform Operation.
  548. struct SlidingWindowCmn::Data {
  549. Data(int32_t cmn_window, int32_t min_cmn_window, bool center, bool norm_vars)
  550. : cmn_window_(cmn_window), min_cmn_window_(min_cmn_window), center_(center), norm_vars_(norm_vars) {}
  551. int32_t cmn_window_;
  552. int32_t min_cmn_window_;
  553. bool center_;
  554. bool norm_vars_;
  555. };
  556. SlidingWindowCmn::SlidingWindowCmn(int32_t cmn_window, int32_t min_cmn_window, bool center, bool norm_vars)
  557. : data_(std::make_shared<Data>(cmn_window, min_cmn_window, center, norm_vars)) {}
  558. std::shared_ptr<TensorOperation> SlidingWindowCmn::Parse() {
  559. return std::make_shared<SlidingWindowCmnOperation>(data_->cmn_window_, data_->min_cmn_window_, data_->center_,
  560. data_->norm_vars_);
  561. }
  562. // Spectrogram Transform Operation.
  563. struct Spectrogram::Data {
  564. Data(int32_t n_fft, int32_t win_length, int32_t hop_length, int32_t pad, WindowType window, float power,
  565. bool normalized, bool center, BorderType pad_mode, bool onesided)
  566. : n_fft_(n_fft),
  567. win_length_(win_length),
  568. hop_length_(hop_length),
  569. pad_(pad),
  570. window_(window),
  571. power_(power),
  572. normalized_(normalized),
  573. center_(center),
  574. pad_mode_(pad_mode),
  575. onesided_(onesided) {}
  576. int32_t n_fft_;
  577. int32_t win_length_;
  578. int32_t hop_length_;
  579. int32_t pad_;
  580. WindowType window_;
  581. float power_;
  582. bool normalized_;
  583. bool center_;
  584. BorderType pad_mode_;
  585. bool onesided_;
  586. };
  587. Spectrogram::Spectrogram(int32_t n_fft, int32_t win_length, int32_t hop_length, int32_t pad, WindowType window,
  588. float power, bool normalized, bool center, BorderType pad_mode, bool onesided)
  589. : data_(std::make_shared<Data>(n_fft, win_length, hop_length, pad, window, power, normalized, center, pad_mode,
  590. onesided)) {}
  591. std::shared_ptr<TensorOperation> Spectrogram::Parse() {
  592. return std::make_shared<SpectrogramOperation>(data_->n_fft_, data_->win_length_, data_->hop_length_, data_->pad_,
  593. data_->window_, data_->power_, data_->normalized_, data_->center_,
  594. data_->pad_mode_, data_->onesided_);
  595. }
  596. // SpectralCentroid Transform Operation.
  597. struct SpectralCentroid::Data {
  598. Data(int32_t sample_rate, int32_t n_fft, int32_t win_length, int32_t hop_length, int32_t pad, WindowType window)
  599. : sample_rate_(sample_rate),
  600. n_fft_(n_fft),
  601. win_length_(win_length),
  602. hop_length_(hop_length),
  603. pad_(pad),
  604. window_(window) {}
  605. int32_t sample_rate_;
  606. int32_t n_fft_;
  607. int32_t win_length_;
  608. int32_t hop_length_;
  609. int32_t pad_;
  610. WindowType window_;
  611. };
  612. SpectralCentroid::SpectralCentroid(int32_t sample_rate, int32_t n_fft, int32_t win_length, int32_t hop_length,
  613. int32_t pad, WindowType window)
  614. : data_(std::make_shared<Data>(sample_rate, n_fft, win_length, hop_length, pad, window)) {}
  615. std::shared_ptr<TensorOperation> SpectralCentroid::Parse() {
  616. return std::make_shared<SpectralCentroidOperation>(data_->sample_rate_, data_->n_fft_, data_->win_length_,
  617. data_->hop_length_, data_->pad_, data_->window_);
  618. }
  619. // TimeMasking Transform Operation.
  620. struct TimeMasking::Data {
  621. Data(bool iid_masks, int32_t time_mask_param, int32_t mask_start, float mask_value)
  622. : iid_masks_(iid_masks), time_mask_param_(time_mask_param), mask_start_(mask_start), mask_value_(mask_value) {}
  623. bool iid_masks_;
  624. int32_t time_mask_param_;
  625. int32_t mask_start_;
  626. float mask_value_;
  627. };
  628. TimeMasking::TimeMasking(bool iid_masks, int32_t time_mask_param, int32_t mask_start, float mask_value)
  629. : data_(std::make_shared<Data>(iid_masks, time_mask_param, mask_start, mask_value)) {}
  630. std::shared_ptr<TensorOperation> TimeMasking::Parse() {
  631. return std::make_shared<TimeMaskingOperation>(data_->iid_masks_, data_->time_mask_param_, data_->mask_start_,
  632. data_->mask_value_);
  633. }
  634. // TimeStretch Transform Operation.
  635. struct TimeStretch::Data {
  636. explicit Data(float hop_length, int32_t n_freq, float fixed_rate)
  637. : hop_length_(hop_length), n_freq_(n_freq), fixed_rate_(fixed_rate) {}
  638. float hop_length_;
  639. int32_t n_freq_;
  640. float fixed_rate_;
  641. };
  642. TimeStretch::TimeStretch(float hop_length, int32_t n_freq, float fixed_rate)
  643. : data_(std::make_shared<Data>(hop_length, n_freq, fixed_rate)) {}
  644. std::shared_ptr<TensorOperation> TimeStretch::Parse() {
  645. return std::make_shared<TimeStretchOperation>(data_->hop_length_, data_->n_freq_, data_->fixed_rate_);
  646. }
  647. // TrebleBiquad Transform Operation.
  648. struct TrebleBiquad::Data {
  649. Data(int32_t sample_rate, float gain, float central_freq, float Q)
  650. : sample_rate_(sample_rate), gain_(gain), central_freq_(central_freq), Q_(Q) {}
  651. int32_t sample_rate_;
  652. float gain_;
  653. float central_freq_;
  654. float Q_;
  655. };
  656. TrebleBiquad::TrebleBiquad(int32_t sample_rate, float gain, float central_freq, float Q)
  657. : data_(std::make_shared<Data>(sample_rate, gain, central_freq, Q)) {}
  658. std::shared_ptr<TensorOperation> TrebleBiquad::Parse() {
  659. return std::make_shared<TrebleBiquadOperation>(data_->sample_rate_, data_->gain_, data_->central_freq_, data_->Q_);
  660. }
  661. // Vol Transform Operation.
  662. struct Vol::Data {
  663. Data(float gain, GainType gain_type) : gain_(gain), gain_type_(gain_type) {}
  664. float gain_;
  665. GainType gain_type_;
  666. };
  667. Vol::Vol(float gain, GainType gain_type) : data_(std::make_shared<Data>(gain, gain_type)) {}
  668. std::shared_ptr<TensorOperation> Vol::Parse() {
  669. return std::make_shared<VolOperation>(data_->gain_, data_->gain_type_);
  670. }
  671. } // namespace audio
  672. } // namespace dataset
  673. } // namespace mindspore