You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

audio.cc 32 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
  1. /**
  2. * Copyright 2021-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "minddata/dataset/include/dataset/audio.h"
  17. #include "minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h"
  18. #include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h"
  19. #include "minddata/dataset/audio/ir/kernels/angle_ir.h"
  20. #include "minddata/dataset/audio/ir/kernels/band_biquad_ir.h"
  21. #include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h"
  22. #include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h"
  23. #include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h"
  24. #include "minddata/dataset/audio/ir/kernels/biquad_ir.h"
  25. #include "minddata/dataset/audio/ir/kernels/complex_norm_ir.h"
  26. #include "minddata/dataset/audio/ir/kernels/compute_deltas_ir.h"
  27. #include "minddata/dataset/audio/ir/kernels/contrast_ir.h"
  28. #include "minddata/dataset/audio/ir/kernels/db_to_amplitude_ir.h"
  29. #include "minddata/dataset/audio/ir/kernels/dc_shift_ir.h"
  30. #include "minddata/dataset/audio/ir/kernels/deemph_biquad_ir.h"
  31. #include "minddata/dataset/audio/ir/kernels/detect_pitch_frequency_ir.h"
  32. #include "minddata/dataset/audio/ir/kernels/dither_ir.h"
  33. #include "minddata/dataset/audio/ir/kernels/equalizer_biquad_ir.h"
  34. #include "minddata/dataset/audio/ir/kernels/fade_ir.h"
  35. #include "minddata/dataset/audio/ir/kernels/flanger_ir.h"
  36. #include "minddata/dataset/audio/ir/kernels/frequency_masking_ir.h"
  37. #include "minddata/dataset/audio/ir/kernels/gain_ir.h"
  38. #include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h"
  39. #include "minddata/dataset/audio/ir/kernels/lfilter_ir.h"
  40. #include "minddata/dataset/audio/ir/kernels/lowpass_biquad_ir.h"
  41. #include "minddata/dataset/audio/ir/kernels/magphase_ir.h"
  42. #include "minddata/dataset/audio/ir/kernels/mask_along_axis_iid_ir.h"
  43. #include "minddata/dataset/audio/ir/kernels/mask_along_axis_ir.h"
  44. #include "minddata/dataset/audio/ir/kernels/mel_scale_ir.h"
  45. #include "minddata/dataset/audio/ir/kernels/mu_law_decoding_ir.h"
  46. #include "minddata/dataset/audio/ir/kernels/mu_law_encoding_ir.h"
  47. #include "minddata/dataset/audio/ir/kernels/overdrive_ir.h"
  48. #include "minddata/dataset/audio/ir/kernels/phase_vocoder_ir.h"
  49. #include "minddata/dataset/audio/ir/kernels/phaser_ir.h"
  50. #include "minddata/dataset/audio/ir/kernels/riaa_biquad_ir.h"
  51. #include "minddata/dataset/audio/ir/kernels/sliding_window_cmn_ir.h"
  52. #include "minddata/dataset/audio/ir/kernels/spectral_centroid_ir.h"
  53. #include "minddata/dataset/audio/ir/kernels/spectrogram_ir.h"
  54. #include "minddata/dataset/audio/ir/kernels/time_masking_ir.h"
  55. #include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h"
  56. #include "minddata/dataset/audio/ir/kernels/treble_biquad_ir.h"
  57. #include "minddata/dataset/audio/ir/kernels/vol_ir.h"
  58. #include "minddata/dataset/audio/kernels/audio_utils.h"
  59. namespace mindspore {
  60. namespace dataset {
  61. namespace audio {
  62. // AllpassBiquad Transform Operation.
  63. struct AllpassBiquad::Data {
  64. Data(int32_t sample_rate, float central_freq, float Q)
  65. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {}
  66. int32_t sample_rate_;
  67. float central_freq_;
  68. float Q_;
  69. };
  70. AllpassBiquad::AllpassBiquad(int32_t sample_rate, float central_freq, float Q)
  71. : data_(std::make_shared<Data>(sample_rate, central_freq, Q)) {}
  72. std::shared_ptr<TensorOperation> AllpassBiquad::Parse() {
  73. return std::make_shared<AllpassBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_);
  74. }
  75. // AmplitudeToDB Transform Operation.
  76. struct AmplitudeToDB::Data {
  77. Data(ScaleType stype, float ref_value, float amin, float top_db)
  78. : stype_(stype), ref_value_(ref_value), amin_(amin), top_db_(top_db) {}
  79. ScaleType stype_;
  80. float ref_value_;
  81. float amin_;
  82. float top_db_;
  83. };
  84. AmplitudeToDB::AmplitudeToDB(ScaleType stype, float ref_value, float amin, float top_db)
  85. : data_(std::make_shared<Data>(stype, ref_value, amin, top_db)) {}
  86. std::shared_ptr<TensorOperation> AmplitudeToDB::Parse() {
  87. return std::make_shared<AmplitudeToDBOperation>(data_->stype_, data_->ref_value_, data_->amin_, data_->top_db_);
  88. }
  89. // Angle Transform Operation.
  90. Angle::Angle() = default;
  91. std::shared_ptr<TensorOperation> Angle::Parse() { return std::make_shared<AngleOperation>(); }
  92. // BandBiquad Transform Operation.
  93. struct BandBiquad::Data {
  94. Data(int32_t sample_rate, float central_freq, float Q, bool noise)
  95. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q), noise_(noise) {}
  96. int32_t sample_rate_;
  97. float central_freq_;
  98. float Q_;
  99. bool noise_;
  100. };
  101. BandBiquad::BandBiquad(int32_t sample_rate, float central_freq, float Q, bool noise)
  102. : data_(std::make_shared<Data>(sample_rate, central_freq, Q, noise)) {}
  103. std::shared_ptr<TensorOperation> BandBiquad::Parse() {
  104. return std::make_shared<BandBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_, data_->noise_);
  105. }
  106. // BandpassBiquad Transform Operation.
  107. struct BandpassBiquad::Data {
  108. Data(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain)
  109. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q), const_skirt_gain_(const_skirt_gain) {}
  110. int32_t sample_rate_;
  111. float central_freq_;
  112. float Q_;
  113. bool const_skirt_gain_;
  114. };
  115. BandpassBiquad::BandpassBiquad(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain)
  116. : data_(std::make_shared<Data>(sample_rate, central_freq, Q, const_skirt_gain)) {}
  117. std::shared_ptr<TensorOperation> BandpassBiquad::Parse() {
  118. return std::make_shared<BandpassBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_,
  119. data_->const_skirt_gain_);
  120. }
  121. // BandrejectBiquad Transform Operation.
  122. struct BandrejectBiquad::Data {
  123. Data(int32_t sample_rate, float central_freq, float Q)
  124. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {}
  125. int32_t sample_rate_;
  126. float central_freq_;
  127. float Q_;
  128. };
  129. BandrejectBiquad::BandrejectBiquad(int32_t sample_rate, float central_freq, float Q)
  130. : data_(std::make_shared<Data>(sample_rate, central_freq, Q)) {}
  131. std::shared_ptr<TensorOperation> BandrejectBiquad::Parse() {
  132. return std::make_shared<BandrejectBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_);
  133. }
  134. // BassBiquad Transform Operation.
  135. struct BassBiquad::Data {
  136. Data(int32_t sample_rate, float gain, float central_freq, float Q)
  137. : sample_rate_(sample_rate), gain_(gain), central_freq_(central_freq), Q_(Q) {}
  138. int32_t sample_rate_;
  139. float gain_;
  140. float central_freq_;
  141. float Q_;
  142. };
  143. BassBiquad::BassBiquad(int32_t sample_rate, float gain, float central_freq, float Q)
  144. : data_(std::make_shared<Data>(sample_rate, gain, central_freq, Q)) {}
  145. std::shared_ptr<TensorOperation> BassBiquad::Parse() {
  146. return std::make_shared<BassBiquadOperation>(data_->sample_rate_, data_->gain_, data_->central_freq_, data_->Q_);
  147. }
  148. // Biquad Transform Operation.
  149. struct Biquad::Data {
  150. Data(float b0, float b1, float b2, float a0, float a1, float a2)
  151. : b0_(b0), b1_(b1), b2_(b2), a0_(a0), a1_(a1), a2_(a2) {}
  152. float b0_;
  153. float b1_;
  154. float b2_;
  155. float a0_;
  156. float a1_;
  157. float a2_;
  158. };
  159. Biquad::Biquad(float b0, float b1, float b2, float a0, float a1, float a2)
  160. : data_(std::make_shared<Data>(b0, b1, b2, a0, a1, a2)) {}
  161. std::shared_ptr<TensorOperation> Biquad::Parse() {
  162. return std::make_shared<BiquadOperation>(data_->b0_, data_->b1_, data_->b2_, data_->a0_, data_->a1_, data_->a1_);
  163. }
  164. // ComplexNorm Transform Operation.
  165. struct ComplexNorm::Data {
  166. explicit Data(float power) : power_(power) {}
  167. float power_;
  168. };
  169. ComplexNorm::ComplexNorm(float power) : data_(std::make_shared<Data>(power)) {}
  170. std::shared_ptr<TensorOperation> ComplexNorm::Parse() { return std::make_shared<ComplexNormOperation>(data_->power_); }
  171. // ComputeDeltas Transform Operation.
  172. struct ComputeDeltas::Data {
  173. Data(int32_t win_length, BorderType pad_mode) : win_length_(win_length), pad_mode_(pad_mode) {}
  174. int32_t win_length_;
  175. BorderType pad_mode_;
  176. };
  177. ComputeDeltas::ComputeDeltas(int32_t win_length, BorderType pad_mode)
  178. : data_(std::make_shared<Data>(win_length, pad_mode)) {}
  179. std::shared_ptr<TensorOperation> ComputeDeltas::Parse() {
  180. return std::make_shared<ComputeDeltasOperation>(data_->win_length_, data_->pad_mode_);
  181. }
  182. // Contrast Transform Operation.
  183. struct Contrast::Data {
  184. explicit Data(float enhancement_amount) : enhancement_amount_(enhancement_amount) {}
  185. float enhancement_amount_;
  186. };
  187. Contrast::Contrast(float enhancement_amount) : data_(std::make_shared<Data>(enhancement_amount)) {}
  188. std::shared_ptr<TensorOperation> Contrast::Parse() {
  189. return std::make_shared<ContrastOperation>(data_->enhancement_amount_);
  190. }
  191. // DBToAmplitude Transform Operation.
  192. struct DBToAmplitude::Data {
  193. explicit Data(float ref, float power) : ref_(ref), power_(power) {}
  194. float ref_;
  195. float power_;
  196. };
  197. DBToAmplitude::DBToAmplitude(float ref, float power) : data_(std::make_shared<Data>(ref, power)) {}
  198. std::shared_ptr<TensorOperation> DBToAmplitude::Parse() {
  199. return std::make_shared<DBToAmplitudeOperation>(data_->ref_, data_->power_);
  200. }
  201. // DCShift Transform Operation.
  202. struct DCShift::Data {
  203. Data(float shift, float limiter_gain) : shift_(shift), limiter_gain_(limiter_gain) {}
  204. float shift_;
  205. float limiter_gain_;
  206. };
  207. DCShift::DCShift(float shift) : data_(std::make_shared<Data>(shift, shift)) {}
  208. DCShift::DCShift(float shift, float limiter_gain) : data_(std::make_shared<Data>(shift, limiter_gain)) {}
  209. std::shared_ptr<TensorOperation> DCShift::Parse() {
  210. return std::make_shared<DCShiftOperation>(data_->shift_, data_->limiter_gain_);
  211. }
  212. Status CreateDct(mindspore::MSTensor *output, int32_t n_mfcc, int32_t n_mels, NormMode norm) {
  213. RETURN_UNEXPECTED_IF_NULL(output);
  214. CHECK_FAIL_RETURN_UNEXPECTED(n_mfcc > 0, "CreateDct: n_mfcc must be greater than 0, got: " + std::to_string(n_mfcc));
  215. CHECK_FAIL_RETURN_UNEXPECTED(n_mels > 0, "CreateDct: n_mels must be greater than 0, got: " + std::to_string(n_mels));
  216. std::shared_ptr<dataset::Tensor> dct;
  217. RETURN_IF_NOT_OK(Dct(&dct, n_mfcc, n_mels, norm));
  218. CHECK_FAIL_RETURN_UNEXPECTED(dct->HasData(), "CreateDct: get an empty tensor with shape " + dct->shape().ToString());
  219. *output = mindspore::MSTensor(std::make_shared<DETensor>(dct));
  220. return Status::OK();
  221. }
  222. // DeemphBiquad Transform Operation.
  223. struct DeemphBiquad::Data {
  224. explicit Data(int32_t sample_rate) : sample_rate_(sample_rate) {}
  225. int32_t sample_rate_;
  226. };
  227. DeemphBiquad::DeemphBiquad(int32_t sample_rate) : data_(std::make_shared<Data>(sample_rate)) {}
  228. std::shared_ptr<TensorOperation> DeemphBiquad::Parse() {
  229. return std::make_shared<DeemphBiquadOperation>(data_->sample_rate_);
  230. }
  231. // DetectPitchFrequency Transform Operation.
  232. struct DetectPitchFrequency::Data {
  233. Data(int32_t sample_rate, float frame_time, int32_t win_length, int32_t freq_low, int32_t freq_high)
  234. : sample_rate_(sample_rate),
  235. frame_time_(frame_time),
  236. win_length_(win_length),
  237. freq_low_(freq_low),
  238. freq_high_(freq_high) {}
  239. int32_t sample_rate_;
  240. float frame_time_;
  241. int32_t win_length_;
  242. int32_t freq_low_;
  243. int32_t freq_high_;
  244. };
  245. DetectPitchFrequency::DetectPitchFrequency(int32_t sample_rate, float frame_time, int32_t win_length, int32_t freq_low,
  246. int32_t freq_high)
  247. : data_(std::make_shared<Data>(sample_rate, frame_time, win_length, freq_low, freq_high)) {}
  248. std::shared_ptr<TensorOperation> DetectPitchFrequency::Parse() {
  249. return std::make_shared<DetectPitchFrequencyOperation>(data_->sample_rate_, data_->frame_time_, data_->win_length_,
  250. data_->freq_low_, data_->freq_high_);
  251. }
  252. // Dither Transform Operation.
  253. struct Dither::Data {
  254. Data(DensityFunction density_function, bool noise_shaping)
  255. : density_function_(density_function), noise_shaping_(noise_shaping) {}
  256. DensityFunction density_function_;
  257. bool noise_shaping_;
  258. };
  259. Dither::Dither(DensityFunction density_function, bool noise_shaping)
  260. : data_(std::make_shared<Data>(density_function, noise_shaping)) {}
  261. std::shared_ptr<TensorOperation> Dither::Parse() {
  262. return std::make_shared<DitherOperation>(data_->density_function_, data_->noise_shaping_);
  263. }
  264. // EqualizerBiquad Transform Operation.
  265. struct EqualizerBiquad::Data {
  266. Data(int32_t sample_rate, float center_freq, float gain, float Q)
  267. : sample_rate_(sample_rate), center_freq_(center_freq), gain_(gain), Q_(Q) {}
  268. int32_t sample_rate_;
  269. float center_freq_;
  270. float gain_;
  271. float Q_;
  272. };
  273. EqualizerBiquad::EqualizerBiquad(int32_t sample_rate, float center_freq, float gain, float Q)
  274. : data_(std::make_shared<Data>(sample_rate, center_freq, gain, Q)) {}
  275. std::shared_ptr<TensorOperation> EqualizerBiquad::Parse() {
  276. return std::make_shared<EqualizerBiquadOperation>(data_->sample_rate_, data_->center_freq_, data_->gain_, data_->Q_);
  277. }
  278. // Fade Transform Operation.
  279. struct Fade::Data {
  280. Data(int32_t fade_in_len, int32_t fade_out_len, FadeShape fade_shape)
  281. : fade_in_len_(fade_in_len), fade_out_len_(fade_out_len), fade_shape_(fade_shape) {}
  282. int32_t fade_in_len_;
  283. int32_t fade_out_len_;
  284. FadeShape fade_shape_;
  285. };
  286. Fade::Fade(int32_t fade_in_len, int32_t fade_out_len, FadeShape fade_shape)
  287. : data_(std::make_shared<Data>(fade_in_len, fade_out_len, fade_shape)) {}
  288. std::shared_ptr<TensorOperation> Fade::Parse() {
  289. return std::make_shared<FadeOperation>(data_->fade_in_len_, data_->fade_out_len_, data_->fade_shape_);
  290. }
  291. // Flanger Transform Operation.
  292. struct Flanger::Data {
  293. Data(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, float phase,
  294. Modulation modulation, Interpolation interpolation)
  295. : sample_rate_(sample_rate),
  296. delay_(delay),
  297. depth_(depth),
  298. regen_(regen),
  299. width_(width),
  300. speed_(speed),
  301. phase_(phase),
  302. modulation_(modulation),
  303. interpolation_(interpolation) {}
  304. int32_t sample_rate_;
  305. float delay_;
  306. float depth_;
  307. float regen_;
  308. float width_;
  309. float speed_;
  310. float phase_;
  311. Modulation modulation_;
  312. Interpolation interpolation_;
  313. };
  314. Flanger::Flanger(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, float phase,
  315. Modulation modulation, Interpolation interpolation)
  316. : data_(std::make_shared<Data>(sample_rate, delay, depth, regen, width, speed, phase, modulation, interpolation)) {}
  317. std::shared_ptr<TensorOperation> Flanger::Parse() {
  318. return std::make_shared<FlangerOperation>(data_->sample_rate_, data_->delay_, data_->depth_, data_->regen_,
  319. data_->width_, data_->speed_, data_->phase_, data_->modulation_,
  320. data_->interpolation_);
  321. }
  322. // FrequencyMasking Transform Operation.
  323. struct FrequencyMasking::Data {
  324. Data(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value)
  325. : iid_masks_(iid_masks),
  326. frequency_mask_param_(frequency_mask_param),
  327. mask_start_(mask_start),
  328. mask_value_(mask_value) {}
  329. bool iid_masks_;
  330. int32_t frequency_mask_param_;
  331. int32_t mask_start_;
  332. float mask_value_;
  333. };
  334. FrequencyMasking::FrequencyMasking(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value)
  335. : data_(std::make_shared<Data>(iid_masks, frequency_mask_param, mask_start, mask_value)) {}
  336. std::shared_ptr<TensorOperation> FrequencyMasking::Parse() {
  337. return std::make_shared<FrequencyMaskingOperation>(data_->iid_masks_, data_->frequency_mask_param_,
  338. data_->mask_start_, data_->mask_value_);
  339. }
  340. // Gain Transform Operation.
  341. struct Gain::Data {
  342. explicit Data(float gain_db) : gain_db_(gain_db) {}
  343. float gain_db_;
  344. };
  345. Gain::Gain(float gain_db) : data_(std::make_shared<Data>(gain_db)) {}
  346. std::shared_ptr<TensorOperation> Gain::Parse() { return std::make_shared<GainOperation>(data_->gain_db_); }
  347. // HighpassBiquad Transform Operation.
  348. struct HighpassBiquad::Data {
  349. Data(int32_t sample_rate, float cutoff_freq, float Q) : sample_rate_(sample_rate), cutoff_freq_(cutoff_freq), Q_(Q) {}
  350. int32_t sample_rate_;
  351. float cutoff_freq_;
  352. float Q_;
  353. };
  354. HighpassBiquad::HighpassBiquad(int32_t sample_rate, float cutoff_freq, float Q)
  355. : data_(std::make_shared<Data>(sample_rate, cutoff_freq, Q)) {}
  356. std::shared_ptr<TensorOperation> HighpassBiquad::Parse() {
  357. return std::make_shared<HighpassBiquadOperation>(data_->sample_rate_, data_->cutoff_freq_, data_->Q_);
  358. }
  359. // LFilter Transform Operation.
  360. struct LFilter::Data {
  361. Data(const std::vector<float> &a_coeffs, const std::vector<float> &b_coeffs, bool clamp)
  362. : a_coeffs_(a_coeffs), b_coeffs_(b_coeffs), clamp_(clamp) {}
  363. std::vector<float> a_coeffs_;
  364. std::vector<float> b_coeffs_;
  365. bool clamp_;
  366. };
  367. LFilter::LFilter(const std::vector<float> &a_coeffs, const std::vector<float> &b_coeffs, bool clamp)
  368. : data_(std::make_shared<Data>(a_coeffs, b_coeffs, clamp)) {}
  369. std::shared_ptr<TensorOperation> LFilter::Parse() {
  370. return std::make_shared<LFilterOperation>(data_->a_coeffs_, data_->b_coeffs_, data_->clamp_);
  371. }
  372. // LowpassBiquad Transform Operation.
  373. struct LowpassBiquad::Data {
  374. Data(int32_t sample_rate, float cutoff_freq, float Q) : sample_rate_(sample_rate), cutoff_freq_(cutoff_freq), Q_(Q) {}
  375. int32_t sample_rate_;
  376. float cutoff_freq_;
  377. float Q_;
  378. };
  379. LowpassBiquad::LowpassBiquad(int32_t sample_rate, float cutoff_freq, float Q)
  380. : data_(std::make_shared<Data>(sample_rate, cutoff_freq, Q)) {}
  381. std::shared_ptr<TensorOperation> LowpassBiquad::Parse() {
  382. return std::make_shared<LowpassBiquadOperation>(data_->sample_rate_, data_->cutoff_freq_, data_->Q_);
  383. }
  384. // Magphase Transform Operation.
  385. struct Magphase::Data {
  386. explicit Data(float power) : power_(power) {}
  387. float power_;
  388. };
  389. Magphase::Magphase(float power) : data_(std::make_shared<Data>(power)) {}
  390. std::shared_ptr<TensorOperation> Magphase::Parse() { return std::make_shared<MagphaseOperation>(data_->power_); }
  391. // MaskAlongAxis Transform Operation.
  392. struct MaskAlongAxis::Data {
  393. Data(int32_t mask_start, int32_t mask_width, float mask_value, int32_t axis)
  394. : mask_start_(mask_start), mask_width_(mask_width), mask_value_(mask_value), axis_(axis) {}
  395. int32_t mask_start_;
  396. int32_t mask_width_;
  397. float mask_value_;
  398. int32_t axis_;
  399. };
  400. MaskAlongAxis::MaskAlongAxis(int32_t mask_start, int32_t mask_width, float mask_value, int32_t axis)
  401. : data_(std::make_shared<Data>(mask_start, mask_width, mask_value, axis)) {}
  402. std::shared_ptr<TensorOperation> MaskAlongAxis::Parse() {
  403. return std::make_shared<MaskAlongAxisOperation>(data_->mask_start_, data_->mask_width_, data_->mask_value_,
  404. data_->axis_);
  405. }
  406. // MaskAlongAxisIID Transform Operation.
  407. struct MaskAlongAxisIID::Data {
  408. Data(int32_t mask_param, float mask_value, int32_t axis)
  409. : mask_param_(mask_param), mask_value_(mask_value), axis_(axis) {}
  410. int32_t mask_param_;
  411. float mask_value_;
  412. int32_t axis_;
  413. };
  414. MaskAlongAxisIID::MaskAlongAxisIID(int32_t mask_param, float mask_value, int32_t axis)
  415. : data_(std::make_shared<Data>(mask_param, mask_value, axis)) {}
  416. std::shared_ptr<TensorOperation> MaskAlongAxisIID::Parse() {
  417. return std::make_shared<MaskAlongAxisIIDOperation>(data_->mask_param_, data_->mask_value_, data_->axis_);
  418. }
  419. // MelScale Transform Operation.
  420. struct MelScale::Data {
  421. Data(int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t n_stft, NormType norm, MelType mel_type)
  422. : n_mels_(n_mels),
  423. sample_rate_(sample_rate),
  424. f_min_(f_min),
  425. f_max_(f_max),
  426. n_stft_(n_stft),
  427. norm_(norm),
  428. mel_type_(mel_type) {}
  429. int32_t n_mels_;
  430. int32_t sample_rate_;
  431. float f_min_;
  432. float f_max_;
  433. int32_t n_stft_;
  434. NormType norm_;
  435. MelType mel_type_;
  436. };
  437. MelScale::MelScale(int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t n_stft, NormType norm,
  438. MelType mel_type)
  439. : data_(std::make_shared<Data>(n_mels, sample_rate, f_min, f_max, n_stft, norm, mel_type)) {}
  440. std::shared_ptr<TensorOperation> MelScale::Parse() {
  441. return std::make_shared<MelScaleOperation>(data_->n_mels_, data_->sample_rate_, data_->f_min_, data_->f_max_,
  442. data_->n_stft_, data_->norm_, data_->mel_type_);
  443. }
  444. // MelscaleFbanks Function.
  445. Status MelscaleFbanks(MSTensor *output, int32_t n_freqs, float f_min, float f_max, int32_t n_mels, int32_t sample_rate,
  446. NormType norm, MelType mel_type) {
  447. RETURN_UNEXPECTED_IF_NULL(output);
  448. CHECK_FAIL_RETURN_UNEXPECTED(n_freqs > 0,
  449. "MelscaleFbanks: n_freqs must be greater than 0, got: " + std::to_string(n_freqs));
  450. CHECK_FAIL_RETURN_UNEXPECTED(f_min >= 0, "MelscaleFbanks: f_min must be non negative, got: " + std::to_string(f_min));
  451. CHECK_FAIL_RETURN_UNEXPECTED(f_max > 0,
  452. "MelscaleFbanks: f_max must be greater than 0, got: " + std::to_string(f_max));
  453. CHECK_FAIL_RETURN_UNEXPECTED(n_mels > 0,
  454. "MelscaleFbanks: n_mels must be greater than 0, got: " + std::to_string(n_mels));
  455. CHECK_FAIL_RETURN_UNEXPECTED(
  456. sample_rate > 0, "MelscaleFbanks: sample_rate must be greater than 0, got: " + std::to_string(sample_rate));
  457. CHECK_FAIL_RETURN_UNEXPECTED(f_max > f_min, "MelscaleFbanks: f_max must be greater than f_min, got: f_min = " +
  458. std::to_string(f_min) + ", while f_max = " + std::to_string(f_max));
  459. std::shared_ptr<dataset::Tensor> fb;
  460. RETURN_IF_NOT_OK(CreateFbanks(&fb, n_freqs, f_min, f_max, n_mels, sample_rate, norm, mel_type));
  461. CHECK_FAIL_RETURN_UNEXPECTED(fb->HasData(),
  462. "MelscaleFbanks: get an empty tensor with shape " + fb->shape().ToString());
  463. *output = mindspore::MSTensor(std::make_shared<DETensor>(fb));
  464. return Status::OK();
  465. }
  466. // MuLawDecoding Transform Operation.
  467. struct MuLawDecoding::Data {
  468. explicit Data(int32_t quantization_channels) : quantization_channels_(quantization_channels) {}
  469. int32_t quantization_channels_;
  470. };
  471. MuLawDecoding::MuLawDecoding(int32_t quantization_channels) : data_(std::make_shared<Data>(quantization_channels)) {}
  472. std::shared_ptr<TensorOperation> MuLawDecoding::Parse() {
  473. return std::make_shared<MuLawDecodingOperation>(data_->quantization_channels_);
  474. }
  475. // MuLawEncoding Transform Operation.
  476. struct MuLawEncoding::Data {
  477. explicit Data(int32_t quantization_channels) : quantization_channels_(quantization_channels) {}
  478. int32_t quantization_channels_;
  479. };
  480. MuLawEncoding::MuLawEncoding(int32_t quantization_channels) : data_(std::make_shared<Data>(quantization_channels)) {}
  481. std::shared_ptr<TensorOperation> MuLawEncoding::Parse() {
  482. return std::make_shared<MuLawEncodingOperation>(data_->quantization_channels_);
  483. }
  484. // Overdrive Transform Operation.
  485. struct Overdrive::Data {
  486. Data(float gain, float color) : gain_(gain), color_(color) {}
  487. float gain_;
  488. float color_;
  489. };
  490. Overdrive::Overdrive(float gain, float color) : data_(std::make_shared<Data>(gain, color)) {}
  491. std::shared_ptr<TensorOperation> Overdrive::Parse() {
  492. return std::make_shared<OverdriveOperation>(data_->gain_, data_->color_);
  493. }
  494. // Phaser Transform Operation.
  495. struct Phaser::Data {
  496. Data(int32_t sample_rate, float gain_in, float gain_out, float delay_ms, float decay, float mod_speed,
  497. bool sinusoidal)
  498. : sample_rate_(sample_rate),
  499. gain_in_(gain_in),
  500. gain_out_(gain_out),
  501. delay_ms_(delay_ms),
  502. decay_(decay),
  503. mod_speed_(mod_speed),
  504. sinusoidal_(sinusoidal) {}
  505. int32_t sample_rate_;
  506. float gain_in_;
  507. float gain_out_;
  508. float delay_ms_;
  509. float decay_;
  510. float mod_speed_;
  511. bool sinusoidal_;
  512. };
  513. Phaser::Phaser(int32_t sample_rate, float gain_in, float gain_out, float delay_ms, float decay, float mod_speed,
  514. bool sinusoidal)
  515. : data_(std::make_shared<Data>(sample_rate, gain_in, gain_out, delay_ms, decay, mod_speed, sinusoidal)) {}
  516. std::shared_ptr<TensorOperation> Phaser::Parse() {
  517. return std::make_shared<PhaserOperation>(data_->sample_rate_, data_->gain_in_, data_->gain_out_, data_->delay_ms_,
  518. data_->decay_, data_->mod_speed_, data_->sinusoidal_);
  519. }
  520. // PhaseVocoder Transofrm Operation.
  521. struct PhaseVocoder::Data {
  522. Data(float rate, const MSTensor &phase_advance) : rate_(rate), phase_advance_(phase_advance) {}
  523. float rate_;
  524. MSTensor phase_advance_;
  525. };
  526. PhaseVocoder::PhaseVocoder(float rate, const MSTensor &phase_advance)
  527. : data_(std::make_shared<Data>(rate, phase_advance)) {}
  528. std::shared_ptr<TensorOperation> PhaseVocoder::Parse() {
  529. std::shared_ptr<Tensor> phase_advance;
  530. Status rc = Tensor::CreateFromMSTensor(data_->phase_advance_, &phase_advance);
  531. if (rc.IsError()) {
  532. MS_LOG(ERROR) << "Error creating phase_vocoder constant tensor." << rc;
  533. return nullptr;
  534. }
  535. return std::make_shared<PhaseVocoderOperation>(data_->rate_, phase_advance);
  536. }
  537. // RiaaBiquad Transform Operation.
  538. struct RiaaBiquad::Data {
  539. explicit Data(int32_t sample_rate) : sample_rate_(sample_rate) {}
  540. int32_t sample_rate_;
  541. };
  542. RiaaBiquad::RiaaBiquad(int32_t sample_rate) : data_(std::make_shared<Data>(sample_rate)) {}
  543. std::shared_ptr<TensorOperation> RiaaBiquad::Parse() {
  544. return std::make_shared<RiaaBiquadOperation>(data_->sample_rate_);
  545. }
  546. // SlidingWindowCmn Transform Operation.
  547. struct SlidingWindowCmn::Data {
  548. Data(int32_t cmn_window, int32_t min_cmn_window, bool center, bool norm_vars)
  549. : cmn_window_(cmn_window), min_cmn_window_(min_cmn_window), center_(center), norm_vars_(norm_vars) {}
  550. int32_t cmn_window_;
  551. int32_t min_cmn_window_;
  552. bool center_;
  553. bool norm_vars_;
  554. };
  555. SlidingWindowCmn::SlidingWindowCmn(int32_t cmn_window, int32_t min_cmn_window, bool center, bool norm_vars)
  556. : data_(std::make_shared<Data>(cmn_window, min_cmn_window, center, norm_vars)) {}
  557. std::shared_ptr<TensorOperation> SlidingWindowCmn::Parse() {
  558. return std::make_shared<SlidingWindowCmnOperation>(data_->cmn_window_, data_->min_cmn_window_, data_->center_,
  559. data_->norm_vars_);
  560. }
  561. // Spectrogram Transform Operation.
  562. struct Spectrogram::Data {
  563. Data(int32_t n_fft, int32_t win_length, int32_t hop_length, int32_t pad, WindowType window, float power,
  564. bool normalized, bool center, BorderType pad_mode, bool onesided)
  565. : n_fft_(n_fft),
  566. win_length_(win_length),
  567. hop_length_(hop_length),
  568. pad_(pad),
  569. window_(window),
  570. power_(power),
  571. normalized_(normalized),
  572. center_(center),
  573. pad_mode_(pad_mode),
  574. onesided_(onesided) {}
  575. int32_t n_fft_;
  576. int32_t win_length_;
  577. int32_t hop_length_;
  578. int32_t pad_;
  579. WindowType window_;
  580. float power_;
  581. bool normalized_;
  582. bool center_;
  583. BorderType pad_mode_;
  584. bool onesided_;
  585. };
  586. Spectrogram::Spectrogram(int32_t n_fft, int32_t win_length, int32_t hop_length, int32_t pad, WindowType window,
  587. float power, bool normalized, bool center, BorderType pad_mode, bool onesided)
  588. : data_(std::make_shared<Data>(n_fft, win_length, hop_length, pad, window, power, normalized, center, pad_mode,
  589. onesided)) {}
  590. std::shared_ptr<TensorOperation> Spectrogram::Parse() {
  591. return std::make_shared<SpectrogramOperation>(data_->n_fft_, data_->win_length_, data_->hop_length_, data_->pad_,
  592. data_->window_, data_->power_, data_->normalized_, data_->center_,
  593. data_->pad_mode_, data_->onesided_);
  594. }
  595. // SpectralCentroid Transform Operation.
  596. struct SpectralCentroid::Data {
  597. Data(int32_t sample_rate, int32_t n_fft, int32_t win_length, int32_t hop_length, int32_t pad, WindowType window)
  598. : sample_rate_(sample_rate),
  599. n_fft_(n_fft),
  600. win_length_(win_length),
  601. hop_length_(hop_length),
  602. pad_(pad),
  603. window_(window) {}
  604. int32_t sample_rate_;
  605. int32_t n_fft_;
  606. int32_t win_length_;
  607. int32_t hop_length_;
  608. int32_t pad_;
  609. WindowType window_;
  610. };
  611. SpectralCentroid::SpectralCentroid(int32_t sample_rate, int32_t n_fft, int32_t win_length, int32_t hop_length,
  612. int32_t pad, WindowType window)
  613. : data_(std::make_shared<Data>(sample_rate, n_fft, win_length, hop_length, pad, window)) {}
  614. std::shared_ptr<TensorOperation> SpectralCentroid::Parse() {
  615. return std::make_shared<SpectralCentroidOperation>(data_->sample_rate_, data_->n_fft_, data_->win_length_,
  616. data_->hop_length_, data_->pad_, data_->window_);
  617. }
  618. // TimeMasking Transform Operation.
  619. struct TimeMasking::Data {
  620. Data(bool iid_masks, int32_t time_mask_param, int32_t mask_start, float mask_value)
  621. : iid_masks_(iid_masks), time_mask_param_(time_mask_param), mask_start_(mask_start), mask_value_(mask_value) {}
  622. bool iid_masks_;
  623. int32_t time_mask_param_;
  624. int32_t mask_start_;
  625. float mask_value_;
  626. };
  627. TimeMasking::TimeMasking(bool iid_masks, int32_t time_mask_param, int32_t mask_start, float mask_value)
  628. : data_(std::make_shared<Data>(iid_masks, time_mask_param, mask_start, mask_value)) {}
  629. std::shared_ptr<TensorOperation> TimeMasking::Parse() {
  630. return std::make_shared<TimeMaskingOperation>(data_->iid_masks_, data_->time_mask_param_, data_->mask_start_,
  631. data_->mask_value_);
  632. }
  633. // TimeStretch Transform Operation.
  634. struct TimeStretch::Data {
  635. explicit Data(float hop_length, int32_t n_freq, float fixed_rate)
  636. : hop_length_(hop_length), n_freq_(n_freq), fixed_rate_(fixed_rate) {}
  637. float hop_length_;
  638. int32_t n_freq_;
  639. float fixed_rate_;
  640. };
  641. TimeStretch::TimeStretch(float hop_length, int32_t n_freq, float fixed_rate)
  642. : data_(std::make_shared<Data>(hop_length, n_freq, fixed_rate)) {}
  643. std::shared_ptr<TensorOperation> TimeStretch::Parse() {
  644. return std::make_shared<TimeStretchOperation>(data_->hop_length_, data_->n_freq_, data_->fixed_rate_);
  645. }
  646. // TrebleBiquad Transform Operation.
  647. struct TrebleBiquad::Data {
  648. Data(int32_t sample_rate, float gain, float central_freq, float Q)
  649. : sample_rate_(sample_rate), gain_(gain), central_freq_(central_freq), Q_(Q) {}
  650. int32_t sample_rate_;
  651. float gain_;
  652. float central_freq_;
  653. float Q_;
  654. };
  655. TrebleBiquad::TrebleBiquad(int32_t sample_rate, float gain, float central_freq, float Q)
  656. : data_(std::make_shared<Data>(sample_rate, gain, central_freq, Q)) {}
  657. std::shared_ptr<TensorOperation> TrebleBiquad::Parse() {
  658. return std::make_shared<TrebleBiquadOperation>(data_->sample_rate_, data_->gain_, data_->central_freq_, data_->Q_);
  659. }
  660. // Vol Transform Operation.
  661. struct Vol::Data {
  662. Data(float gain, GainType gain_type) : gain_(gain), gain_type_(gain_type) {}
  663. float gain_;
  664. GainType gain_type_;
  665. };
  666. Vol::Vol(float gain, GainType gain_type) : data_(std::make_shared<Data>(gain, gain_type)) {}
  667. std::shared_ptr<TensorOperation> Vol::Parse() {
  668. return std::make_shared<VolOperation>(data_->gain_, data_->gain_type_);
  669. }
  670. } // namespace audio
  671. } // namespace dataset
  672. } // namespace mindspore