You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

audio.cc 14 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "minddata/dataset/include/dataset/audio.h"
  17. #include "minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h"
  18. #include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h"
  19. #include "minddata/dataset/audio/ir/kernels/angle_ir.h"
  20. #include "minddata/dataset/audio/ir/kernels/band_biquad_ir.h"
  21. #include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h"
  22. #include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h"
  23. #include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h"
  24. #include "minddata/dataset/audio/ir/kernels/biquad_ir.h"
  25. #include "minddata/dataset/audio/ir/kernels/complex_norm_ir.h"
  26. #include "minddata/dataset/audio/ir/kernels/contrast_ir.h"
  27. #include "minddata/dataset/audio/ir/kernels/dc_shift_ir.h"
  28. #include "minddata/dataset/audio/ir/kernels/deemph_biquad_ir.h"
  29. #include "minddata/dataset/audio/ir/kernels/equalizer_biquad_ir.h"
  30. #include "minddata/dataset/audio/ir/kernels/frequency_masking_ir.h"
  31. #include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h"
  32. #include "minddata/dataset/audio/ir/kernels/lfilter_ir.h"
  33. #include "minddata/dataset/audio/ir/kernels/lowpass_biquad_ir.h"
  34. #include "minddata/dataset/audio/ir/kernels/mu_law_decoding_ir.h"
  35. #include "minddata/dataset/audio/ir/kernels/time_masking_ir.h"
  36. #include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h"
  37. namespace mindspore {
  38. namespace dataset {
  39. namespace audio {
  40. // AllpassBiquad Transform Operation.
  41. struct AllpassBiquad::Data {
  42. Data(int32_t sample_rate, float central_freq, float Q)
  43. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {}
  44. int32_t sample_rate_;
  45. float central_freq_;
  46. float Q_;
  47. };
  48. AllpassBiquad::AllpassBiquad(int32_t sample_rate, float central_freq, float Q)
  49. : data_(std::make_shared<Data>(sample_rate, central_freq, Q)) {}
  50. std::shared_ptr<TensorOperation> AllpassBiquad::Parse() {
  51. return std::make_shared<AllpassBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_);
  52. }
  53. // AmplitudeToDB Transform Operation.
  54. struct AmplitudeToDB::Data {
  55. Data(ScaleType stype, float ref_value, float amin, float top_db)
  56. : stype_(stype), ref_value_(ref_value), amin_(amin), top_db_(top_db) {}
  57. ScaleType stype_;
  58. float ref_value_;
  59. float amin_;
  60. float top_db_;
  61. };
  62. AmplitudeToDB::AmplitudeToDB(ScaleType stype, float ref_value, float amin, float top_db)
  63. : data_(std::make_shared<Data>(stype, ref_value, amin, top_db)) {}
  64. std::shared_ptr<TensorOperation> AmplitudeToDB::Parse() {
  65. return std::make_shared<AmplitudeToDBOperation>(data_->stype_, data_->ref_value_, data_->amin_, data_->top_db_);
  66. }
  67. // Angle Transform Operation.
  68. Angle::Angle() {}
  69. std::shared_ptr<TensorOperation> Angle::Parse() { return std::make_shared<AngleOperation>(); }
  70. // BandBiquad Transform Operation.
  71. struct BandBiquad::Data {
  72. Data(int32_t sample_rate, float central_freq, float Q, bool noise)
  73. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q), noise_(noise) {}
  74. int32_t sample_rate_;
  75. float central_freq_;
  76. float Q_;
  77. bool noise_;
  78. };
  79. BandBiquad::BandBiquad(int32_t sample_rate, float central_freq, float Q, bool noise)
  80. : data_(std::make_shared<Data>(sample_rate, central_freq, Q, noise)) {}
  81. std::shared_ptr<TensorOperation> BandBiquad::Parse() {
  82. return std::make_shared<BandBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_, data_->noise_);
  83. }
  84. // BandpassBiquad Transform Operation.
  85. struct BandpassBiquad::Data {
  86. Data(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain)
  87. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q), const_skirt_gain_(const_skirt_gain) {}
  88. int32_t sample_rate_;
  89. float central_freq_;
  90. float Q_;
  91. bool const_skirt_gain_;
  92. };
  93. BandpassBiquad::BandpassBiquad(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain)
  94. : data_(std::make_shared<Data>(sample_rate, central_freq, Q, const_skirt_gain)) {}
  95. std::shared_ptr<TensorOperation> BandpassBiquad::Parse() {
  96. return std::make_shared<BandpassBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_,
  97. data_->const_skirt_gain_);
  98. }
  99. // BandrejectBiquad Transform Operation.
  100. struct BandrejectBiquad::Data {
  101. Data(int32_t sample_rate, float central_freq, float Q)
  102. : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {}
  103. int32_t sample_rate_;
  104. float central_freq_;
  105. float Q_;
  106. };
  107. BandrejectBiquad::BandrejectBiquad(int32_t sample_rate, float central_freq, float Q)
  108. : data_(std::make_shared<Data>(sample_rate, central_freq, Q)) {}
  109. std::shared_ptr<TensorOperation> BandrejectBiquad::Parse() {
  110. return std::make_shared<BandrejectBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_);
  111. }
  112. // BassBiquad Transform Operation.
  113. struct BassBiquad::Data {
  114. Data(int32_t sample_rate, float gain, float central_freq, float Q)
  115. : sample_rate_(sample_rate), gain_(gain), central_freq_(central_freq), Q_(Q) {}
  116. int32_t sample_rate_;
  117. float gain_;
  118. float central_freq_;
  119. float Q_;
  120. };
  121. BassBiquad::BassBiquad(int32_t sample_rate, float gain, float central_freq, float Q)
  122. : data_(std::make_shared<Data>(sample_rate, gain, central_freq, Q)) {}
  123. std::shared_ptr<TensorOperation> BassBiquad::Parse() {
  124. return std::make_shared<BassBiquadOperation>(data_->sample_rate_, data_->gain_, data_->central_freq_, data_->Q_);
  125. }
  126. // Biquad Transform Operation.
  127. struct Biquad::Data {
  128. Data(float b0, float b1, float b2, float a0, float a1, float a2)
  129. : b0_(b0), b1_(b1), b2_(b2), a0_(a0), a1_(a1), a2_(a2) {}
  130. float b0_;
  131. float b1_;
  132. float b2_;
  133. float a0_;
  134. float a1_;
  135. float a2_;
  136. };
  137. Biquad::Biquad(float b0, float b1, float b2, float a0, float a1, float a2)
  138. : data_(std::make_shared<Data>(b0, b1, b2, a0, a1, a2)) {}
  139. std::shared_ptr<TensorOperation> Biquad::Parse() {
  140. return std::make_shared<BiquadOperation>(data_->b0_, data_->b1_, data_->b2_, data_->a0_, data_->a1_, data_->a1_);
  141. }
  142. // ComplexNorm Transform Operation.
  143. struct ComplexNorm::Data {
  144. explicit Data(float power) : power_(power) {}
  145. float power_;
  146. };
  147. ComplexNorm::ComplexNorm(float power) : data_(std::make_shared<Data>(power)) {}
  148. std::shared_ptr<TensorOperation> ComplexNorm::Parse() { return std::make_shared<ComplexNormOperation>(data_->power_); }
  149. // Contrast Transform Operation.
  150. struct Contrast::Data {
  151. explicit Data(float enhancement_amount) : enhancement_amount_(enhancement_amount) {}
  152. float enhancement_amount_;
  153. };
  154. Contrast::Contrast(float enhancement_amount) : data_(std::make_shared<Data>(enhancement_amount)) {}
  155. std::shared_ptr<TensorOperation> Contrast::Parse() {
  156. return std::make_shared<ContrastOperation>(data_->enhancement_amount_);
  157. }
  158. // DCShift Transform Operation.
  159. struct DCShift::Data {
  160. Data(float shift, float limiter_gain) : shift_(shift), limiter_gain_(limiter_gain) {}
  161. float limiter_gain_;
  162. float shift_;
  163. };
  164. DCShift::DCShift(float shift) : data_(std::make_shared<Data>(shift, shift)) {}
  165. DCShift::DCShift(float shift, float limiter_gain) : data_(std::make_shared<Data>(shift, limiter_gain)) {}
  166. std::shared_ptr<TensorOperation> DCShift::Parse() {
  167. return std::make_shared<DCShiftOperation>(data_->shift_, data_->limiter_gain_);
  168. }
  169. // DeemphBiquad Transform Operation.
  170. struct DeemphBiquad::Data {
  171. explicit Data(int32_t sample_rate) : sample_rate_(sample_rate) {}
  172. int32_t sample_rate_;
  173. };
  174. DeemphBiquad::DeemphBiquad(int32_t sample_rate) : data_(std::make_shared<Data>(sample_rate)) {}
  175. std::shared_ptr<TensorOperation> DeemphBiquad::Parse() {
  176. return std::make_shared<DeemphBiquadOperation>(data_->sample_rate_);
  177. }
  178. // EqualizerBiquad Transform Operation.
  179. struct EqualizerBiquad::Data {
  180. Data(int32_t sample_rate, float center_freq, float gain, float Q)
  181. : sample_rate_(sample_rate), center_freq_(center_freq), gain_(gain), Q_(Q) {}
  182. int32_t sample_rate_;
  183. float center_freq_;
  184. float gain_;
  185. float Q_;
  186. };
  187. EqualizerBiquad::EqualizerBiquad(int32_t sample_rate, float center_freq, float gain, float Q)
  188. : data_(std::make_shared<Data>(sample_rate, center_freq, gain, Q)) {}
  189. std::shared_ptr<TensorOperation> EqualizerBiquad::Parse() {
  190. return std::make_shared<EqualizerBiquadOperation>(data_->sample_rate_, data_->center_freq_, data_->gain_, data_->Q_);
  191. }
  192. // FrequencyMasking Transform Operation.
  193. struct FrequencyMasking::Data {
  194. Data(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value)
  195. : iid_masks_(iid_masks),
  196. frequency_mask_param_(frequency_mask_param),
  197. mask_start_(mask_start),
  198. mask_value_(mask_value) {}
  199. int32_t frequency_mask_param_;
  200. int32_t mask_start_;
  201. bool iid_masks_;
  202. float mask_value_;
  203. };
  204. FrequencyMasking::FrequencyMasking(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value)
  205. : data_(std::make_shared<Data>(iid_masks, frequency_mask_param, mask_start, mask_value)) {}
  206. std::shared_ptr<TensorOperation> FrequencyMasking::Parse() {
  207. return std::make_shared<FrequencyMaskingOperation>(data_->iid_masks_, data_->frequency_mask_param_,
  208. data_->mask_start_, data_->mask_value_);
  209. }
  210. // HighpassBiquad Transform Operation.
  211. struct HighpassBiquad::Data {
  212. Data(int32_t sample_rate, float cutoff_freq, float Q) : sample_rate_(sample_rate), cutoff_freq_(cutoff_freq), Q_(Q) {}
  213. int32_t sample_rate_;
  214. float cutoff_freq_;
  215. float Q_;
  216. };
  217. HighpassBiquad::HighpassBiquad(int32_t sample_rate, float cutoff_freq, float Q)
  218. : data_(std::make_shared<Data>(sample_rate, cutoff_freq, Q)) {}
  219. std::shared_ptr<TensorOperation> HighpassBiquad::Parse() {
  220. return std::make_shared<HighpassBiquadOperation>(data_->sample_rate_, data_->cutoff_freq_, data_->Q_);
  221. }
  222. // LFilter Transform Operation.
  223. struct LFilter::Data {
  224. Data(const std::vector<float> &a_coeffs, const std::vector<float> &b_coeffs, bool clamp)
  225. : a_coeffs_(a_coeffs), b_coeffs_(b_coeffs), clamp_(clamp) {}
  226. std::vector<float> a_coeffs_;
  227. std::vector<float> b_coeffs_;
  228. bool clamp_;
  229. };
  230. LFilter::LFilter(std::vector<float> a_coeffs, std::vector<float> b_coeffs, bool clamp)
  231. : data_(std::make_shared<Data>(a_coeffs, b_coeffs, clamp)) {}
  232. std::shared_ptr<TensorOperation> LFilter::Parse() {
  233. return std::make_shared<LFilterOperation>(data_->a_coeffs_, data_->b_coeffs_, data_->clamp_);
  234. }
  235. // LowpassBiquad Transform Operation.
  236. struct LowpassBiquad::Data {
  237. Data(int32_t sample_rate, float cutoff_freq, float Q) : sample_rate_(sample_rate), cutoff_freq_(cutoff_freq), Q_(Q) {}
  238. int32_t sample_rate_;
  239. float cutoff_freq_;
  240. float Q_;
  241. };
  242. LowpassBiquad::LowpassBiquad(int32_t sample_rate, float cutoff_freq, float Q)
  243. : data_(std::make_shared<Data>(sample_rate, cutoff_freq, Q)) {}
  244. std::shared_ptr<TensorOperation> LowpassBiquad::Parse() {
  245. return std::make_shared<LowpassBiquadOperation>(data_->sample_rate_, data_->cutoff_freq_, data_->Q_);
  246. }
  247. // MuLawDecoding Transform Operation.
  248. struct MuLawDecoding::Data {
  249. explicit Data(int quantization_channels) : quantization_channels_(quantization_channels) {}
  250. int quantization_channels_;
  251. };
  252. MuLawDecoding::MuLawDecoding(int quantization_channels) : data_(std::make_shared<Data>(quantization_channels)) {}
  253. std::shared_ptr<TensorOperation> MuLawDecoding::Parse() {
  254. return std::make_shared<MuLawDecodingOperation>(data_->quantization_channels_);
  255. }
  256. // TimeMasking Transform Operation.
  257. struct TimeMasking::Data {
  258. Data(bool iid_masks, int32_t time_mask_param, int32_t mask_start, float mask_value)
  259. : iid_masks_(iid_masks), time_mask_param_(time_mask_param), mask_start_(mask_start), mask_value_(mask_value) {}
  260. int32_t time_mask_param_;
  261. int32_t mask_start_;
  262. bool iid_masks_;
  263. float mask_value_;
  264. };
  265. TimeMasking::TimeMasking(bool iid_masks, int32_t time_mask_param, int32_t mask_start, float mask_value)
  266. : data_(std::make_shared<Data>(iid_masks, time_mask_param, mask_start, mask_value)) {}
  267. std::shared_ptr<TensorOperation> TimeMasking::Parse() {
  268. return std::make_shared<TimeMaskingOperation>(data_->iid_masks_, data_->time_mask_param_, data_->mask_start_,
  269. data_->mask_value_);
  270. }
  271. // TimeStretch Transform Operation.
  272. struct TimeStretch::Data {
  273. explicit Data(float hop_length, int n_freq, float fixed_rate)
  274. : hop_length_(hop_length), n_freq_(n_freq), fixed_rate_(fixed_rate) {}
  275. float hop_length_;
  276. int n_freq_;
  277. float fixed_rate_;
  278. };
  279. TimeStretch::TimeStretch(float hop_length, int n_freq, float fixed_rate)
  280. : data_(std::make_shared<Data>(hop_length, n_freq, fixed_rate)) {}
  281. std::shared_ptr<TensorOperation> TimeStretch::Parse() {
  282. return std::make_shared<TimeStretchOperation>(data_->hop_length_, data_->n_freq_, data_->fixed_rate_);
  283. }
  284. } // namespace audio
  285. } // namespace dataset
  286. } // namespace mindspore