| @@ -31,6 +31,7 @@ | |||
| #include "minddata/dataset/audio/ir/kernels/detect_pitch_frequency_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/equalizer_biquad_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/fade_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/flanger_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/frequency_masking_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/lfilter_ir.h" | |||
| @@ -276,6 +277,40 @@ std::shared_ptr<TensorOperation> Fade::Parse() { | |||
| return std::make_shared<FadeOperation>(data_->fade_in_len_, data_->fade_out_len_, data_->fade_shape_); | |||
| } | |||
| // Flanger Transform Operation. | |||
| struct Flanger::Data { | |||
| Data(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, float phase, | |||
| Modulation modulation, Interpolation interpolation) | |||
| : sample_rate_(sample_rate), | |||
| delay_(delay), | |||
| depth_(depth), | |||
| regen_(regen), | |||
| width_(width), | |||
| speed_(speed), | |||
| phase_(phase), | |||
| modulation_(modulation), | |||
| interpolation_(interpolation) {} | |||
| int32_t sample_rate_; | |||
| float delay_; | |||
| float depth_; | |||
| float regen_; | |||
| float width_; | |||
| float speed_; | |||
| float phase_; | |||
| Modulation modulation_; | |||
| Interpolation interpolation_; | |||
| }; | |||
| Flanger::Flanger(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, float phase, | |||
| Modulation modulation, Interpolation interpolation) | |||
| : data_(std::make_shared<Data>(sample_rate, delay, depth, regen, width, speed, phase, modulation, interpolation)) {} | |||
| std::shared_ptr<TensorOperation> Flanger::Parse() { | |||
| return std::make_shared<FlangerOperation>(data_->sample_rate_, data_->delay_, data_->depth_, data_->regen_, | |||
| data_->width_, data_->speed_, data_->phase_, data_->modulation_, | |||
| data_->interpolation_); | |||
| } | |||
| // FrequencyMasking Transform Operation. | |||
| struct FrequencyMasking::Data { | |||
| Data(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value) | |||
| @@ -35,6 +35,7 @@ | |||
| #include "minddata/dataset/audio/ir/kernels/detect_pitch_frequency_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/equalizer_biquad_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/fade_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/flanger_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/frequency_masking_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/lfilter_ir.h" | |||
| @@ -231,6 +232,32 @@ PYBIND_REGISTER(FadeOperation, 1, ([](const py::module *m) { | |||
| })); | |||
| })); | |||
| PYBIND_REGISTER(Modulation, 0, ([](const py::module *m) { | |||
| (void)py::enum_<Modulation>(*m, "Modulation", py::arithmetic()) | |||
| .value("DE_MODULATION_SINUSOIDAL", Modulation::kSinusoidal) | |||
| .value("DE_MODULATION_TRIANGULAR", Modulation::kTriangular) | |||
| .export_values(); | |||
| })); | |||
| PYBIND_REGISTER(Interpolation, 0, ([](const py::module *m) { | |||
| (void)py::enum_<Interpolation>(*m, "Interpolation", py::arithmetic()) | |||
| .value("DE_INTERPOLATION_LINEAR", Interpolation::kLinear) | |||
| .value("DE_INTERPOLATION_QUADRATIC", Interpolation::kQuadratic) | |||
| .export_values(); | |||
| })); | |||
| PYBIND_REGISTER(FlangerOperation, 1, ([](const py::module *m) { | |||
| (void)py::class_<audio::FlangerOperation, TensorOperation, std::shared_ptr<audio::FlangerOperation>>( | |||
| *m, "FlangerOperation") | |||
| .def(py::init([](int32_t sample_rate, float delay, float depth, float regen, float width, | |||
| float speed, float phase, Modulation modulation, Interpolation interpolation) { | |||
| auto flanger = std::make_shared<audio::FlangerOperation>(sample_rate, delay, depth, regen, width, | |||
| speed, phase, modulation, interpolation); | |||
| THROW_IF_ERROR(flanger->ValidateParams()); | |||
| return flanger; | |||
| })); | |||
| })); | |||
| PYBIND_REGISTER( | |||
| FrequencyMaskingOperation, 1, ([](const py::module *m) { | |||
| (void) | |||
| @@ -17,6 +17,7 @@ add_library(audio-ir-kernels OBJECT | |||
| detect_pitch_frequency_ir.cc | |||
| equalizer_biquad_ir.cc | |||
| fade_ir.cc | |||
| flanger_ir.cc | |||
| frequency_masking_ir.cc | |||
| highpass_biquad_ir.cc | |||
| lfilter_ir.cc | |||
| @@ -0,0 +1,72 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/audio/ir/kernels/flanger_ir.h" | |||
| #include "minddata/dataset/audio/ir/validators.h" | |||
| #include "minddata/dataset/audio/kernels/flanger_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace audio { | |||
| // FlangerOperation | |||
| FlangerOperation::FlangerOperation(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, | |||
| float phase, Modulation modulation, Interpolation interpolation) | |||
| : sample_rate_(sample_rate), | |||
| delay_(delay), | |||
| depth_(depth), | |||
| regen_(regen), | |||
| width_(width), | |||
| speed_(speed), | |||
| phase_(phase), | |||
| modulation_(modulation), | |||
| interpolation_(interpolation) {} | |||
| Status FlangerOperation::ValidateParams() { | |||
| RETURN_IF_NOT_OK(ValidateScalarNotZero("Flanger", "sample_rate", sample_rate_)); | |||
| RETURN_IF_NOT_OK(ValidateScalar("Flanger", "delay", delay_, {0, 30}, false, false)); | |||
| RETURN_IF_NOT_OK(ValidateScalar("Flanger", "depth", depth_, {0, 10}, false, false)); | |||
| RETURN_IF_NOT_OK(ValidateScalar("Flanger", "regen", regen_, {-95, 95}, false, false)); | |||
| RETURN_IF_NOT_OK(ValidateScalar("Flanger", "width", width_, {0, 100}, false, false)); | |||
| RETURN_IF_NOT_OK(ValidateScalar("Flanger", "speed", speed_, {0.1, 10}, false, false)); | |||
| RETURN_IF_NOT_OK(ValidateScalar("Flanger", "phase", phase_, {0, 100}, false, false)); | |||
| return Status::OK(); | |||
| } | |||
| std::shared_ptr<TensorOp> FlangerOperation::Build() { | |||
| std::shared_ptr<FlangerOp> tensor_op = std::make_shared<FlangerOp>(sample_rate_, delay_, depth_, regen_, width_, | |||
| speed_, phase_, modulation_, interpolation_); | |||
| return tensor_op; | |||
| } | |||
| Status FlangerOperation::to_json(nlohmann::json *out_json) { | |||
| nlohmann::json args; | |||
| args["sample_rate"] = sample_rate_; | |||
| args["delay"] = delay_; | |||
| args["depth"] = depth_; | |||
| args["regen"] = regen_; | |||
| args["width"] = width_; | |||
| args["speed"] = speed_; | |||
| args["phase"] = phase_; | |||
| args["modulation"] = modulation_; | |||
| args["interpolation"] = interpolation_; | |||
| *out_json = args; | |||
| return Status::OK(); | |||
| } | |||
| } // namespace audio | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,64 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_FLANGER_IR_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_FLANGER_IR_H_ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "include/api/status.h" | |||
| #include "minddata/dataset/include/dataset/constants.h" | |||
| #include "minddata/dataset/include/dataset/transforms.h" | |||
| #include "minddata/dataset/kernels/ir/tensor_operation.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace audio { | |||
| constexpr char kFlangerOperation[] = "Flanger"; | |||
| class FlangerOperation : public TensorOperation { | |||
| public: | |||
| explicit FlangerOperation(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, | |||
| float phase, Modulation modulation, Interpolation interpolation); | |||
| ~FlangerOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| Status ValidateParams() override; | |||
| std::string Name() const override { return kFlangerOperation; } | |||
| Status to_json(nlohmann::json *out_json) override; | |||
| private: | |||
| int32_t sample_rate_; | |||
| float delay_; | |||
| float depth_; | |||
| float regen_; | |||
| float width_; | |||
| float speed_; | |||
| float phase_; | |||
| Modulation modulation_; | |||
| Interpolation interpolation_; | |||
| }; | |||
| } // namespace audio | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_FLANGER_IR_H_ | |||
| @@ -18,6 +18,7 @@ add_library(audio-kernels OBJECT | |||
| detect_pitch_frequency_op.cc | |||
| equalizer_biquad_op.cc | |||
| fade_op.cc | |||
| flanger_op.cc | |||
| frequency_masking_op.cc | |||
| highpass_biquad_op.cc | |||
| lfilter_op.cc | |||
| @@ -725,5 +725,60 @@ Status DetectPitchFrequency(const std::shared_ptr<Tensor> &input, std::shared_pt | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(out, out_shape, output)); | |||
| return Status::OK(); | |||
| } | |||
| Status GenerateWaveTable(std::shared_ptr<Tensor> *output, const DataType &type, Modulation modulation, | |||
| int32_t table_size, float min, float max, float phase) { | |||
| RETURN_UNEXPECTED_IF_NULL(output); | |||
| int32_t phase_offset = static_cast<int32_t>(phase / PI / 2 * table_size + 0.5); | |||
| // get the offset of the i-th | |||
| std::vector<int32_t> point; | |||
| for (auto i = 0; i < table_size; i++) { | |||
| point.push_back((i + phase_offset) % table_size); | |||
| } | |||
| std::shared_ptr<Tensor> wave_table; | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({table_size}), DataType(DataType::DE_FLOAT32), &wave_table)); | |||
| auto iter = wave_table->begin<float>(); | |||
| if (modulation == Modulation::kSinusoidal) { | |||
| for (int i = 0; i < table_size; iter++, i++) { | |||
| // change phase | |||
| *iter = (sin(point[i] * PI / table_size * 2) + 1) / 2; | |||
| } | |||
| } else { | |||
| for (int i = 0; i < table_size; iter++, i++) { | |||
| // change phase | |||
| *iter = point[i] * 2.0 / table_size; | |||
| // get complete offset | |||
| int32_t value = static_cast<int>(4 * point[i] / table_size); | |||
| // change the value of the square wave according to the number of complete offsets | |||
| if (value == 0) { | |||
| *iter = *iter + 0.5; | |||
| } else if (value == 1 || value == 2) { | |||
| *iter = 1.5 - *iter; | |||
| } else if (value == 3) { | |||
| *iter = *iter - 1.5; | |||
| } | |||
| } | |||
| } | |||
| for (iter = wave_table->begin<float>(); iter != wave_table->end<float>(); iter++) { | |||
| *iter = *iter * (max - min) + min; | |||
| } | |||
| if (type.IsInt()) { | |||
| for (iter = wave_table->begin<float>(); iter != wave_table->end<float>(); iter++) { | |||
| if (*iter < 0) { | |||
| *iter = *iter - 0.5; | |||
| } else { | |||
| *iter = *iter + 0.5; | |||
| } | |||
| } | |||
| RETURN_IF_NOT_OK(TypeCast(wave_table, output, DataType(DataType::DE_INT32))); | |||
| } else if (type.IsFloat()) { | |||
| RETURN_IF_NOT_OK(TypeCast(wave_table, output, DataType(DataType::DE_FLOAT32))); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -25,6 +25,7 @@ | |||
| #include <vector> | |||
| #include "minddata/dataset/core/tensor.h" | |||
| #include "minddata/dataset/kernels/data/data_utils.h" | |||
| #include "minddata/dataset/kernels/tensor_op.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| @@ -557,6 +558,245 @@ Status MedianSmoothing(const std::shared_ptr<Tensor> &input, std::shared_ptr<Ten | |||
| Status DetectPitchFrequency(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t sample_rate, | |||
| float frame_time, int32_t win_length, int32_t freq_low, int32_t freq_high); | |||
| /// \brief A helper function that generates a wave table (one period of a wave) with given parameters. | |||
| /// \param output: Generated table, a Tensor of shape <table_size>. | |||
| /// \param type: An integer type yields a DataType::DE_INT32 table, a float type a DataType::DE_FLOAT32 table. | |||
| /// \param modulation: Waveform of the generated table. | |||
| /// It can be one of Modulation::kSinusoidal or Modulation::kTriangular. | |||
| /// \param table_size: The length of table. | |||
| /// \param min: Minimum value of the table; the wave is rescaled to the range [min, max]. | |||
| /// \param max: Maximum value of the table; the wave is rescaled to the range [min, max]. | |||
| /// \param phase: Phase offset of the wave in radians. | |||
| /// \return Status code. | |||
| Status GenerateWaveTable(std::shared_ptr<Tensor> *output, const DataType &type, Modulation modulation, | |||
| int32_t table_size, float min, float max, float phase); | |||
| /// \brief Flanger about interpolation effect. | |||
| /// \param input: Tensor of shape <batch, channel, time>. | |||
| /// \param int_delay: A dimensional vector about integer delay, subscript representing delay. | |||
| /// \param frac_delay: A dimensional vector about delay obtained by using the frac function. | |||
| /// \param interpolation: Interpolation of the input tensor. | |||
| /// It can be one of Interpolation::kLinear or Interpolation::kQuadratic. | |||
| /// \param delay_buf_pos: Minimum dimension length about delay_bufs. | |||
| /// \Returns Flanger about interpolation effect. | |||
| template <typename T> | |||
| std::vector<std::vector<T>> FlangerInterpolation(const std::shared_ptr<Tensor> &input, std::vector<int> int_delay, | |||
| const std::vector<T> &frac_delay, Interpolation interpolation, | |||
| int delay_buf_pos) { | |||
| int n_batch = input->shape()[0]; | |||
| int n_channels = input->shape()[-2]; | |||
| int delay_buf_length = input->shape()[-1]; | |||
| std::vector<std::vector<T>> delayed_value_a(n_batch, std::vector<T>(n_channels, 0)); | |||
| std::vector<std::vector<T>> delayed_value_b(n_batch, std::vector<T>(n_channels, 0)); | |||
| for (int j = 0; j < n_batch; j++) { | |||
| for (int k = 0; k < n_channels; k++) { | |||
| // delay after obtaining the current number of channels | |||
| auto iter_input = input->begin<T>(); | |||
| int it = j * n_channels * delay_buf_length + k * delay_buf_length; | |||
| iter_input += it + (delay_buf_pos + int_delay[k]) % delay_buf_length; | |||
| delayed_value_a[j][k] = *(iter_input); | |||
| iter_input = input->begin<T>(); | |||
| iter_input += it + (delay_buf_pos + int_delay[k] + 1) % delay_buf_length; | |||
| delayed_value_b[j][k] = *(iter_input); | |||
| } | |||
| } | |||
| // delay subscript backward | |||
| for (int j = 0; j < n_channels; j++) { | |||
| int_delay[j] = int_delay[j] + 2; | |||
| } | |||
| std::vector<std::vector<T>> delayed(n_batch, std::vector<T>(n_channels, 0)); | |||
| std::vector<std::vector<T>> delayed_value_c(n_batch, std::vector<T>(n_channels, 0)); | |||
| if (interpolation == Interpolation::kLinear) { | |||
| for (int j = 0; j < n_batch; j++) { | |||
| for (int k = 0; k < n_channels; k++) { | |||
| delayed[j][k] = delayed_value_a[j][k] + (delayed_value_b[j][k] - delayed_value_a[j][k]) * frac_delay[k]; | |||
| } | |||
| } | |||
| } else { | |||
| for (int j = 0; j < n_batch; j++) { | |||
| for (int k = 0; k < n_channels; k++) { | |||
| auto iter_input = input->begin<T>(); | |||
| int it = j * n_channels * delay_buf_length + k * delay_buf_length; | |||
| iter_input += it + (delay_buf_pos + int_delay[k]) % delay_buf_length; | |||
| delayed_value_c[j][k] = *(iter_input); | |||
| } | |||
| } | |||
| // delay subscript backward | |||
| for (int j = 0; j < n_channels; j++) { | |||
| int_delay[j] = int_delay[j] + 1; | |||
| } | |||
| std::vector<std::vector<T>> frac_delay_coefficient(n_batch, std::vector<T>(n_channels, 0)); | |||
| std::vector<std::vector<T>> frac_delay_value(n_batch, std::vector<T>(n_channels, 0)); | |||
| for (int j = 0; j < n_batch; j++) { | |||
| for (int k = 0; k < n_channels; k++) { | |||
| delayed_value_c[j][k] = delayed_value_c[j][k] - delayed_value_a[j][k]; | |||
| delayed_value_b[j][k] = delayed_value_b[j][k] - delayed_value_a[j][k]; | |||
| frac_delay_coefficient[j][k] = delayed_value_c[j][k] * 0.5 - delayed_value_b[j][k]; | |||
| frac_delay_value[j][k] = delayed_value_b[j][k] * 2 - delayed_value_c[j][k] * 0.5; | |||
| // the next delay is obtained by delaying the data in the buffer | |||
| delayed[j][k] = delayed_value_a[j][k] + | |||
| (frac_delay_coefficient[j][k] * frac_delay[k] + frac_delay_value[j][k]) * frac_delay[k]; | |||
| } | |||
| } | |||
| } | |||
| return delayed; | |||
| } | |||
| /// \brief Interval limiting function. | |||
| /// \param output_waveform: Tensor of shape <..., time>. | |||
| /// \param min: If value is less than min, min is returned. | |||
| /// \param max: If value is greater than max, max is returned. | |||
| /// \Returns Tensor at the same latitude. | |||
| template <typename T> | |||
| std::shared_ptr<Tensor> Clamp(const std::shared_ptr<Tensor> &tensor, T min, T max) { | |||
| for (auto itr = tensor->begin<T>(); itr != tensor->end<T>(); itr++) { | |||
| if (*itr > max) { | |||
| *itr = max; | |||
| } else if (*itr < min) { | |||
| *itr = min; | |||
| } | |||
| } | |||
| return tensor; | |||
| } | |||
| /// \brief Apply flanger effect. | |||
| /// \param input/output: Tensor of shape <..., channel, time>. | |||
| /// \param sample_rate: Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero. | |||
| /// \param delay: Desired delay in milliseconds (ms), range: [0, 30]. | |||
| /// \param depth: Desired delay depth in milliseconds (ms), range: [0, 10]. | |||
| /// \param regen: Desired regen (feedback gain) in dB., range: [-95, 95]. | |||
| /// \param width: Desired width (delay gain) in dB, range: [0, 100]. | |||
| /// \param speed: Modulation speed in Hz, range: [0.1, 10]. | |||
| /// \param phase: Percentage phase-shift for multi-channel, range: [0, 100]. | |||
| /// \param modulation: Modulation of the input tensor. | |||
| /// It can be one of Modulation::kSinusoidal or Modulation::kTriangular. | |||
| /// \param interpolation: Interpolation of the input tensor. | |||
| /// It can be one of Interpolation::kLinear or Interpolation::kQuadratic. | |||
| /// \return Status code. | |||
| template <typename T> | |||
| Status Flanger(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int32_t sample_rate, float delay, | |||
| float depth, float regen, float width, float speed, float phase, Modulation modulation, | |||
| Interpolation interpolation) { | |||
| std::shared_ptr<Tensor> waveform; | |||
| if (input->type() == DataType::DE_FLOAT64) { | |||
| waveform = input; | |||
| } else { | |||
| RETURN_IF_NOT_OK(TypeCast(input, &waveform, DataType(DataType::DE_FLOAT32))); | |||
| } | |||
| // convert to 3D (batch, channels, time) | |||
| TensorShape actual_shape = waveform->shape(); | |||
| TensorShape toShape({waveform->Size() / actual_shape[-2] / actual_shape[-1], actual_shape[-2], actual_shape[-1]}); | |||
| RETURN_IF_NOT_OK(waveform->Reshape(toShape)); | |||
| // scaling | |||
| T feedback_gain = static_cast<T>(regen) / 100; | |||
| T delay_gain = static_cast<T>(width) / 100; | |||
| T channel_phase = static_cast<T>(phase) / 100; | |||
| T delay_min = static_cast<T>(delay) / 1000; | |||
| T delay_depth = static_cast<T>(depth) / 1000; | |||
| // balance output: | |||
| T in_gain = 1.0 / (1 + delay_gain); | |||
| delay_gain = delay_gain / (1 + delay_gain); | |||
| // balance feedback loop: | |||
| delay_gain = delay_gain * (1 - abs(feedback_gain)); | |||
| int delay_buf_length = static_cast<int>((delay_min + delay_depth) * sample_rate + 0.5); | |||
| delay_buf_length = delay_buf_length + 2; | |||
| int lfo_length = static_cast<int>(sample_rate / speed); | |||
| T table_min = floor(delay_min * sample_rate + 0.5); | |||
| T table_max = delay_buf_length - 2.0; | |||
| // generate wave table | |||
| T lfo_phase = 3 * PI / 2; | |||
| std::shared_ptr<Tensor> lfo; | |||
| RETURN_IF_NOT_OK(GenerateWaveTable(&lfo, DataType(DataType::DE_FLOAT32), modulation, lfo_length, | |||
| static_cast<float>(table_min), static_cast<float>(table_max), | |||
| static_cast<float>(lfo_phase))); | |||
| int n_batch = waveform->shape()[0]; | |||
| int n_channels = waveform->shape()[-2]; | |||
| int time = waveform->shape()[-1]; | |||
| std::vector<T> delay_tensor(n_channels, 0.0), frac_delay(n_channels, 0.0); | |||
| std::vector<int> cur_channel_phase(n_channels, 0), int_delay(n_channels, 0); | |||
| // next delay | |||
| std::vector<std::vector<T>> delay_last(n_batch, std::vector<T>(n_channels, 0)); | |||
| // initialization of delay_bufs | |||
| TensorShape delay_bufs_shape({n_batch, n_channels, delay_buf_length}); | |||
| std::shared_ptr<Tensor> delay_bufs, output_waveform; | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(delay_bufs_shape, waveform->type(), &delay_bufs)); | |||
| RETURN_IF_NOT_OK(delay_bufs->Zero()); | |||
| // initialization of output_waveform | |||
| TensorShape output_waveform_shape({n_batch, n_channels, actual_shape[-1]}); | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(output_waveform_shape, waveform->type(), &output_waveform)); | |||
| int delay_buf_pos = 0, lfo_pos = 0; | |||
| for (int i = 0; i < time; i++) { | |||
| delay_buf_pos = (delay_buf_pos + delay_buf_length - 1) % delay_buf_length; | |||
| for (int j = 0; j < n_channels; j++) { | |||
| // get current channel phase | |||
| cur_channel_phase[j] = static_cast<int>(j * lfo_length * channel_phase + 0.5); | |||
| // through the current channel phase and lfo arrays to get the delay | |||
| auto iter_lfo = lfo->begin<float>(); | |||
| delay_tensor[j] = *(iter_lfo + (lfo_pos + cur_channel_phase[j]) % lfo_length); | |||
| // the frac delay is obtained by using the frac function | |||
| frac_delay[j] = delay_tensor[j] - static_cast<int>(delay_tensor[j]); | |||
| delay_tensor[j] = floor(delay_tensor[j]); | |||
| int_delay[j] = static_cast<int>(delay_tensor[j]); | |||
| } | |||
| // get the waveform of [:, :, i] | |||
| std::shared_ptr<Tensor> temp; | |||
| TensorShape temp_shape({n_batch, n_channels}); | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(temp_shape, waveform->type(), &temp)); | |||
| Slice ss1(0, n_batch), ss2(0, n_channels), ss3(i, i + 1); | |||
| SliceOption sp1(ss1), sp2(ss2), sp3(ss3); | |||
| std::vector<SliceOption> slice_option; | |||
| slice_option.push_back(sp1), slice_option.push_back(sp2), slice_option.push_back(sp3); | |||
| RETURN_IF_NOT_OK(waveform->Slice(&temp, slice_option)); | |||
| auto iter_temp = temp->begin<T>(); | |||
| auto iter_delay_bufs = delay_bufs->begin<T>(); | |||
| for (int j = 0; j < n_batch; j++) { | |||
| for (int k = 0; k < n_channels; k++) { | |||
| iter_delay_bufs += delay_buf_pos; | |||
| // the value of delay_bufs is processed by next delay | |||
| *(iter_delay_bufs) = *iter_temp + delay_last[j][k] * feedback_gain; | |||
| iter_delay_bufs -= (delay_buf_pos - delay_buf_length); | |||
| iter_temp++; | |||
| } | |||
| } | |||
| // different delayed values can be obtained by judging the type of interpolation | |||
| std::vector<std::vector<T>> delayed(n_batch, std::vector<T>(n_channels, 0)); | |||
| delayed = FlangerInterpolation<T>(delay_bufs, int_delay, frac_delay, interpolation, delay_buf_pos); | |||
| for (int j = 0; j < n_channels; j++) { | |||
| int_delay[j] = int_delay[j] + 1; | |||
| } | |||
| iter_temp = temp->begin<T>(); | |||
| for (int j = 0; j < n_batch; j++) { | |||
| for (int k = 0; k < n_channels; k++) { | |||
| auto iter_output_waveform = output_waveform->begin<T>(); | |||
| // update the next delay | |||
| delay_last[j][k] = delayed[j][k]; | |||
| int it = j * n_channels * actual_shape[-1] + k * actual_shape[-1]; | |||
| iter_output_waveform += it + i; | |||
| // the results are obtained by balancing the output and balancing the feedback loop | |||
| *(iter_output_waveform) = *(iter_temp)*in_gain + delayed[j][k] * delay_gain; | |||
| iter_temp++; | |||
| } | |||
| } | |||
| // update lfo location | |||
| lfo_pos = (lfo_pos + 1) % lfo_length; | |||
| } | |||
| // the output value is limited by the interval limit function | |||
| output_waveform = Clamp<T>(output_waveform, -1, 1); | |||
| // convert dimension to waveform dimension | |||
| RETURN_IF_NOT_OK(output_waveform->Reshape(actual_shape)); | |||
| RETURN_IF_NOT_OK(TypeCast(output_waveform, output, input->type())); | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_ | |||
| @@ -0,0 +1,57 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/audio/kernels/flanger_op.h" | |||
| #include "minddata/dataset/audio/kernels/audio_utils.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| Status FlangerOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| // check input dimensions, it should be 2 dimensions or more | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() >= 2, | |||
| "Flanger: input tensor is not in shape of <..., channel, time>."); | |||
| // check input channel, it should be less than or equal to 4 | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape()[-2] <= 4, | |||
| "Flanger: the channel of input tensor must be less than or equal to 4, but got: " + | |||
| std::to_string(input->shape()[-2])); | |||
| // check input type, it should be [int, float, double] | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| input->type().IsNumeric(), | |||
| "Flanger: input tensor type should be int, float or double, but got: " + input->type().ToString()); | |||
| if (input->type() == DataType(DataType::DE_FLOAT64)) { | |||
| return Flanger<double>(input, output, sample_rate_, delay_, depth_, regen_, width_, speed_, phase_, Modulation_, | |||
| Interpolation_); | |||
| } else { | |||
| return Flanger<float>(input, output, sample_rate_, delay_, depth_, regen_, width_, speed_, phase_, Modulation_, | |||
| Interpolation_); | |||
| } | |||
| } | |||
| Status FlangerOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) { | |||
| RETURN_IF_NOT_OK(TensorOp::OutputType(inputs, outputs)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| inputs[0].IsNumeric(), | |||
| "Flanger: input tensor type should be int, float or double, but got: " + inputs[0].ToString()); | |||
| outputs[0] = inputs[0]; | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,72 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_FLANGER_OP_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_FLANGER_OP_H_ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "minddata/dataset/core/tensor.h" | |||
| #include "minddata/dataset/include/dataset/constants.h" | |||
| #include "minddata/dataset/kernels/tensor_op.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| class FlangerOp : public TensorOp { | |||
| public: | |||
| explicit FlangerOp(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, float phase, | |||
| Modulation modulation, Interpolation interpolation) | |||
| : sample_rate_(sample_rate), | |||
| delay_(delay), | |||
| depth_(depth), | |||
| regen_(regen), | |||
| width_(width), | |||
| speed_(speed), | |||
| phase_(phase), | |||
| Modulation_(modulation), | |||
| Interpolation_(interpolation) {} | |||
| ~FlangerOp() override = default; | |||
| void Print(std::ostream &out) const override { | |||
| out << Name() << ": sample_rate: " << sample_rate_ << ", delay:" << delay_ << ", depth: " << depth_ | |||
| << ", regen: " << regen_ << ", width: " << width_ << ", speed: " << speed_ << ", phase: " << phase_ | |||
| << ", Modulation: " << Modulation_ << ", Interpolation: " << Interpolation_ << std::endl; | |||
| } | |||
| Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override; | |||
| std::string Name() const override { return kFlangerOp; } | |||
| Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override; | |||
| private: | |||
| int32_t sample_rate_; | |||
| float delay_; | |||
| float depth_; | |||
| float regen_; | |||
| float width_; | |||
| float speed_; | |||
| float phase_; | |||
| Modulation Modulation_; | |||
| Interpolation Interpolation_; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_FLANGER_OP_H_ | |||
| @@ -374,6 +374,38 @@ class Fade final : public TensorTransform { | |||
| std::shared_ptr<Data> data_; | |||
| }; | |||
| /// \brief Flanger TensorTransform. Apply a flanger effect to the audio. | |||
| class Flanger final : public TensorTransform { | |||
| public: | |||
| /// \brief Constructor. | |||
| /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero. | |||
| /// \param[in] delay Desired delay in milliseconds (ms), range: [0, 30] (Default: 0.0). | |||
| /// \param[in] depth Desired delay depth in milliseconds (ms), range: [0, 10] (Default: 2.0). | |||
| /// \param[in] regen Desired regen (feedback gain) in dB, range: [-95, 95] (Default: 0.0). | |||
| /// \param[in] width Desired width (delay gain) in dB, range: [0, 100] (Default: 71.0). | |||
| /// \param[in] speed Modulation speed in Hz, range: [0.1, 10] (Default: 0.5). | |||
| /// \param[in] phase Percentage phase-shift for multi-channel, range: [0, 100] (Default: 25.0). | |||
| /// \param[in] modulation Modulation of input tensor, must be one of [Modulation::kSinusoidal, | |||
| /// Modulation::kTriangular] (Default: Modulation::kSinusoidal). | |||
| /// \param[in] interpolation Interpolation of input tensor, must be one of [Interpolation::kLinear, | |||
| /// Interpolation::kQuadratic] (Default: Interpolation::kLinear). | |||
| explicit Flanger(int32_t sample_rate, float delay = 0.0, float depth = 2.0, float regen = 0.0, float width = 71.0, | |||
| float speed = 0.5, float phase = 25.0, Modulation modulation = Modulation::kSinusoidal, | |||
| Interpolation interpolation = Interpolation::kLinear); | |||
| /// \brief Destructor. | |||
| ~Flanger() = default; | |||
| protected: | |||
| /// \brief Function to convert this TensorTransform object into a TensorOperation object. | |||
| /// \return Shared pointer to the TensorOperation object. | |||
| std::shared_ptr<TensorOperation> Parse() override; | |||
| private: | |||
| struct Data; | |||
| std::shared_ptr<Data> data_; | |||
| }; | |||
| /// \brief FrequencyMasking TensorTransform. | |||
| /// \notes Apply masking to a spectrogram in the frequency domain. | |||
| class FrequencyMasking final : public TensorTransform { | |||
| @@ -26,6 +26,18 @@ namespace dataset { | |||
| using uchar = unsigned char; | |||
| using dsize_t = int64_t; | |||
/// \brief The modulation in Flanger.
/// \note The numeric values are spelled out explicitly (0 and 1) rather than left implicit.
enum class Modulation {
  kSinusoidal = 0,  ///< Use sinusoidal modulation.
  kTriangular = 1   ///< Use triangular modulation.
};
/// \brief The interpolation in Flanger.
/// \note The numeric values are spelled out explicitly (0 and 1) rather than left implicit.
enum class Interpolation {
  kLinear = 0,    ///< Use linear for delay-line interpolation.
  kQuadratic = 1  ///< Use quadratic for delay-line interpolation.
};
| /// \brief The color conversion code | |||
| enum class ConvertMode { | |||
| COLOR_BGR2BGRA = 0, ///< Add alpha channel to BGR image. | |||
| @@ -158,6 +158,7 @@ constexpr char kDeemphBiquadOp[] = "DeemphBiquadOp"; | |||
| constexpr char kDetectPitchFrequencyOp[] = "DetectPitchFrequencyOp"; | |||
| constexpr char kEqualizerBiquadOp[] = "EqualizerBiquadOp"; | |||
| constexpr char kFadeOp[] = "FadeOp"; | |||
| constexpr char kFlangerOp[] = "FlangerOp"; | |||
| constexpr char kFrequencyMaskingOp[] = "FrequencyMaskingOp"; | |||
| constexpr char kHighpassBiquadOp[] = "HighpassBiquadOp"; | |||
| constexpr char kLFilterOp[] = "LFilterOp"; | |||
| @@ -23,12 +23,12 @@ import numpy as np | |||
| import mindspore._c_dataengine as cde | |||
| from ..transforms.c_transforms import TensorOperation | |||
| from .utils import FadeShape, GainType, ScaleType | |||
| from .utils import FadeShape, GainType, Interpolation, Modulation, ScaleType | |||
| from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \ | |||
| check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_contrast, check_dc_shift, \ | |||
| check_deemph_biquad, check_detect_pitch_frequency, check_equalizer_biquad, check_fade, check_highpass_biquad, \ | |||
| check_lfilter, check_lowpass_biquad, check_magphase, check_masking, check_mu_law_decoding, check_riaa_biquad, \ | |||
| check_time_stretch, check_treble_biquad, check_vol | |||
| check_deemph_biquad, check_detect_pitch_frequency, check_equalizer_biquad, check_fade, check_flanger, \ | |||
| check_highpass_biquad, check_lfilter, check_lowpass_biquad, check_magphase, check_masking, check_mu_law_decoding, \ | |||
| check_riaa_biquad, check_time_stretch, check_treble_biquad, check_vol | |||
| class AudioTensorOperation(TensorOperation): | |||
| @@ -498,6 +498,59 @@ class Fade(AudioTensorOperation): | |||
| return cde.FadeOperation(self.fade_in_len, self.fade_out_len, DE_C_FADESHAPE_TYPE[self.fade_shape]) | |||
# Lookup from the Python-side Modulation enum to the corresponding value exposed by the cde binding.
DE_C_MODULATION_TYPE = {Modulation.SINUSOIDAL: cde.Modulation.DE_MODULATION_SINUSOIDAL,
                        Modulation.TRIANGULAR: cde.Modulation.DE_MODULATION_TRIANGULAR}

# Lookup from the Python-side Interpolation enum to the corresponding value exposed by the cde binding.
DE_C_INTERPOLATION_TYPE = {Interpolation.LINEAR: cde.Interpolation.DE_INTERPOLATION_LINEAR,
                           Interpolation.QUADRATIC: cde.Interpolation.DE_INTERPOLATION_QUADRATIC}
class Flanger(AudioTensorOperation):
    """
    Apply a flanger effect to the audio.

    Args:
        sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
        delay (float, optional): Desired delay in milliseconds (ms), range: [0, 30] (default=0.0).
        depth (float, optional): Desired delay depth in milliseconds (ms), range: [0, 10] (default=2.0).
        regen (float, optional): Desired regen (feedback gain) in dB, range: [-95, 95] (default=0.0).
        width (float, optional): Desired width (delay gain) in dB, range: [0, 100] (default=71.0).
        speed (float, optional): Modulation speed in Hz, range: [0.1, 10] (default=0.5).
        phase (float, optional): Percentage phase-shift for multi-channel, range: [0, 100] (default=25.0).
        modulation (Modulation, optional): Modulation of the input tensor (default=Modulation.SINUSOIDAL).
            It can be one of Modulation.SINUSOIDAL or Modulation.TRIANGULAR.
        interpolation (Interpolation, optional): Interpolation of the input tensor (default=Interpolation.LINEAR).
            It can be one of Interpolation.LINEAR or Interpolation.QUADRATIC.

    Examples:
        >>> import numpy as np
        >>>
        >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Flanger(44100)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
    """

    @check_flanger
    def __init__(self, sample_rate, delay=0.0, depth=2.0, regen=0.0, width=71.0, speed=0.5,
                 phase=25.0, modulation=Modulation.SINUSOIDAL, interpolation=Interpolation.LINEAR):
        # Arguments have already been validated by the check_flanger decorator; just record them.
        self.sample_rate = sample_rate
        self.delay, self.depth = delay, depth
        self.regen, self.width = regen, width
        self.speed, self.phase = speed, phase
        self.modulation = modulation
        self.interpolation = interpolation

    def parse(self):
        """Build the cde FlangerOperation, translating the Python enums to their binding values."""
        modulation_type = DE_C_MODULATION_TYPE[self.modulation]
        interpolation_type = DE_C_INTERPOLATION_TYPE[self.interpolation]
        return cde.FlangerOperation(self.sample_rate, self.delay, self.depth, self.regen, self.width, self.speed,
                                    self.phase, modulation_type, interpolation_type)
| class FrequencyMasking(AudioTensorOperation): | |||
| """ | |||
| Apply masking to a spectrogram in the frequency domain. | |||
| @@ -54,6 +54,32 @@ class GainType(str, Enum): | |||
| DB: str = "db" | |||
class Interpolation(str, Enum):
    """
    Interpolation Type.

    Possible enumeration values are: Interpolation.LINEAR, Interpolation.QUADRATIC.

    - Interpolation.LINEAR: means input interpolation type is linear.
    - Interpolation.QUADRATIC: means input interpolation type is quadratic.
    """
    # Members are str-valued because the class also derives from str.
    LINEAR: str = "linear"
    QUADRATIC: str = "quadratic"
class Modulation(str, Enum):
    """
    Modulation Type.

    Possible enumeration values are: Modulation.SINUSOIDAL, Modulation.TRIANGULAR.

    - Modulation.SINUSOIDAL: means input modulation type is sinusoidal.
    - Modulation.TRIANGULAR: means input modulation type is triangular.
    """
    # Members are str-valued because the class also derives from str.
    SINUSOIDAL: str = "sinusoidal"
    TRIANGULAR: str = "triangular"
| class ScaleType(str, Enum): | |||
| """ | |||
| Scale Types. | |||
| @@ -21,7 +21,7 @@ from functools import wraps | |||
| from mindspore.dataset.core.validator_helpers import check_float32, check_float32_not_zero, check_int32_not_zero, \ | |||
| check_list_same_size, check_non_negative_float32, check_non_negative_int32, check_pos_float32, check_pos_int32, \ | |||
| check_value, parse_user_args, type_check | |||
| from .utils import FadeShape, GainType, ScaleType | |||
| from .utils import FadeShape, GainType, Interpolation, Modulation, ScaleType | |||
| def check_amplitude_to_db(method): | |||
| @@ -475,3 +475,38 @@ def check_detect_pitch_frequency(method): | |||
| return method(self, *args, **kwargs) | |||
| return new_method | |||
def check_flanger(method):
    """Wrapper method to check the parameters of Flanger."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        parsed_args, _ = parse_user_args(method, *args, **kwargs)
        [sample_rate, delay, depth, regen, width, speed, phase, modulation, interpolation] = parsed_args

        type_check(sample_rate, (int,), "sample_rate")
        check_int32_not_zero(sample_rate, "sample_rate")

        # Numeric parameters share one rule: a float or int inside a closed interval.
        # They are checked in signature order, type first and then range.
        bounded_params = (
            (delay, [0, 30], "delay"),
            (depth, [0, 10], "depth"),
            (regen, [-95, 95], "regen"),
            (width, [0, 100], "width"),
            (speed, [0.1, 10], "speed"),
            (phase, [0, 100], "phase"),
        )
        for value, interval, arg_name in bounded_params:
            type_check(value, (float, int), arg_name)
            check_value(value, interval, arg_name)

        # NOTE(review): the enum class itself is passed here, not a 1-tuple such as (Modulation,).
        # The expected error messages in the Flanger tests list the enum members, so they appear
        # to depend on this form - confirm before changing it.
        type_check(modulation, Modulation, "modulation")
        type_check(interpolation, Interpolation, "interpolation")
        return method(self, *args, **kwargs)

    return new_method
| @@ -1482,3 +1482,112 @@ TEST_F(MindDataTestPipeline, TestDetectPitchFrequencyParamCheck) { | |||
| std::shared_ptr<Iterator> iter05 = ds05->CreateIterator(); | |||
| EXPECT_EQ(iter05, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestFlangerBasic) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlangerBasic."; | |||
| // Original waveform | |||
| std::shared_ptr<SchemaObj> schema = Schema(); | |||
| ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {2, 200})); | |||
| std::shared_ptr<Dataset> ds = RandomData(50, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| ds = ds->SetNumWorkers(4); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto FlangerOp = audio::Flanger(44100); | |||
| ds = ds->Map({FlangerOp}); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Filtered waveform by flanger | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(ds, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| std::vector<int64_t> expected = {2, 200}; | |||
| int i = 0; | |||
| while (row.size() != 0) { | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 50); | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestFlangerParamCheck) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlangerParamCheck."; | |||
| std::shared_ptr<SchemaObj> schema = Schema(); | |||
| // Original waveform | |||
| ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {2, 2})); | |||
| std::shared_ptr<Dataset> ds = RandomData(50, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Check sample_rate | |||
| MS_LOG(INFO) << "sample_rate is zero."; | |||
| auto flanger_op_sample_rate = | |||
| audio::Flanger(0, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0, Modulation::kSinusoidal, Interpolation::kLinear); | |||
| std::shared_ptr<Dataset> dsSample_rate = ds->Map({flanger_op_sample_rate}); | |||
| EXPECT_NE(dsSample_rate, nullptr); | |||
| std::shared_ptr<Iterator> iterSample_rate = dsSample_rate->CreateIterator(); | |||
| EXPECT_EQ(iterSample_rate, nullptr); | |||
| // Check delay | |||
| MS_LOG(INFO) << "delay is out of range."; | |||
| auto flanger_op_delay = | |||
| audio::Flanger(44100, 50.0, 2.0, 0.0, 71.0, 0.5, 25.0, Modulation::kSinusoidal, Interpolation::kLinear); | |||
| std::shared_ptr<Dataset> dsDelay = ds->Map({flanger_op_delay}); | |||
| EXPECT_NE(dsDelay, nullptr); | |||
| std::shared_ptr<Iterator> iterDelay = dsDelay->CreateIterator(); | |||
| EXPECT_EQ(iterDelay, nullptr); | |||
| // Check depth | |||
| MS_LOG(INFO) << "depth is out of range."; | |||
| auto flanger_op_depth = | |||
| audio::Flanger(44100, 0.0, 20.0, 0.0, 71.0, 0.5, 25.0, Modulation::kSinusoidal, Interpolation::kLinear); | |||
| std::shared_ptr<Dataset> dsDepth = ds->Map({flanger_op_depth}); | |||
| EXPECT_NE(dsDepth, nullptr); | |||
| std::shared_ptr<Iterator> iterDepth = dsDepth->CreateIterator(); | |||
| EXPECT_EQ(iterDepth, nullptr); | |||
| // Check regen | |||
| MS_LOG(INFO) << "regen is out of range."; | |||
| auto flanger_op_regen = | |||
| audio::Flanger(44100, 0.0, 2.0, 100.0, 71.0, 0.5, 25.0, Modulation::kSinusoidal, Interpolation::kLinear); | |||
| std::shared_ptr<Dataset> dsRegen = ds->Map({flanger_op_regen}); | |||
| EXPECT_NE(dsRegen, nullptr); | |||
| std::shared_ptr<Iterator> iterRegen = dsRegen->CreateIterator(); | |||
| EXPECT_EQ(iterRegen, nullptr); | |||
| // Check width | |||
| MS_LOG(INFO) << "width is out of range."; | |||
| auto flanger_op_width = | |||
| audio::Flanger(44100, 0.0, 2.0, 0.0, 200.0, 0.5, 25.0, Modulation::kSinusoidal, Interpolation::kLinear); | |||
| std::shared_ptr<Dataset> dsWidth = ds->Map({flanger_op_width}); | |||
| EXPECT_NE(dsWidth, nullptr); | |||
| std::shared_ptr<Iterator> iterWidth = dsWidth->CreateIterator(); | |||
| EXPECT_EQ(iterWidth, nullptr); | |||
| // Check speed | |||
| MS_LOG(INFO) << "speed is out of range."; | |||
| auto flanger_op_speed = | |||
| audio::Flanger(44100, 0.0, 2.0, 0.0, 71.0, 20, 25.0, Modulation::kSinusoidal, Interpolation::kLinear); | |||
| std::shared_ptr<Dataset> dsSpeed = ds->Map({flanger_op_speed}); | |||
| EXPECT_NE(dsSpeed, nullptr); | |||
| std::shared_ptr<Iterator> iterSpeed = dsSpeed->CreateIterator(); | |||
| EXPECT_EQ(iterSpeed, nullptr); | |||
| // Check phase | |||
| MS_LOG(INFO) << "phase is out of range."; | |||
| auto flanger_op_phase = | |||
| audio::Flanger(44100, 0.0, 2.0, 0.0, 71.0, 20, 25.0, Modulation::kSinusoidal, Interpolation::kLinear); | |||
| std::shared_ptr<Dataset> dsPhase = ds->Map({flanger_op_phase}); | |||
| EXPECT_NE(dsPhase, nullptr); | |||
| std::shared_ptr<Iterator> iterPhase = dsPhase->CreateIterator(); | |||
| EXPECT_EQ(iterPhase, nullptr); | |||
| } | |||
| @@ -1291,3 +1291,36 @@ TEST_F(MindDataTestExecute, TestDetectPitchFrequencyWithWrongArg) { | |||
| Status s05 = Transform05(input_02, &input_02); | |||
| EXPECT_FALSE(s05.IsOk()); | |||
| } | |||
| TEST_F(MindDataTestExecute, TestFlangerWithEager) { | |||
| MS_LOG(INFO) << "Doing MindDataTestExecute-TestFlangerWithEager."; | |||
| // Original waveform | |||
| std::vector<float> labels = { | |||
| 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, | |||
| 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, | |||
| 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, | |||
| 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, | |||
| 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; | |||
| std::shared_ptr<Tensor> input; | |||
| ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); | |||
| auto input_02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input)); | |||
| std::shared_ptr<TensorTransform> flanger_01 = std::make_shared<audio::Flanger>(44100); | |||
| mindspore::dataset::Execute Transform01({flanger_01}); | |||
| // Filtered waveform by flanger | |||
| Status s01 = Transform01(input_02, &input_02); | |||
| EXPECT_TRUE(s01.IsOk()); | |||
| } | |||
| TEST_F(MindDataTestExecute, TestFlangerWithWrongArg) { | |||
| MS_LOG(INFO) << "Doing MindDataTestExecute-TestFlangerWithWrongArg."; | |||
| std::vector<double> labels = {1.143, 1.3123, 2.632, 2.554, 1.213, 1.3, 0.456, 3.563}; | |||
| std::shared_ptr<Tensor> input; | |||
| ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({4, 2}), &input)); | |||
| auto input_02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input)); | |||
| // Check sample_rate | |||
| MS_LOG(INFO) << "sample_rate is zero."; | |||
| std::shared_ptr<TensorTransform> flanger_op = std::make_shared<audio::Flanger>(0); | |||
| mindspore::dataset::Execute Transform01({flanger_op}); | |||
| Status s01 = Transform01(input_02, &input_02); | |||
| EXPECT_FALSE(s01.IsOk()); | |||
| } | |||
| @@ -0,0 +1,210 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.audio.transforms as audio | |||
| from mindspore import log as logger | |||
| from mindspore.dataset.audio.utils import Modulation, Interpolation | |||
def count_unequal_element(data_expected, data_me, rtol, atol):
    """
    Assert that two arrays agree closely enough.

    An element mismatches when |expected - actual| > atol + |expected| * rtol. The check passes
    only if the shapes are equal and the fraction of mismatching elements is below rtol.
    """
    assert data_expected.shape == data_me.shape
    abs_error = np.abs(data_expected - data_me)
    tolerance = atol + np.abs(data_expected) * rtol
    mismatched = abs_error > tolerance
    mismatch_ratio = np.count_nonzero(mismatched) / data_expected.size
    assert mismatch_ratio < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
        data_expected[mismatched], data_me[mismatched], abs_error[mismatched])
def test_flanger_eager_sinusoidal_linear_float64():
    """Eager mode: float64 input, sinusoidal modulation, linear interpolation."""
    source = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float64)
    reference = np.array([[0.10000000000, 0.19999999536, 0.29999998145],
                          [0.23391812865, 0.29239766081, 0.35087719298]], dtype=np.float64)
    flanger = audio.Flanger(44100, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0, Modulation.SINUSOIDAL, Interpolation.LINEAR)
    # Apply the op directly to the array and compare against the reference within tolerance.
    result = flanger(source)
    count_unequal_element(reference, result, 0.0001, 0.0001)
def test_flanger_eager_triangular_linear_float32():
    """Eager mode: float32 input, triangular modulation, linear interpolation."""
    source = np.array([[-1.2, 2, -3.6], [1, 2.4, 3.7]], dtype=np.float32)
    reference = np.array([[-1.0000000000, 1.0000000000, -1.0000000000],
                          [0.58479529619, 1.0000000000, 1.0000000000]], dtype=np.float32)
    flanger = audio.Flanger(44100, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0, Modulation.TRIANGULAR, Interpolation.LINEAR)
    # Apply the op directly to the array and compare against the reference within tolerance.
    result = flanger(source)
    count_unequal_element(reference, result, 0.0001, 0.0001)
def test_flanger_eager_triangular_linear_int():
    """Eager mode: integer input, triangular modulation, linear interpolation."""
    # Original waveform. The builtin int is used as the dtype because the np.int alias
    # (which meant exactly the builtin int) was deprecated and later removed from NumPy.
    waveform = np.array([[-2, -3, 0], [2, 2, 3]], dtype=int)
    # Expect waveform
    expect_waveform = np.array([[-1, -1, 0],
                                [1, 1, 1]], dtype=int)
    flanger_op = audio.Flanger(44100, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0, Modulation.TRIANGULAR, Interpolation.LINEAR)
    # Filtered waveform by flanger
    output = flanger_op(waveform)
    count_unequal_element(expect_waveform, output, 0.0001, 0.0001)
def test_flanger_shape_221():
    """Eager mode: default-parameter Flanger on a float64 input of shape (2, 2, 1)."""
    source = np.array([[[1], [1.1]], [[0.9], [0.6]]], dtype=np.float64)
    reference = np.array([[[1.00000000],
                           [0.64327485]],
                          [[0.90000000],
                           [0.35087719]]], dtype=np.float64)
    # Only the sample rate is given; every other argument keeps its default.
    result = audio.Flanger(44100)(source)
    count_unequal_element(reference, result, 0.0001, 0.0001)
def test_flanger_shape_11211():
    """Eager mode: default-parameter Flanger on a float64 input of shape (1, 1, 2, 1, 1)."""
    source = np.array([[[[[0.44]], [[0.55]]]]], dtype=np.float64)
    # The reference values equal the input values for this case.
    reference = np.array([[[[[0.44000000]], [[0.55000000]]]]], dtype=np.float64)
    result = audio.Flanger(44100)(source)
    count_unequal_element(reference, result, 0.0001, 0.0001)
def test_flanger_pipeline():
    """Pipeline mode: map a default-parameter Flanger over one dataset column and check each row."""
    batch = np.array([[[1.1, 1.2, 1.3], [1.4, 1.5, 1.6]]], dtype=np.float64)
    reference = np.array([[[1.00000000000, 1.00000000000, 1.00000000000],
                           [0.81871345029, 0.87719298245, 0.93567251461]]], dtype=np.float64)
    # The second column holds random values and is not passed through the op.
    columns = (batch, np.random.sample((1, 2, 1)))
    dataset = ds.NumpySlicesDataset(columns, ["channel", "sample"], shuffle=False)
    dataset = dataset.map(input_columns=["channel"], operations=audio.Flanger(44100), num_parallel_workers=1)
    rows = dataset.create_dict_iterator(num_epochs=1, output_numpy=True)
    for row_index, row in enumerate(rows):
        count_unequal_element(reference[row_index, :], row['channel'], 0.0001, 0.0001)
def test_invalid_flanger_input():
    """
    Construct Flanger with one invalid argument at a time and check the raised error.

    Each case overrides a single argument of an otherwise valid call and states the expected
    exception type and a substring of its message.
    """
    def test_invalid_input(test_name, sample_rate, delay, depth, regen, width, speed, phase, modulation, interpolation,
                           error, error_msg):
        logger.info("Test Flanger with bad input: {0}".format(test_name))
        with pytest.raises(error) as error_info:
            audio.Flanger(sample_rate, delay, depth, regen, width, speed, phase, modulation, interpolation)
        assert error_msg in str(error_info.value)

    # A fully valid argument set; every case below replaces exactly one entry.
    valid_args = dict(sample_rate=44100, delay=0.0, depth=2.0, regen=0.0, width=71.0, speed=0.5, phase=25.0,
                      modulation=Modulation.SINUSOIDAL, interpolation=Interpolation.LINEAR)

    str_for_number = "Argument {0} with value {1} is not of type [<class 'float'>, <class 'int'>]," \
                     " but got <class 'str'>."
    out_of_range = "Input {0} is not within the required interval of {1}."

    # (case label, overridden argument, expected exception, expected message substring)
    cases = [
        ("invalid sample_rate parameter value", {"sample_rate": 0}, ValueError,
         "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647]."),
        ("invalid sample_rate parameter type as a float", {"sample_rate": 44100.5}, TypeError,
         "Argument sample_rate with value 44100.5 is not of "
         "type [<class 'int'>], but got <class 'float'>."),
        ("invalid sample_rate parameter type as a String", {"sample_rate": "44100"}, TypeError,
         "Argument sample_rate with value 44100 is not of "
         "type [<class 'int'>], but got <class 'str'>."),
        ("invalid delay parameter type as a String", {"delay": "0.0"}, TypeError,
         str_for_number.format("delay", "0.0")),
        ("invalid delay parameter value", {"delay": 50}, ValueError,
         out_of_range.format("delay", "[0, 30]")),
        ("invalid depth parameter type as a String", {"depth": "2.0"}, TypeError,
         str_for_number.format("depth", "2.0")),
        ("invalid depth parameter value", {"depth": 50.0}, ValueError,
         out_of_range.format("depth", "[0, 10]")),
        ("invalid regen parameter type as a String", {"regen": "0.0"}, TypeError,
         str_for_number.format("regen", "0.0")),
        ("invalid regen parameter value", {"regen": 100.0}, ValueError,
         out_of_range.format("regen", "[-95, 95]")),
        ("invalid width parameter type as a String", {"width": "71.0"}, TypeError,
         str_for_number.format("width", "71.0")),
        ("invalid width parameter value", {"width": 150.0}, ValueError,
         out_of_range.format("width", "[0, 100]")),
        ("invalid speed parameter type as a String", {"speed": "0.5"}, TypeError,
         str_for_number.format("speed", "0.5")),
        ("invalid speed parameter value", {"speed": 50}, ValueError,
         out_of_range.format("speed", "[0.1, 10]")),
        ("invalid phase parameter type as a String", {"phase": "25.0"}, TypeError,
         str_for_number.format("phase", "25.0")),
        ("invalid phase parameter value", {"phase": 150.0}, ValueError,
         out_of_range.format("phase", "[0, 100]")),
        ("invalid modulation parameter type as a String", {"modulation": "test"}, TypeError,
         "Argument modulation with value test is not of type [<Modulation.SINUSOIDAL: 'sinusoidal'>,"
         " <Modulation.TRIANGULAR: 'triangular'>], but got <class 'str'>."),
        # Labelled as the interpolation case so the log identifies which argument is under test.
        ("invalid interpolation parameter type as a String", {"interpolation": "test"}, TypeError,
         "Argument interpolation with value test is not of type [<Interpolation.LINEAR: 'linear'>,"
         " <Interpolation.QUADRATIC: 'quadratic'>], but got <class 'str'>."),
    ]

    for label, override, expected_error, expected_msg in cases:
        call_args = {**valid_args, **override}
        test_invalid_input(label, call_args["sample_rate"], call_args["delay"], call_args["depth"],
                           call_args["regen"], call_args["width"], call_args["speed"], call_args["phase"],
                           call_args["modulation"], call_args["interpolation"], expected_error, expected_msg)
if __name__ == '__main__':
    # Run every test case in this file, in declaration order, when executed as a script.
    for test_case in (test_flanger_eager_sinusoidal_linear_float64,
                      test_flanger_eager_triangular_linear_float32,
                      test_flanger_eager_triangular_linear_int,
                      test_flanger_shape_221,
                      test_flanger_shape_11211,
                      test_flanger_pipeline,
                      test_invalid_flanger_input):
        test_case()