Browse Source

[feat][assistant][I3J6U9] add new data operator Flanger

tags/v1.6.0
li-qiyao 4 years ago
parent
commit
2283442855
19 changed files with 1140 additions and 5 deletions
  1. +35
    -0
      mindspore/ccsrc/minddata/dataset/api/audio.cc
  2. +27
    -0
      mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc
  3. +1
    -0
      mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
  4. +72
    -0
      mindspore/ccsrc/minddata/dataset/audio/ir/kernels/flanger_ir.cc
  5. +64
    -0
      mindspore/ccsrc/minddata/dataset/audio/ir/kernels/flanger_ir.h
  6. +1
    -0
      mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
  7. +55
    -0
      mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc
  8. +240
    -0
      mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
  9. +57
    -0
      mindspore/ccsrc/minddata/dataset/audio/kernels/flanger_op.cc
  10. +72
    -0
      mindspore/ccsrc/minddata/dataset/audio/kernels/flanger_op.h
  11. +32
    -0
      mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
  12. +12
    -0
      mindspore/ccsrc/minddata/dataset/include/dataset/constants.h
  13. +1
    -0
      mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h
  14. +57
    -4
      mindspore/dataset/audio/transforms.py
  15. +26
    -0
      mindspore/dataset/audio/utils.py
  16. +36
    -1
      mindspore/dataset/audio/validators.py
  17. +109
    -0
      tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc
  18. +33
    -0
      tests/ut/cpp/dataset/execute_test.cc
  19. +210
    -0
      tests/ut/python/dataset/test_flanger.py

+ 35
- 0
mindspore/ccsrc/minddata/dataset/api/audio.cc View File

@@ -31,6 +31,7 @@
#include "minddata/dataset/audio/ir/kernels/detect_pitch_frequency_ir.h"
#include "minddata/dataset/audio/ir/kernels/equalizer_biquad_ir.h"
#include "minddata/dataset/audio/ir/kernels/fade_ir.h"
#include "minddata/dataset/audio/ir/kernels/flanger_ir.h"
#include "minddata/dataset/audio/ir/kernels/frequency_masking_ir.h"
#include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h"
#include "minddata/dataset/audio/ir/kernels/lfilter_ir.h"
@@ -276,6 +277,40 @@ std::shared_ptr<TensorOperation> Fade::Parse() {
return std::make_shared<FadeOperation>(data_->fade_in_len_, data_->fade_out_len_, data_->fade_shape_);
}
// Flanger Transform Operation.
// Pimpl-style data holder: keeps all constructor arguments out of the public
// header so the ABI of the Flanger class stays stable.
struct Flanger::Data {
Data(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, float phase,
Modulation modulation, Interpolation interpolation)
: sample_rate_(sample_rate),
delay_(delay),
depth_(depth),
regen_(regen),
width_(width),
speed_(speed),
phase_(phase),
modulation_(modulation),
interpolation_(interpolation) {}
int32_t sample_rate_;   // waveform sampling rate in Hz
float delay_;           // desired delay in ms
float depth_;           // delay depth in ms
float regen_;           // feedback gain in dB
float width_;           // delay gain in dB
float speed_;           // modulation speed in Hz
float phase_;           // percentage phase-shift for multi-channel
Modulation modulation_;
Interpolation interpolation_;
};
// Store the user's parameters; validation happens later in
// FlangerOperation::ValidateParams(), not here.
Flanger::Flanger(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, float phase,
Modulation modulation, Interpolation interpolation)
: data_(std::make_shared<Data>(sample_rate, delay, depth, regen, width, speed, phase, modulation, interpolation)) {}
// Convert the public transform into its IR node.
std::shared_ptr<TensorOperation> Flanger::Parse() {
return std::make_shared<FlangerOperation>(data_->sample_rate_, data_->delay_, data_->depth_, data_->regen_,
data_->width_, data_->speed_, data_->phase_, data_->modulation_,
data_->interpolation_);
}
// FrequencyMasking Transform Operation.
struct FrequencyMasking::Data {
Data(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value)


+ 27
- 0
mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc View File

@@ -35,6 +35,7 @@
#include "minddata/dataset/audio/ir/kernels/detect_pitch_frequency_ir.h"
#include "minddata/dataset/audio/ir/kernels/equalizer_biquad_ir.h"
#include "minddata/dataset/audio/ir/kernels/fade_ir.h"
#include "minddata/dataset/audio/ir/kernels/flanger_ir.h"
#include "minddata/dataset/audio/ir/kernels/frequency_masking_ir.h"
#include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h"
#include "minddata/dataset/audio/ir/kernels/lfilter_ir.h"
@@ -231,6 +232,32 @@ PYBIND_REGISTER(FadeOperation, 1, ([](const py::module *m) {
}));
}));
// Expose the Modulation enum to Python (names mirror the C++ enumerators).
PYBIND_REGISTER(Modulation, 0, ([](const py::module *m) {
(void)py::enum_<Modulation>(*m, "Modulation", py::arithmetic())
.value("DE_MODULATION_SINUSOIDAL", Modulation::kSinusoidal)
.value("DE_MODULATION_TRIANGULAR", Modulation::kTriangular)
.export_values();
}));
// Expose the Interpolation enum to Python.
PYBIND_REGISTER(Interpolation, 0, ([](const py::module *m) {
(void)py::enum_<Interpolation>(*m, "Interpolation", py::arithmetic())
.value("DE_INTERPOLATION_LINEAR", Interpolation::kLinear)
.value("DE_INTERPOLATION_QUADRATIC", Interpolation::kQuadratic)
.export_values();
}));
// Bind FlangerOperation; parameters are validated eagerly at construction so
// invalid values raise on the Python side instead of at pipeline run time.
PYBIND_REGISTER(FlangerOperation, 1, ([](const py::module *m) {
(void)py::class_<audio::FlangerOperation, TensorOperation, std::shared_ptr<audio::FlangerOperation>>(
*m, "FlangerOperation")
.def(py::init([](int32_t sample_rate, float delay, float depth, float regen, float width,
float speed, float phase, Modulation modulation, Interpolation interpolation) {
auto flanger = std::make_shared<audio::FlangerOperation>(sample_rate, delay, depth, regen, width,
speed, phase, modulation, interpolation);
THROW_IF_ERROR(flanger->ValidateParams());
return flanger;
}));
}));
PYBIND_REGISTER(
FrequencyMaskingOperation, 1, ([](const py::module *m) {
(void)


+ 1
- 0
mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt View File

@@ -17,6 +17,7 @@ add_library(audio-ir-kernels OBJECT
detect_pitch_frequency_ir.cc
equalizer_biquad_ir.cc
fade_ir.cc
flanger_ir.cc
frequency_masking_ir.cc
highpass_biquad_ir.cc
lfilter_ir.cc


+ 72
- 0
mindspore/ccsrc/minddata/dataset/audio/ir/kernels/flanger_ir.cc View File

@@ -0,0 +1,72 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/audio/ir/kernels/flanger_ir.h"
#include "minddata/dataset/audio/ir/validators.h"
#include "minddata/dataset/audio/kernels/flanger_op.h"
namespace mindspore {
namespace dataset {
namespace audio {
// FlangerOperation
// IR node for the Flanger transform: stores the user-supplied parameters and
// later builds the runtime FlangerOp kernel from them.
FlangerOperation::FlangerOperation(int32_t sample_rate, float delay, float depth, float regen, float width, float speed,
float phase, Modulation modulation, Interpolation interpolation)
: sample_rate_(sample_rate),
delay_(delay),
depth_(depth),
regen_(regen),
width_(width),
speed_(speed),
phase_(phase),
modulation_(modulation),
interpolation_(interpolation) {}
// Validate all scalar parameters. The numeric ranges mirror the documented
// SoX/torchaudio flanger limits (see the Flanger doc comment in audio.h).
Status FlangerOperation::ValidateParams() {
// sample_rate is used as a divisor when deriving the LFO length, so zero is rejected.
RETURN_IF_NOT_OK(ValidateScalarNotZero("Flanger", "sample_rate", sample_rate_));
// NOTE(review): the two trailing 'false' flags presumably mark both range
// endpoints as inclusive — confirm against ValidateScalar's signature.
RETURN_IF_NOT_OK(ValidateScalar("Flanger", "delay", delay_, {0, 30}, false, false));
RETURN_IF_NOT_OK(ValidateScalar("Flanger", "depth", depth_, {0, 10}, false, false));
RETURN_IF_NOT_OK(ValidateScalar("Flanger", "regen", regen_, {-95, 95}, false, false));
RETURN_IF_NOT_OK(ValidateScalar("Flanger", "width", width_, {0, 100}, false, false));
RETURN_IF_NOT_OK(ValidateScalar("Flanger", "speed", speed_, {0.1, 10}, false, false));
RETURN_IF_NOT_OK(ValidateScalar("Flanger", "phase", phase_, {0, 100}, false, false));
return Status::OK();
}
// Build the runtime kernel from the (already validated) IR parameters.
std::shared_ptr<TensorOp> FlangerOperation::Build() {
  return std::make_shared<FlangerOp>(sample_rate_, delay_, depth_, regen_, width_, speed_, phase_, modulation_,
                                     interpolation_);
}
// Serialize this operation's parameters for dataset-pipeline save/restore.
// NOTE(review): modulation/interpolation are serialized through nlohmann's
// implicit enum conversion (their integer values) — confirm the deserializer
// expects integers rather than names.
Status FlangerOperation::to_json(nlohmann::json *out_json) {
nlohmann::json args;
args["sample_rate"] = sample_rate_;
args["delay"] = delay_;
args["depth"] = depth_;
args["regen"] = regen_;
args["width"] = width_;
args["speed"] = speed_;
args["phase"] = phase_;
args["modulation"] = modulation_;
args["interpolation"] = interpolation_;
*out_json = args;
return Status::OK();
}
} // namespace audio
} // namespace dataset
} // namespace mindspore

+ 64
- 0
mindspore/ccsrc/minddata/dataset/audio/ir/kernels/flanger_ir.h View File

@@ -0,0 +1,64 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_FLANGER_IR_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_FLANGER_IR_H_
#include <memory>
#include <string>
#include <vector>
#include "include/api/status.h"
#include "minddata/dataset/include/dataset/constants.h"
#include "minddata/dataset/include/dataset/transforms.h"
#include "minddata/dataset/kernels/ir/tensor_operation.h"
namespace mindspore {
namespace dataset {
namespace audio {
constexpr char kFlangerOperation[] = "Flanger";
/// \brief IR node for the Flanger audio transform. Holds the user parameters,
/// validates them, builds the runtime FlangerOp, and supports JSON
/// serialization for pipeline save/restore.
class FlangerOperation : public TensorOperation {
public:
explicit FlangerOperation(int32_t sample_rate, float delay, float depth, float regen, float width, float speed,
float phase, Modulation modulation, Interpolation interpolation);
~FlangerOperation() = default;
/// \brief Create the runtime FlangerOp kernel from the stored parameters.
std::shared_ptr<TensorOp> Build() override;
/// \brief Check that all parameters are inside their documented ranges.
Status ValidateParams() override;
std::string Name() const override { return kFlangerOperation; }
/// \brief Serialize the parameters into JSON.
Status to_json(nlohmann::json *out_json) override;
private:
int32_t sample_rate_;
float delay_;
float depth_;
float regen_;
float width_;
float speed_;
float phase_;
Modulation modulation_;
Interpolation interpolation_;
};
} // namespace audio
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_FLANGER_IR_H_

+ 1
- 0
mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt View File

@@ -18,6 +18,7 @@ add_library(audio-kernels OBJECT
detect_pitch_frequency_op.cc
equalizer_biquad_op.cc
fade_op.cc
flanger_op.cc
frequency_masking_op.cc
highpass_biquad_op.cc
lfilter_op.cc


+ 55
- 0
mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc View File

@@ -725,5 +725,60 @@ Status DetectPitchFrequency(const std::shared_ptr<Tensor> &input, std::shared_pt
RETURN_IF_NOT_OK(Tensor::CreateFromVector(out, out_shape, output));
return Status::OK();
}

// Generate a low-frequency-oscillator wave table of `table_size` samples in
// [min, max], starting at phase offset `phase` (radians), with either a
// sinusoidal or triangular shape. When `type` is integral the values are
// rounded half-away-from-zero and cast to int32.
Status GenerateWaveTable(std::shared_ptr<Tensor> *output, const DataType &type, Modulation modulation,
int32_t table_size, float min, float max, float phase) {
RETURN_UNEXPECTED_IF_NULL(output);
// Convert the phase (radians) to an index offset into the table; +0.5 rounds to nearest.
int32_t phase_offset = static_cast<int32_t>(phase / PI / 2 * table_size + 0.5);
// get the offset of the i-th
std::vector<int32_t> point;
for (auto i = 0; i < table_size; i++) {
point.push_back((i + phase_offset) % table_size);
}

std::shared_ptr<Tensor> wave_table;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({table_size}), DataType(DataType::DE_FLOAT32), &wave_table));

auto iter = wave_table->begin<float>();

if (modulation == Modulation::kSinusoidal) {
for (int i = 0; i < table_size; iter++, i++) {
// sine mapped from [-1, 1] into [0, 1]
*iter = (sin(point[i] * PI / table_size * 2) + 1) / 2;
}
} else {
// Triangular wave built piecewise from the position within the period.
for (int i = 0; i < table_size; iter++, i++) {
// change phase
*iter = point[i] * 2.0 / table_size;
// get complete offset
int32_t value = static_cast<int>(4 * point[i] / table_size);
// change the value of the square wave according to the number of complete offsets
if (value == 0) {
*iter = *iter + 0.5;
} else if (value == 1 || value == 2) {
*iter = 1.5 - *iter;
} else if (value == 3) {
*iter = *iter - 1.5;
}
}
}
// Rescale the normalized [0, 1] table into the caller's [min, max] range.
for (iter = wave_table->begin<float>(); iter != wave_table->end<float>(); iter++) {
*iter = *iter * (max - min) + min;
}
if (type.IsInt()) {
// Round half-away-from-zero before the integer cast.
for (iter = wave_table->begin<float>(); iter != wave_table->end<float>(); iter++) {
if (*iter < 0) {
*iter = *iter - 0.5;
} else {
*iter = *iter + 0.5;
}
}
RETURN_IF_NOT_OK(TypeCast(wave_table, output, DataType(DataType::DE_INT32)));
} else if (type.IsFloat()) {
RETURN_IF_NOT_OK(TypeCast(wave_table, output, DataType(DataType::DE_FLOAT32)));
}

return Status::OK();
}
} // namespace dataset
} // namespace mindspore

+ 240
- 0
mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h View File

@@ -25,6 +25,7 @@
#include <vector>

#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/kernels/data/data_utils.h"
#include "minddata/dataset/kernels/tensor_op.h"
#include "minddata/dataset/util/status.h"

@@ -557,6 +558,245 @@ Status MedianSmoothing(const std::shared_ptr<Tensor> &input, std::shared_ptr<Ten
Status DetectPitchFrequency(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t sample_rate,
float frame_time, int32_t win_length, int32_t freq_low, int32_t freq_high);

/// \brief A helper function for flanger, generates a wave table with the given parameters.
/// \param output: Tensor of shape <time>.
/// \param type: can choose DataType::DE_FLOAT32 or DataType::DE_INT32.
/// \param modulation: Modulation of the input tensor.
/// It can be one of Modulation.kSinusoidal or Modulation.kTriangular.
/// \param table_size: The length of table.
/// \param min: Calculate the sampling rate within the delay time.
/// \param max: Calculate the sampling rate within the delay and delay depth time.
/// \param phase: Phase offset of function.
/// \return Status code.
Status GenerateWaveTable(std::shared_ptr<Tensor> *output, const DataType &type, Modulation modulation,
int32_t table_size, float min, float max, float phase);

/// \brief Compute the delayed sample for every (batch, channel) pair by
/// interpolating between neighbouring entries of the circular delay buffer.
/// \param input: Delay buffer tensor of shape <batch, channel, delay_buf_length>.
/// \param int_delay: Per-channel integer part of the delay (taken by value; mutated locally).
/// \param frac_delay: Per-channel fractional part of the delay.
/// \param interpolation: Interpolation of the input tensor.
/// It can be one of Interpolation::kLinear or Interpolation::kQuadratic.
/// \param delay_buf_pos: Current write position within the circular delay buffer.
/// \Returns Per-(batch, channel) interpolated delayed samples.
template <typename T>
std::vector<std::vector<T>> FlangerInterpolation(const std::shared_ptr<Tensor> &input, std::vector<int> int_delay,
const std::vector<T> &frac_delay, Interpolation interpolation,
int delay_buf_pos) {
int n_batch = input->shape()[0];
int n_channels = input->shape()[-2];
int delay_buf_length = input->shape()[-1];

// a = sample at the integer delay, b = the next sample (both wrap around the ring buffer).
std::vector<std::vector<T>> delayed_value_a(n_batch, std::vector<T>(n_channels, 0));
std::vector<std::vector<T>> delayed_value_b(n_batch, std::vector<T>(n_channels, 0));
for (int j = 0; j < n_batch; j++) {
for (int k = 0; k < n_channels; k++) {
// delay after obtaining the current number of channels
auto iter_input = input->begin<T>();
int it = j * n_channels * delay_buf_length + k * delay_buf_length;
iter_input += it + (delay_buf_pos + int_delay[k]) % delay_buf_length;
delayed_value_a[j][k] = *(iter_input);
iter_input = input->begin<T>();
iter_input += it + (delay_buf_pos + int_delay[k] + 1) % delay_buf_length;
delayed_value_b[j][k] = *(iter_input);
}
}
// delay subscript backward
for (int j = 0; j < n_channels; j++) {
int_delay[j] = int_delay[j] + 2;
}
std::vector<std::vector<T>> delayed(n_batch, std::vector<T>(n_channels, 0));
std::vector<std::vector<T>> delayed_value_c(n_batch, std::vector<T>(n_channels, 0));
if (interpolation == Interpolation::kLinear) {
// Linear: blend a and b by the fractional delay.
for (int j = 0; j < n_batch; j++) {
for (int k = 0; k < n_channels; k++) {
delayed[j][k] = delayed_value_a[j][k] + (delayed_value_b[j][k] - delayed_value_a[j][k]) * frac_delay[k];
}
}
} else {
// Quadratic: fetch a third sample (c) two positions past a, then fit a parabola.
for (int j = 0; j < n_batch; j++) {
for (int k = 0; k < n_channels; k++) {
auto iter_input = input->begin<T>();
int it = j * n_channels * delay_buf_length + k * delay_buf_length;
iter_input += it + (delay_buf_pos + int_delay[k]) % delay_buf_length;
delayed_value_c[j][k] = *(iter_input);
}
}
// delay subscript backward
for (int j = 0; j < n_channels; j++) {
int_delay[j] = int_delay[j] + 1;
}
std::vector<std::vector<T>> frac_delay_coefficient(n_batch, std::vector<T>(n_channels, 0));
std::vector<std::vector<T>> frac_delay_value(n_batch, std::vector<T>(n_channels, 0));
for (int j = 0; j < n_batch; j++) {
for (int k = 0; k < n_channels; k++) {
delayed_value_c[j][k] = delayed_value_c[j][k] - delayed_value_a[j][k];
delayed_value_b[j][k] = delayed_value_b[j][k] - delayed_value_a[j][k];
frac_delay_coefficient[j][k] = delayed_value_c[j][k] * 0.5 - delayed_value_b[j][k];
frac_delay_value[j][k] = delayed_value_b[j][k] * 2 - delayed_value_c[j][k] * 0.5;
// the next delay is obtained by delaying the data in the buffer
delayed[j][k] = delayed_value_a[j][k] +
(frac_delay_coefficient[j][k] * frac_delay[k] + frac_delay_value[j][k]) * frac_delay[k];
}
}
}
return delayed;
}

/// \brief Clamp every element of a tensor into [min, max], in place.
/// \param tensor: Tensor of shape <..., time>; its elements are modified in place.
/// \param min: Lower bound; any smaller value is raised to min.
/// \param max: Upper bound; any larger value is lowered to max.
/// \Returns The same tensor, with every element limited to [min, max].
template <typename T>
std::shared_ptr<Tensor> Clamp(const std::shared_ptr<Tensor> &tensor, T min, T max) {
  auto it = tensor->begin<T>();
  const auto last = tensor->end<T>();
  for (; it != last; ++it) {
    const T value = *it;
    *it = (value > max) ? max : ((value < min) ? min : value);
  }
  return tensor;
}

/// \brief Apply flanger effect.
/// \param input/output: Tensor of shape <..., channel, time>.
/// \param sample_rate: Sampling rate of the waveform, e.g. 44100 (Hz), the value can't be zero.
/// \param delay: Desired delay in milliseconds (ms), range: [0, 30].
/// \param depth: Desired delay depth in milliseconds (ms), range: [0, 10].
/// \param regen: Desired regen (feedback gain) in dB, range: [-95, 95].
/// \param width: Desired width (delay gain) in dB, range: [0, 100].
/// \param speed: Modulation speed in Hz, range: [0.1, 10].
/// \param phase: Percentage phase-shift for multi-channel, range: [0, 100].
/// \param modulation: Modulation of the input tensor.
/// It can be one of Modulation::kSinusoidal or Modulation::kTriangular.
/// \param interpolation: Interpolation of the input tensor.
/// It can be one of Interpolation::kLinear or Interpolation::kQuadratic.
/// \return Status code.
template <typename T>
Status Flanger(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int32_t sample_rate, float delay,
float depth, float regen, float width, float speed, float phase, Modulation modulation,
Interpolation interpolation) {
std::shared_ptr<Tensor> waveform;
if (input->type() == DataType::DE_FLOAT64) {
// NOTE(review): for DE_FLOAT64 the input tensor is aliased (no copy), so the
// Reshape below mutates the caller's tensor shape — confirm a copy isn't intended.
waveform = input;
} else {
RETURN_IF_NOT_OK(TypeCast(input, &waveform, DataType(DataType::DE_FLOAT32)));
}
// convert to 3D (batch, channels, time)
TensorShape actual_shape = waveform->shape();
TensorShape toShape({waveform->Size() / actual_shape[-2] / actual_shape[-1], actual_shape[-2], actual_shape[-1]});
RETURN_IF_NOT_OK(waveform->Reshape(toShape));

// scaling: convert percentages to [0, 1] fractions and milliseconds to seconds
T feedback_gain = static_cast<T>(regen) / 100;
T delay_gain = static_cast<T>(width) / 100;
T channel_phase = static_cast<T>(phase) / 100;
T delay_min = static_cast<T>(delay) / 1000;
T delay_depth = static_cast<T>(depth) / 1000;

// balance output:
T in_gain = 1.0 / (1 + delay_gain);
delay_gain = delay_gain / (1 + delay_gain);
// balance feedback loop:
// NOTE(review): unqualified abs on a floating value may resolve to the integer
// overload on some toolchains — prefer std::abs/std::fabs here.
delay_gain = delay_gain * (1 - abs(feedback_gain));

// Ring-buffer length covers the maximum delay plus two guard samples for interpolation.
int delay_buf_length = static_cast<int>((delay_min + delay_depth) * sample_rate + 0.5);
delay_buf_length = delay_buf_length + 2;

// One LFO period in samples.
int lfo_length = static_cast<int>(sample_rate / speed);

T table_min = floor(delay_min * sample_rate + 0.5);
T table_max = delay_buf_length - 2.0;
// generate wave table
T lfo_phase = 3 * PI / 2;
std::shared_ptr<Tensor> lfo;
RETURN_IF_NOT_OK(GenerateWaveTable(&lfo, DataType(DataType::DE_FLOAT32), modulation, lfo_length,
static_cast<float>(table_min), static_cast<float>(table_max),
static_cast<float>(lfo_phase)));
int n_batch = waveform->shape()[0];
int n_channels = waveform->shape()[-2];
int time = waveform->shape()[-1];
std::vector<T> delay_tensor(n_channels, 0.0), frac_delay(n_channels, 0.0);
std::vector<int> cur_channel_phase(n_channels, 0), int_delay(n_channels, 0);
// next delay
std::vector<std::vector<T>> delay_last(n_batch, std::vector<T>(n_channels, 0));

// initialization of delay_bufs
TensorShape delay_bufs_shape({n_batch, n_channels, delay_buf_length});
std::shared_ptr<Tensor> delay_bufs, output_waveform;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(delay_bufs_shape, waveform->type(), &delay_bufs));
RETURN_IF_NOT_OK(delay_bufs->Zero());
// initialization of output_waveform
TensorShape output_waveform_shape({n_batch, n_channels, actual_shape[-1]});
RETURN_IF_NOT_OK(Tensor::CreateEmpty(output_waveform_shape, waveform->type(), &output_waveform));

// Main per-sample loop: write the input (plus feedback) into the ring buffer,
// read back an interpolated delayed sample, then mix dry and wet signals.
int delay_buf_pos = 0, lfo_pos = 0;
for (int i = 0; i < time; i++) {
delay_buf_pos = (delay_buf_pos + delay_buf_length - 1) % delay_buf_length;
for (int j = 0; j < n_channels; j++) {
// get current channel phase
cur_channel_phase[j] = static_cast<int>(j * lfo_length * channel_phase + 0.5);
// through the current channel phase and lfo arrays to get the delay
auto iter_lfo = lfo->begin<float>();
delay_tensor[j] = *(iter_lfo + (lfo_pos + cur_channel_phase[j]) % lfo_length);
// the frac delay is obtained by using the frac function
frac_delay[j] = delay_tensor[j] - static_cast<int>(delay_tensor[j]);
delay_tensor[j] = floor(delay_tensor[j]);
int_delay[j] = static_cast<int>(delay_tensor[j]);
}
// get the waveform of [:, :, i]
std::shared_ptr<Tensor> temp;
TensorShape temp_shape({n_batch, n_channels});
RETURN_IF_NOT_OK(Tensor::CreateEmpty(temp_shape, waveform->type(), &temp));
Slice ss1(0, n_batch), ss2(0, n_channels), ss3(i, i + 1);
SliceOption sp1(ss1), sp2(ss2), sp3(ss3);
std::vector<SliceOption> slice_option;
slice_option.push_back(sp1), slice_option.push_back(sp2), slice_option.push_back(sp3);
RETURN_IF_NOT_OK(waveform->Slice(&temp, slice_option));

auto iter_temp = temp->begin<T>();
auto iter_delay_bufs = delay_bufs->begin<T>();
for (int j = 0; j < n_batch; j++) {
for (int k = 0; k < n_channels; k++) {
iter_delay_bufs += delay_buf_pos;
// the value of delay_bufs is processed by next delay
*(iter_delay_bufs) = *iter_temp + delay_last[j][k] * feedback_gain;
// advance to the start of the next channel's ring buffer
iter_delay_bufs -= (delay_buf_pos - delay_buf_length);
iter_temp++;
}
}
// different delayed values can be obtained by judging the type of interpolation
std::vector<std::vector<T>> delayed(n_batch, std::vector<T>(n_channels, 0));
delayed = FlangerInterpolation<T>(delay_bufs, int_delay, frac_delay, interpolation, delay_buf_pos);

for (int j = 0; j < n_channels; j++) {
int_delay[j] = int_delay[j] + 1;
}
iter_temp = temp->begin<T>();
for (int j = 0; j < n_batch; j++) {
for (int k = 0; k < n_channels; k++) {
auto iter_output_waveform = output_waveform->begin<T>();
// update the next delay
delay_last[j][k] = delayed[j][k];
int it = j * n_channels * actual_shape[-1] + k * actual_shape[-1];
iter_output_waveform += it + i;
// the results are obtained by balancing the output and balancing the feedback loop
*(iter_output_waveform) = *(iter_temp)*in_gain + delayed[j][k] * delay_gain;
iter_temp++;
}
}
// update lfo location
lfo_pos = (lfo_pos + 1) % lfo_length;
}
// the output value is limited by the interval limit function
output_waveform = Clamp<T>(output_waveform, -1, 1);
// convert dimension to waveform dimension
RETURN_IF_NOT_OK(output_waveform->Reshape(actual_shape));
RETURN_IF_NOT_OK(TypeCast(output_waveform, output, input->type()));
return Status::OK();
}
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_

+ 57
- 0
mindspore/ccsrc/minddata/dataset/audio/kernels/flanger_op.cc View File

@@ -0,0 +1,57 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/audio/kernels/flanger_op.h"
#include "minddata/dataset/audio/kernels/audio_utils.h"
#include "minddata/dataset/util/status.h"
namespace mindspore {
namespace dataset {
// Validate the input tensor (rank, channel count, numeric type) and dispatch
// to the Flanger implementation at the matching floating-point precision.
Status FlangerOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
// check input dimensions, it should be 2 dimensions or more
CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() >= 2,
"Flanger: input tensor is not in shape of <..., channel, time>.");
// check input channel, it should be less than or equal to 4
CHECK_FAIL_RETURN_UNEXPECTED(input->shape()[-2] <= 4,
"Flanger: the channel of input tensor must be less than or equal to 4, but got: " +
std::to_string(input->shape()[-2]));
// check input type, it should be [int, float, double]
CHECK_FAIL_RETURN_UNEXPECTED(
input->type().IsNumeric(),
"Flanger: input tensor type should be int, float or double, but got: " + input->type().ToString());
// double input keeps double precision; every other numeric type is processed as float
if (input->type() == DataType(DataType::DE_FLOAT64)) {
return Flanger<double>(input, output, sample_rate_, delay_, depth_, regen_, width_, speed_, phase_, Modulation_,
Interpolation_);
} else {
return Flanger<float>(input, output, sample_rate_, delay_, depth_, regen_, width_, speed_, phase_, Modulation_,
Interpolation_);
}
}
// The output tensor keeps the input's type; only numeric inputs are accepted.
Status FlangerOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) {
RETURN_IF_NOT_OK(TensorOp::OutputType(inputs, outputs));
CHECK_FAIL_RETURN_UNEXPECTED(
inputs[0].IsNumeric(),
"Flanger: input tensor type should be int, float or double, but got: " + inputs[0].ToString());
outputs[0] = inputs[0];
return Status::OK();
}
} // namespace dataset
} // namespace mindspore

+ 72
- 0
mindspore/ccsrc/minddata/dataset/audio/kernels/flanger_op.h View File

@@ -0,0 +1,72 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_FLANGER_OP_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_FLANGER_OP_H_
#include <memory>
#include <string>
#include <vector>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/include/dataset/constants.h"
#include "minddata/dataset/kernels/tensor_op.h"
#include "minddata/dataset/util/status.h"
namespace mindspore {
namespace dataset {
/// \brief Runtime kernel that applies a flanger effect to an audio tensor of
/// shape <..., channel, time>. Parameter ranges are validated by the IR node
/// (FlangerOperation) before this op is built.
class FlangerOp : public TensorOp {
public:
explicit FlangerOp(int32_t sample_rate, float delay, float depth, float regen, float width, float speed, float phase,
Modulation modulation, Interpolation interpolation)
: sample_rate_(sample_rate),
delay_(delay),
depth_(depth),
regen_(regen),
width_(width),
speed_(speed),
phase_(phase),
Modulation_(modulation),
Interpolation_(interpolation) {}
~FlangerOp() override = default;
void Print(std::ostream &out) const override {
out << Name() << ": sample_rate: " << sample_rate_ << ", delay:" << delay_ << ", depth: " << depth_
<< ", regen: " << regen_ << ", width: " << width_ << ", speed: " << speed_ << ", phase: " << phase_
<< ", Modulation: " << Modulation_ << ", Interpolation: " << Interpolation_ << std::endl;
}
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
std::string Name() const override { return kFlangerOp; }
Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override;
private:
int32_t sample_rate_;
float delay_;
float depth_;
float regen_;
float width_;
float speed_;
float phase_;
// NOTE(review): Modulation_/Interpolation_ break the lower_snake_case_ member
// convention used elsewhere; renaming requires a matching change in
// flanger_op.cc, so it is only flagged here.
Modulation Modulation_;
Interpolation Interpolation_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_FLANGER_OP_H_

+ 32
- 0
mindspore/ccsrc/minddata/dataset/include/dataset/audio.h View File

@@ -374,6 +374,38 @@ class Fade final : public TensorTransform {
std::shared_ptr<Data> data_;
};
/// \brief Apply a flanger effect to the audio.
class Flanger final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz).
/// \param[in] delay Desired delay in milliseconds (ms), range: [0, 30] (Default: 0.0).
/// \param[in] depth Desired delay depth in milliseconds (ms), range: [0, 10] (Default: 2.0).
/// \param[in] regen Desired regen (feedback gain) in dB, range: [-95, 95] (Default: 0.0).
/// \param[in] width Desired width (delay gain) in dB, range: [0, 100] (Default: 71.0).
/// \param[in] speed Modulation speed in Hz, range: [0.1, 10] (Default: 0.5).
/// \param[in] phase Percentage phase-shift for multi-channel, range: [0, 100] (Default: 25.0).
/// \param[in] modulation Modulation of input tensor, must be one of [Modulation::kSinusoidal,
/// Modulation::kTriangular] (Default: Modulation::kSinusoidal).
/// \param[in] interpolation Interpolation of input tensor, must be one of [Interpolation::kLinear,
/// Interpolation::kQuadratic] (Default: Interpolation::kLinear).
explicit Flanger(int32_t sample_rate, float delay = 0.0, float depth = 2.0, float regen = 0.0, float width = 71.0,
float speed = 0.5, float phase = 25.0, Modulation modulation = Modulation::kSinusoidal,
Interpolation interpolation = Interpolation::kLinear);
/// \brief Destructor.
~Flanger() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief FrequencyMasking TensorTransform.
/// \notes Apply masking to a spectrogram in the frequency domain.
class FrequencyMasking final : public TensorTransform {


+ 12
- 0
mindspore/ccsrc/minddata/dataset/include/dataset/constants.h View File

@@ -26,6 +26,18 @@ namespace dataset {
using uchar = unsigned char;
using dsize_t = int64_t;

/// \brief The modulation kind used by the Flanger audio transform.
enum class Modulation {
kSinusoidal = 0, ///< Use sinusoidal modulation.
kTriangular = 1 ///< Use triangular modulation.
};

/// \brief The delay-line interpolation kind used by the Flanger audio transform.
enum class Interpolation {
kLinear = 0, ///< Use linear for delay-line interpolation.
kQuadratic = 1 ///< Use quadratic for delay-line interpolation.
};

/// \brief The color conversion code
enum class ConvertMode {
COLOR_BGR2BGRA = 0, ///< Add alpha channel to BGR image.


+ 1
- 0
mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h View File

@@ -158,6 +158,7 @@ constexpr char kDeemphBiquadOp[] = "DeemphBiquadOp";
constexpr char kDetectPitchFrequencyOp[] = "DetectPitchFrequencyOp";
constexpr char kEqualizerBiquadOp[] = "EqualizerBiquadOp";
constexpr char kFadeOp[] = "FadeOp";
constexpr char kFlangerOp[] = "FlangerOp";
constexpr char kFrequencyMaskingOp[] = "FrequencyMaskingOp";
constexpr char kHighpassBiquadOp[] = "HighpassBiquadOp";
constexpr char kLFilterOp[] = "LFilterOp";


+ 57
- 4
mindspore/dataset/audio/transforms.py View File

@@ -23,12 +23,12 @@ import numpy as np
import mindspore._c_dataengine as cde
from ..transforms.c_transforms import TensorOperation
from .utils import FadeShape, GainType, ScaleType
from .utils import FadeShape, GainType, Interpolation, Modulation, ScaleType
from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \
check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_contrast, check_dc_shift, \
check_deemph_biquad, check_detect_pitch_frequency, check_equalizer_biquad, check_fade, check_highpass_biquad, \
check_lfilter, check_lowpass_biquad, check_magphase, check_masking, check_mu_law_decoding, check_riaa_biquad, \
check_time_stretch, check_treble_biquad, check_vol
check_deemph_biquad, check_detect_pitch_frequency, check_equalizer_biquad, check_fade, check_flanger, \
check_highpass_biquad, check_lfilter, check_lowpass_biquad, check_magphase, check_masking, check_mu_law_decoding, \
check_riaa_biquad, check_time_stretch, check_treble_biquad, check_vol
class AudioTensorOperation(TensorOperation):
@@ -498,6 +498,59 @@ class Fade(AudioTensorOperation):
return cde.FadeOperation(self.fade_in_len, self.fade_out_len, DE_C_FADESHAPE_TYPE[self.fade_shape])
# Mapping from the Python-side enum values to their C++ dataengine (cde) counterparts,
# used when building the underlying FlangerOperation.
DE_C_MODULATION_TYPE = {Modulation.SINUSOIDAL: cde.Modulation.DE_MODULATION_SINUSOIDAL,
                        Modulation.TRIANGULAR: cde.Modulation.DE_MODULATION_TRIANGULAR}
DE_C_INTERPOLATION_TYPE = {Interpolation.LINEAR: cde.Interpolation.DE_INTERPOLATION_LINEAR,
                           Interpolation.QUADRATIC: cde.Interpolation.DE_INTERPOLATION_QUADRATIC}
class Flanger(AudioTensorOperation):
    """
    Apply a flanger effect to the audio.

    Args:
        sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
        delay (float, optional): Desired delay in milliseconds (ms), range: [0, 30] (default=0.0).
        depth (float, optional): Desired delay depth in milliseconds (ms), range: [0, 10] (default=2.0).
        regen (float, optional): Desired regen (feedback gain) in dB, range: [-95, 95] (default=0.0).
        width (float, optional): Desired width (delay gain) in dB, range: [0, 100] (default=71.0).
        speed (float, optional): Modulation speed in Hz, range: [0.1, 10] (default=0.5).
        phase (float, optional): Percentage phase-shift for multi-channel, range: [0, 100] (default=25.0).
        modulation (Modulation, optional): Modulation of the input tensor (default=Modulation.SINUSOIDAL).
            It can be one of Modulation.SINUSOIDAL or Modulation.TRIANGULAR.
        interpolation (Interpolation, optional): Interpolation of the input tensor
            (default=Interpolation.LINEAR). It can be one of Interpolation.LINEAR or
            Interpolation.QUADRATIC.

    Examples:
        >>> import numpy as np
        >>>
        >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03], [9.246826171875e-03, 1.0894775390625e-02]])
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.Flanger(44100)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
    """

    @check_flanger
    def __init__(self, sample_rate, delay=0.0, depth=2.0, regen=0.0, width=71.0, speed=0.5,
                 phase=25.0, modulation=Modulation.SINUSOIDAL, interpolation=Interpolation.LINEAR):
        # Persist the validated effect parameters; conversion to the C++
        # representation is deferred until parse() is called by the pipeline.
        self.sample_rate = sample_rate
        self.delay = delay
        self.depth = depth
        self.regen = regen
        self.width = width
        self.speed = speed
        self.phase = phase
        self.modulation = modulation
        self.interpolation = interpolation

    def parse(self):
        # Translate the Python enums into their dataengine equivalents and
        # build the backing C++ operation.
        modulation_c = DE_C_MODULATION_TYPE[self.modulation]
        interpolation_c = DE_C_INTERPOLATION_TYPE[self.interpolation]
        return cde.FlangerOperation(self.sample_rate, self.delay, self.depth, self.regen, self.width,
                                    self.speed, self.phase, modulation_c, interpolation_c)
class FrequencyMasking(AudioTensorOperation):
"""
Apply masking to a spectrogram in the frequency domain.


+ 26
- 0
mindspore/dataset/audio/utils.py View File

@@ -54,6 +54,32 @@ class GainType(str, Enum):
DB: str = "db"


class Interpolation(str, Enum):
    """
    Interpolation Type.

    Possible enumeration values are: Interpolation.LINEAR, Interpolation.QUADRATIC.

    - Interpolation.LINEAR: means input interpolation type is linear.
    - Interpolation.QUADRATIC: means input interpolation type is quadratic.
    """
    LINEAR: str = "linear"
    QUADRATIC: str = "quadratic"


class Modulation(str, Enum):
    """
    Modulation Type.

    Possible enumeration values are: Modulation.SINUSOIDAL, Modulation.TRIANGULAR.

    - Modulation.SINUSOIDAL: means input modulation type is sinusoidal.
    - Modulation.TRIANGULAR: means input modulation type is triangular.
    """
    SINUSOIDAL: str = "sinusoidal"
    TRIANGULAR: str = "triangular"


class ScaleType(str, Enum):
"""
Scale Types.


+ 36
- 1
mindspore/dataset/audio/validators.py View File

@@ -21,7 +21,7 @@ from functools import wraps
from mindspore.dataset.core.validator_helpers import check_float32, check_float32_not_zero, check_int32_not_zero, \
check_list_same_size, check_non_negative_float32, check_non_negative_int32, check_pos_float32, check_pos_int32, \
check_value, parse_user_args, type_check
from .utils import FadeShape, GainType, ScaleType
from .utils import FadeShape, GainType, Interpolation, Modulation, ScaleType


def check_amplitude_to_db(method):
@@ -475,3 +475,38 @@ def check_detect_pitch_frequency(method):
return method(self, *args, **kwargs)

return new_method


def check_flanger(method):
    """Wrapper method to check the parameters of Flanger.

    Validates sample_rate (non-zero int32), the numeric ranges of
    delay/depth/regen/width/speed/phase, and that modulation/interpolation
    are members of their respective enums, before delegating to `method`.
    """

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [sample_rate, delay, depth, regen, width, speed, phase, modulation, interpolation], _ = parse_user_args(
            method, *args, **kwargs)
        type_check(sample_rate, (int,), "sample_rate")
        check_int32_not_zero(sample_rate, "sample_rate")

        type_check(delay, (float, int), "delay")
        check_value(delay, [0, 30], "delay")

        type_check(depth, (float, int), "depth")
        check_value(depth, [0, 10], "depth")

        type_check(regen, (float, int), "regen")
        check_value(regen, [-95, 95], "regen")

        type_check(width, (float, int), "width")
        check_value(width, [0, 100], "width")

        type_check(speed, (float, int), "speed")
        check_value(speed, [0.1, 10], "speed")

        type_check(phase, (float, int), "phase")
        check_value(phase, [0, 100], "phase")

        # Bug fix: `(Modulation)` is not a tuple — the parentheses are a no-op,
        # so a bare class was passed where the validator expects a tuple of types.
        # Use one-element tuples for consistency with the other type_check calls.
        type_check(modulation, (Modulation,), "modulation")
        type_check(interpolation, (Interpolation,), "interpolation")
        return method(self, *args, **kwargs)

    return new_method

+ 109
- 0
tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc View File

@@ -1482,3 +1482,112 @@ TEST_F(MindDataTestPipeline, TestDetectPitchFrequencyParamCheck) {
std::shared_ptr<Iterator> iter05 = ds05->CreateIterator();
EXPECT_EQ(iter05, nullptr);
}

/// Feature: Flanger op.
/// Description: pipeline test with default Flanger parameters on random data.
/// Expectation: output keeps the {2, 200} waveform shape for all 50 rows.
TEST_F(MindDataTestPipeline, TestFlangerBasic) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlangerBasic.";
  // Original waveform
  std::shared_ptr<SchemaObj> schema = Schema();
  ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {2, 200}));
  std::shared_ptr<Dataset> ds = RandomData(50, schema);
  EXPECT_NE(ds, nullptr);

  ds = ds->SetNumWorkers(4);
  EXPECT_NE(ds, nullptr);

  auto FlangerOp = audio::Flanger(44100);

  ds = ds->Map({FlangerOp});
  EXPECT_NE(ds, nullptr);

  // Filtered waveform by flanger
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Bug fix: the iterator created above must be checked, not the dataset again.
  EXPECT_NE(iter, nullptr);

  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  std::vector<int64_t> expected = {2, 200};

  int i = 0;
  while (row.size() != 0) {
    auto col = row["waveform"];
    ASSERT_EQ(col.Shape(), expected);
    ASSERT_EQ(col.Shape().size(), 2);
    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }
  EXPECT_EQ(i, 50);
  iter->Stop();
}

/// Feature: Flanger op.
/// Description: parameter validation — each out-of-range argument must make
/// CreateIterator() return nullptr.
/// Expectation: every invalid configuration is rejected.
TEST_F(MindDataTestPipeline, TestFlangerParamCheck) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlangerParamCheck.";
  std::shared_ptr<SchemaObj> schema = Schema();
  // Original waveform
  ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {2, 2}));
  std::shared_ptr<Dataset> ds = RandomData(50, schema);
  EXPECT_NE(ds, nullptr);

  // Check sample_rate
  MS_LOG(INFO) << "sample_rate is zero.";
  auto flanger_op_sample_rate =
    audio::Flanger(0, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0, Modulation::kSinusoidal, Interpolation::kLinear);
  std::shared_ptr<Dataset> dsSample_rate = ds->Map({flanger_op_sample_rate});
  EXPECT_NE(dsSample_rate, nullptr);
  std::shared_ptr<Iterator> iterSample_rate = dsSample_rate->CreateIterator();
  EXPECT_EQ(iterSample_rate, nullptr);

  // Check delay (valid range [0, 30])
  MS_LOG(INFO) << "delay is out of range.";
  auto flanger_op_delay =
    audio::Flanger(44100, 50.0, 2.0, 0.0, 71.0, 0.5, 25.0, Modulation::kSinusoidal, Interpolation::kLinear);
  std::shared_ptr<Dataset> dsDelay = ds->Map({flanger_op_delay});
  EXPECT_NE(dsDelay, nullptr);
  std::shared_ptr<Iterator> iterDelay = dsDelay->CreateIterator();
  EXPECT_EQ(iterDelay, nullptr);

  // Check depth (valid range [0, 10])
  MS_LOG(INFO) << "depth is out of range.";
  auto flanger_op_depth =
    audio::Flanger(44100, 0.0, 20.0, 0.0, 71.0, 0.5, 25.0, Modulation::kSinusoidal, Interpolation::kLinear);
  std::shared_ptr<Dataset> dsDepth = ds->Map({flanger_op_depth});
  EXPECT_NE(dsDepth, nullptr);
  std::shared_ptr<Iterator> iterDepth = dsDepth->CreateIterator();
  EXPECT_EQ(iterDepth, nullptr);

  // Check regen (valid range [-95, 95])
  MS_LOG(INFO) << "regen is out of range.";
  auto flanger_op_regen =
    audio::Flanger(44100, 0.0, 2.0, 100.0, 71.0, 0.5, 25.0, Modulation::kSinusoidal, Interpolation::kLinear);
  std::shared_ptr<Dataset> dsRegen = ds->Map({flanger_op_regen});
  EXPECT_NE(dsRegen, nullptr);
  std::shared_ptr<Iterator> iterRegen = dsRegen->CreateIterator();
  EXPECT_EQ(iterRegen, nullptr);

  // Check width (valid range [0, 100])
  MS_LOG(INFO) << "width is out of range.";
  auto flanger_op_width =
    audio::Flanger(44100, 0.0, 2.0, 0.0, 200.0, 0.5, 25.0, Modulation::kSinusoidal, Interpolation::kLinear);
  std::shared_ptr<Dataset> dsWidth = ds->Map({flanger_op_width});
  EXPECT_NE(dsWidth, nullptr);
  std::shared_ptr<Iterator> iterWidth = dsWidth->CreateIterator();
  EXPECT_EQ(iterWidth, nullptr);

  // Check speed (valid range [0.1, 10])
  MS_LOG(INFO) << "speed is out of range.";
  auto flanger_op_speed =
    audio::Flanger(44100, 0.0, 2.0, 0.0, 71.0, 20, 25.0, Modulation::kSinusoidal, Interpolation::kLinear);
  std::shared_ptr<Dataset> dsSpeed = ds->Map({flanger_op_speed});
  EXPECT_NE(dsSpeed, nullptr);
  std::shared_ptr<Iterator> iterSpeed = dsSpeed->CreateIterator();
  EXPECT_EQ(iterSpeed, nullptr);

  // Check phase (valid range [0, 100])
  // Bug fix: this case was a copy-paste of the speed case (speed=20, phase=25),
  // so an invalid phase was never actually exercised. Use a valid speed and an
  // out-of-range phase instead.
  MS_LOG(INFO) << "phase is out of range.";
  auto flanger_op_phase =
    audio::Flanger(44100, 0.0, 2.0, 0.0, 71.0, 0.5, 150.0, Modulation::kSinusoidal, Interpolation::kLinear);
  std::shared_ptr<Dataset> dsPhase = ds->Map({flanger_op_phase});
  EXPECT_NE(dsPhase, nullptr);
  std::shared_ptr<Iterator> iterPhase = dsPhase->CreateIterator();
  EXPECT_EQ(iterPhase, nullptr);
}

+ 33
- 0
tests/ut/cpp/dataset/execute_test.cc View File

@@ -1291,3 +1291,36 @@ TEST_F(MindDataTestExecute, TestDetectPitchFrequencyWithWrongArg) {
Status s05 = Transform05(input_02, &input_02);
EXPECT_FALSE(s05.IsOk());
}

/// Feature: Flanger op.
/// Description: eager-mode execution on a 2x10 float32 waveform using the
/// default Flanger parameters.
/// Expectation: the transform succeeds.
TEST_F(MindDataTestExecute, TestFlangerWithEager) {
  MS_LOG(INFO) << "Doing MindDataTestExecute-TestFlangerWithEager.";
  // Original waveform
  std::vector<float> labels = {
    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
  std::shared_ptr<Tensor> input;
  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input));
  // Wrap the dataset Tensor in an MSTensor so it can be fed to Execute.
  auto input_02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
  std::shared_ptr<TensorTransform> flanger_01 = std::make_shared<audio::Flanger>(44100);
  mindspore::dataset::Execute Transform01({flanger_01});
  // Filtered waveform by flanger
  Status s01 = Transform01(input_02, &input_02);
  EXPECT_TRUE(s01.IsOk());
}

/// Feature: Flanger op.
/// Description: eager-mode execution with an invalid sample_rate (0).
/// Expectation: the transform fails with a non-OK status.
TEST_F(MindDataTestExecute, TestFlangerWithWrongArg) {
  MS_LOG(INFO) << "Doing MindDataTestExecute-TestFlangerWithWrongArg.";
  std::vector<double> labels = {1.143, 1.3123, 2.632, 2.554, 1.213, 1.3, 0.456, 3.563};
  std::shared_ptr<Tensor> input;
  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({4, 2}), &input));
  auto input_02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
  // Check sample_rate: zero is rejected by the op's validation.
  MS_LOG(INFO) << "sample_rate is zero.";
  std::shared_ptr<TensorTransform> flanger_op = std::make_shared<audio::Flanger>(0);
  mindspore::dataset::Execute Transform01({flanger_op});
  Status s01 = Transform01(input_02, &input_02);
  EXPECT_FALSE(s01.IsOk());
}

+ 210
- 0
tests/ut/python/dataset/test_flanger.py View File

@@ -0,0 +1,210 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import pytest

import mindspore.dataset as ds
import mindspore.dataset.audio.transforms as audio
from mindspore import log as logger
from mindspore.dataset.audio.utils import Modulation, Interpolation


def count_unequal_element(data_expected, data_me, rtol, atol):
    """Assert that the fraction of elements outside tolerance is below rtol."""
    assert data_expected.shape == data_me.shape
    total_count = len(data_expected.flatten())
    error = np.abs(data_expected - data_me)
    # Element is "unequal" when its absolute error exceeds atol + rtol * |expected|.
    tolerance = atol + np.abs(data_expected) * rtol
    greater = error > tolerance
    loss_count = np.count_nonzero(greater)
    message = "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
        data_expected[greater], data_me[greater], error[greater])
    assert (loss_count / total_count) < rtol, message


def test_flanger_eager_sinusoidal_linear_float64():
    """ mindspore eager mode normal testcase:flanger op"""
    # Input: 2 channels x 3 samples, float64.
    waveform = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float64)
    # Reference output for sinusoidal modulation with linear interpolation.
    expect_waveform = np.array([[0.10000000000, 0.19999999536, 0.29999998145],
                                [0.23391812865, 0.29239766081, 0.35087719298]], dtype=np.float64)
    op = audio.Flanger(44100, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0, Modulation.SINUSOIDAL, Interpolation.LINEAR)
    result = op(waveform)
    count_unequal_element(expect_waveform, result, 0.0001, 0.0001)


def test_flanger_eager_triangular_linear_float32():
    """ mindspore eager mode normal testcase:flanger op"""
    # Input: 2 channels x 3 samples, float32; values outside [-1, 1] get clamped.
    waveform = np.array([[-1.2, 2, -3.6], [1, 2.4, 3.7]], dtype=np.float32)
    # Reference output for triangular modulation with linear interpolation.
    expect_waveform = np.array([[-1.0000000000, 1.0000000000, -1.0000000000],
                                [0.58479529619, 1.0000000000, 1.0000000000]], dtype=np.float32)
    op = audio.Flanger(44100, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0, Modulation.TRIANGULAR, Interpolation.LINEAR)
    result = op(waveform)
    count_unequal_element(expect_waveform, result, 0.0001, 0.0001)


def test_flanger_eager_triangular_linear_int():
    """ mindspore eager mode normal testcase:flanger op"""
    # Bug fix: np.int is a deprecated alias of the builtin int (removed in
    # NumPy 1.24); use int directly so the test keeps working on new NumPy.
    # Original waveform
    waveform = np.array([[-2, -3, 0], [2, 2, 3]], dtype=int)
    # Expect waveform
    expect_waveform = np.array([[-1, -1, 0],
                                [1, 1, 1]], dtype=int)
    flanger_op = audio.Flanger(44100, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0, Modulation.TRIANGULAR, Interpolation.LINEAR)
    # Filtered waveform by flanger
    output = flanger_op(waveform)
    count_unequal_element(expect_waveform, output, 0.0001, 0.0001)



def test_flanger_shape_221():
    """ mindspore eager mode normal testcase:flanger op"""
    # Input with shape (2, 2, 1) — Flanger should preserve the shape.
    waveform = np.array([[[1], [1.1]], [[0.9], [0.6]]], dtype=np.float64)
    # Reference output for the default Flanger parameters.
    expect_waveform = np.array([[[1.00000000],
                                 [0.64327485]],

                                [[0.90000000],
                                 [0.35087719]]], dtype=np.float64)

    op = audio.Flanger(44100)
    result = op(waveform)
    count_unequal_element(expect_waveform, result, 0.0001, 0.0001)


def test_flanger_shape_11211():
    """ mindspore eager mode normal testcase:flanger op"""
    # Input with shape (1, 1, 2, 1, 1) — values small enough to pass through unchanged.
    waveform = np.array([[[[[0.44]], [[0.55]]]]], dtype=np.float64)
    expect_waveform = np.array([[[[[0.44000000]], [[0.55000000]]]]], dtype=np.float64)

    op = audio.Flanger(44100)
    result = op(waveform)
    count_unequal_element(expect_waveform, result, 0.0001, 0.0001)


def test_flanger_pipeline():
    """ mindspore pipeline mode normal testcase:flanger op"""
    # Input: one row with a (2, 3) waveform.
    waveform = np.array([[[1.1, 1.2, 1.3], [1.4, 1.5, 1.6]]], dtype=np.float64)
    # Reference output for the default Flanger parameters.
    expect_waveform = np.array([[[1.00000000000, 1.00000000000, 1.00000000000],
                                 [0.81871345029, 0.87719298245, 0.93567251461]]], dtype=np.float64)
    data = (waveform, np.random.sample((1, 2, 1)))
    dataset = ds.NumpySlicesDataset(data, ["channel", "sample"], shuffle=False)
    op = audio.Flanger(44100)
    dataset = dataset.map(
        input_columns=["channel"], operations=op, num_parallel_workers=1)
    row_index = 0
    for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        count_unequal_element(expect_waveform[row_index, :],
                              item['channel'], 0.0001, 0.0001)
        row_index += 1


def test_invalid_flanger_input():
    """Negative tests: each invalid Flanger argument must raise the expected error."""

    def test_invalid_input(test_name, sample_rate, delay, depth, regen, width, speed, phase, modulation,
                           interpolation, error, error_msg):
        # Helper: constructing Flanger with the given args must raise `error`
        # with `error_msg` in its message.
        logger.info("Test Flanger with bad input: {0}".format(test_name))
        with pytest.raises(error) as error_info:
            audio.Flanger(sample_rate, delay, depth, regen, width, speed, phase, modulation, interpolation)
        assert error_msg in str(error_info.value)

    test_invalid_input("invalid sample_rate parameter value", 0, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, ValueError,
                       "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].")
    test_invalid_input("invalid sample_rate parameter type as a float", 44100.5, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, TypeError,
                       "Argument sample_rate with value 44100.5 is not of "
                       "type [<class 'int'>], but got <class 'float'>.")
    test_invalid_input("invalid sample_rate parameter type as a String", "44100", 0.0, 2.0, 0.0, 71.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, TypeError,
                       "Argument sample_rate with value 44100 is not of "
                       "type [<class 'int'>], but got <class 'str'>.")

    test_invalid_input("invalid delay parameter type as a String", 44100, "0.0", 2.0, 0.0, 71.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, TypeError,
                       "Argument delay with value 0.0 is not of type [<class 'float'>, <class 'int'>],"
                       " but got <class 'str'>.")
    test_invalid_input("invalid delay parameter value", 44100, 50, 2.0, 0.0, 71.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, ValueError,
                       "Input delay is not within the required interval of [0, 30].")

    test_invalid_input("invalid depth parameter type as a String", 44100, 0.0, "2.0", 0.0, 71.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, TypeError,
                       "Argument depth with value 2.0 is not of type [<class 'float'>, <class 'int'>],"
                       " but got <class 'str'>.")
    test_invalid_input("invalid depth parameter value", 44100, 0.0, 50.0, 0.0, 71.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, ValueError,
                       "Input depth is not within the required interval of [0, 10].")

    test_invalid_input("invalid regen parameter type as a String", 44100, 0.0, 2.0, "0.0", 71.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, TypeError,
                       "Argument regen with value 0.0 is not of type [<class 'float'>, <class 'int'>],"
                       " but got <class 'str'>.")
    test_invalid_input("invalid regen parameter value", 44100, 0.0, 2.0, 100.0, 71.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, ValueError,
                       "Input regen is not within the required interval of [-95, 95].")

    test_invalid_input("invalid width parameter type as a String", 44100, 0.0, 2.0, 0.0, "71.0", 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, TypeError,
                       "Argument width with value 71.0 is not of type [<class 'float'>, <class 'int'>],"
                       " but got <class 'str'>.")
    test_invalid_input("invalid width parameter value", 44100, 0.0, 2.0, 0.0, 150.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, ValueError,
                       "Input width is not within the required interval of [0, 100].")

    test_invalid_input("invalid speed parameter type as a String", 44100, 0.0, 2.0, 0.0, 71.0, "0.5", 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, TypeError,
                       "Argument speed with value 0.5 is not of type [<class 'float'>, <class 'int'>],"
                       " but got <class 'str'>.")
    test_invalid_input("invalid speed parameter value", 44100, 0.0, 2.0, 0.0, 71.0, 50, 25.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, ValueError,
                       "Input speed is not within the required interval of [0.1, 10].")

    test_invalid_input("invalid phase parameter type as a String", 44100, 0.0, 2.0, 0.0, 71.0, 0.5, "25.0",
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, TypeError,
                       "Argument phase with value 25.0 is not of type [<class 'float'>, <class 'int'>],"
                       " but got <class 'str'>.")
    test_invalid_input("invalid phase parameter value", 44100, 0.0, 2.0, 0.0, 71.0, 0.5, 150.0,
                       Modulation.SINUSOIDAL, Interpolation.LINEAR, ValueError,
                       "Input phase is not within the required interval of [0, 100].")

    test_invalid_input("invalid modulation parameter value", 44100, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0, "test",
                       Interpolation.LINEAR, TypeError,
                       "Argument modulation with value test is not of type [<Modulation.SINUSOIDAL: 'sinusoidal'>,"
                       " <Modulation.TRIANGULAR: 'triangular'>], but got <class 'str'>.")

    # Bug fix: this case exercises `interpolation`, but its label was a
    # copy-paste of the modulation case above.
    test_invalid_input("invalid interpolation parameter value", 44100, 0.0, 2.0, 0.0, 71.0, 0.5, 25.0,
                       Modulation.SINUSOIDAL, "test", TypeError,
                       "Argument interpolation with value test is not of type [<Interpolation.LINEAR: 'linear'>,"
                       " <Interpolation.QUADRATIC: 'quadratic'>], but got <class 'str'>.")


if __name__ == '__main__':
    # Run the full suite when executed directly (pytest discovers these otherwise).
    test_flanger_eager_sinusoidal_linear_float64()
    test_flanger_eager_triangular_linear_float32()
    test_flanger_eager_triangular_linear_int()
    test_flanger_shape_221()
    test_flanger_shape_11211()
    test_flanger_pipeline()
    test_invalid_flanger_input()

Loading…
Cancel
Save