| @@ -41,6 +41,7 @@ | |||
| #include "minddata/dataset/audio/ir/kernels/lfilter_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/lowpass_biquad_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/magphase_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/mask_along_axis_iid_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/mel_scale_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/mu_law_decoding_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/mu_law_encoding_ir.h" | |||
| @@ -466,6 +467,22 @@ Magphase::Magphase(float power) : data_(std::make_shared<Data>(power)) {} | |||
| std::shared_ptr<TensorOperation> Magphase::Parse() { return std::make_shared<MagphaseOperation>(data_->power_); } | |||
| // MaskAlongAxisIID Transform Operation. | |||
| // Parameter bundle kept by the MaskAlongAxisIID transform until Parse() forwards it to the IR node. | |||
| struct MaskAlongAxisIID::Data { | |||
| // Upper bound of the sampled mask width. | |||
| int32_t mask_param_; | |||
| // Value written into the masked columns. | |||
| float mask_value_; | |||
| // Axis selector (1 for frequency, 2 for time). | |||
| int32_t axis_; | |||
| Data(int32_t mask_param, float mask_value, int32_t axis) | |||
| : mask_param_{mask_param}, mask_value_{mask_value}, axis_{axis} {} | |||
| }; | |||
| // Captures the three arguments in the shared Data object; no validation is performed here. | |||
| MaskAlongAxisIID::MaskAlongAxisIID(int32_t mask_param, float mask_value, int32_t axis) { | |||
| data_ = std::make_shared<Data>(mask_param, mask_value, axis); | |||
| } | |||
| // Creates the IR node from the parameters stored at construction time. | |||
| std::shared_ptr<TensorOperation> MaskAlongAxisIID::Parse() { | |||
| const Data &params = *data_; | |||
| return std::make_shared<MaskAlongAxisIIDOperation>(params.mask_param_, params.mask_value_, params.axis_); | |||
| } | |||
| // MelScale Transform Operation. | |||
| struct MelScale::Data { | |||
| Data(int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t n_stft, NormType norm, MelType mel_type) | |||
| @@ -45,6 +45,7 @@ | |||
| #include "minddata/dataset/audio/ir/kernels/lfilter_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/lowpass_biquad_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/magphase_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/mask_along_axis_iid_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/mel_scale_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/mu_law_decoding_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/mu_law_encoding_ir.h" | |||
| @@ -375,6 +376,17 @@ PYBIND_REGISTER(MagphaseOperation, 1, ([](const py::module *m) { | |||
| })); | |||
| })); | |||
| // Python binding for MaskAlongAxisIIDOperation: the init lambda builds the node and | |||
| // raises (via THROW_IF_ERROR) when ValidateParams() reports an error. | |||
| PYBIND_REGISTER(MaskAlongAxisIIDOperation, 1, ([](const py::module *m) { | |||
| using OperationT = audio::MaskAlongAxisIIDOperation; | |||
| (void)py::class_<OperationT, TensorOperation, std::shared_ptr<OperationT>>(*m, "MaskAlongAxisIIDOperation") | |||
| .def(py::init([](int32_t mask_param, float mask_value, int32_t axis) { | |||
| auto operation = std::make_shared<OperationT>(mask_param, mask_value, axis); | |||
| THROW_IF_ERROR(operation->ValidateParams()); | |||
| return operation; | |||
| })); | |||
| })); | |||
| PYBIND_REGISTER(MelScaleOperation, 1, ([](const py::module *m) { | |||
| (void) | |||
| py::class_<audio::MelScaleOperation, TensorOperation, std::shared_ptr<audio::MelScaleOperation>>( | |||
| @@ -27,6 +27,7 @@ add_library(audio-ir-kernels OBJECT | |||
| lfilter_ir.cc | |||
| lowpass_biquad_ir.cc | |||
| magphase_ir.cc | |||
| mask_along_axis_iid_ir.cc | |||
| mel_scale_ir.cc | |||
| mu_law_decoding_ir.cc | |||
| mu_law_encoding_ir.cc | |||
| @@ -0,0 +1,55 @@ | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/audio/ir/kernels/mask_along_axis_iid_ir.h" | |||
| #include "minddata/dataset/audio/ir/validators.h" | |||
| #include "minddata/dataset/audio/kernels/mask_along_axis_iid_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace audio { | |||
| MaskAlongAxisIIDOperation::MaskAlongAxisIIDOperation(int32_t mask_param, float mask_value, int32_t axis) | |||
| : mask_param_(mask_param), mask_value_(mask_value), axis_(axis) { | |||
| random_op_ = true; | |||
| } | |||
| MaskAlongAxisIIDOperation::~MaskAlongAxisIIDOperation() = default; | |||
| // Checks that mask_param is non-negative and that axis is one of {1, 2}. | |||
| // Returns the first failing status, or OK when both checks pass. | |||
| Status MaskAlongAxisIIDOperation::ValidateParams() { | |||
| constexpr char op_name[] = "MaskAlongAxisIID"; | |||
| RETURN_IF_NOT_OK(ValidateIntScalarNonNegative(op_name, "mask_param", mask_param_)); | |||
| RETURN_IF_NOT_OK(ValidateScalarValue(op_name, "axis", axis_, {1, 2})); | |||
| return Status::OK(); | |||
| } | |||
| // Returns the operation name constant kMaskAlongAxisIIDOperation. | |||
| std::string MaskAlongAxisIIDOperation::Name() const { | |||
| return std::string(kMaskAlongAxisIIDOperation); | |||
| } | |||
| // Instantiates the runtime kernel with the same three parameters held by this node. | |||
| std::shared_ptr<TensorOp> MaskAlongAxisIIDOperation::Build() { | |||
| return std::make_shared<MaskAlongAxisIIDOp>(mask_param_, mask_value_, axis_); | |||
| } | |||
| // Serializes mask_param, mask_value and axis; *out_json is overwritten with the new object. | |||
| Status MaskAlongAxisIIDOperation::to_json(nlohmann::json *out_json) { | |||
| nlohmann::json serialized; | |||
| serialized["axis"] = axis_; | |||
| serialized["mask_value"] = mask_value_; | |||
| serialized["mask_param"] = mask_param_; | |||
| *out_json = serialized; | |||
| return Status::OK(); | |||
| } | |||
| } // namespace audio | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_MASK_ALONG_AXIS_IID_IR_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_MASK_ALONG_AXIS_IID_IR_H_ | |||
| #include <map> | |||
| #include <memory> | |||
| #include <random> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "include/api/status.h" | |||
| #include "minddata/dataset/include/dataset/constants.h" | |||
| #include "minddata/dataset/kernels/ir/tensor_operation.h" | |||
| #include "minddata/dataset/util/random.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace audio { | |||
| // Name string returned by MaskAlongAxisIIDOperation::Name(). | |||
| constexpr char kMaskAlongAxisIIDOperation[] = "MaskAlongAxisIID"; | |||
| /// \brief IR node holding the MaskAlongAxisIID parameters. | |||
| class MaskAlongAxisIIDOperation : public TensorOperation { | |||
| public: | |||
| /// \brief Constructor. | |||
| /// \param[in] mask_param Upper bound of the mask width; checked to be non-negative by ValidateParams(). | |||
| /// \param[in] mask_value Value assigned to the masked columns. | |||
| /// \param[in] axis Axis to mask; ValidateParams() accepts 1 or 2. | |||
| MaskAlongAxisIIDOperation(int32_t mask_param, float mask_value, int32_t axis); | |||
| /// \brief Destructor. | |||
| ~MaskAlongAxisIIDOperation(); | |||
| /// \brief Name of the operation. | |||
| std::string Name() const override; | |||
| /// \brief Validate the stored parameters. | |||
| Status ValidateParams() override; | |||
| /// \brief Create the runtime TensorOp for this node. | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| /// \brief Write the parameters into out_json. | |||
| Status to_json(nlohmann::json *out_json) override; | |||
| private: | |||
| int32_t mask_param_; | |||
| float mask_value_; | |||
| int32_t axis_; | |||
| }; | |||
| } // namespace audio | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_MASK_ALONG_AXIS_IID_IR_H_ | |||
| @@ -28,6 +28,7 @@ add_library(audio-kernels OBJECT | |||
| lfilter_op.cc | |||
| lowpass_biquad_op.cc | |||
| magphase_op.cc | |||
| mask_along_axis_iid_op.cc | |||
| mel_scale_op.cc | |||
| mu_law_decoding_op.cc | |||
| mu_law_encoding_op.cc | |||
| @@ -562,6 +562,13 @@ Status MaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tenso | |||
| std::to_string(mask_start) + ", 'mask_width' " + std::to_string(mask_width) + " and length " + | |||
| std::to_string(input_shape[check_dim_ind])); | |||
| if (mask_width == 0) { | |||
| // unsqueeze input | |||
| (void)input->Reshape(input_shape); | |||
| *output = input; | |||
| return Status::OK(); | |||
| } | |||
| int32_t cell_size = input->type().SizeInBytes(); | |||
| if (axis == 1) { | |||
| @@ -578,7 +585,8 @@ Status MaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tenso | |||
| "MaskAlongAxis: mask failed, memory copy error."); | |||
| } else { | |||
| // tensor float 64 | |||
| CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_value, cell_size) == 0, | |||
| auto mask_val = static_cast<double>(mask_value); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_val, cell_size) == 0, | |||
| "MaskAlongAxis: mask failed, memory copy error."); | |||
| } | |||
| } | |||
| @@ -595,7 +603,8 @@ Status MaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tenso | |||
| "MaskAlongAxis: mask failed, memory copy error."); | |||
| } else { | |||
| // tensor float 64 | |||
| CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_value, cell_size) == 0, | |||
| auto mask_val = static_cast<double>(mask_value); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_val, cell_size) == 0, | |||
| "MaskAlongAxis: mask failed, memory copy error."); | |||
| } | |||
| } | |||
| @@ -0,0 +1,71 @@ | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/audio/kernels/mask_along_axis_iid_op.h" | |||
| #include "minddata/dataset/audio/kernels/audio_utils.h" | |||
| #include "minddata/dataset/kernels/data/data_utils.h" | |||
| #include "minddata/dataset/util/random.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Values of `axis` handled by Compute(). | |||
| constexpr int32_t kFrequencyAxis = 1; | |||
| constexpr int32_t kTimeAxis = 2; | |||
| // Shape indices used for the frequency / time dimension of a <..., freq, time> tensor | |||
| // (negative values are presumably resolved from the end by TensorShape - confirm). | |||
| constexpr int32_t kTensorFrequencyPos = -2; | |||
| constexpr int32_t kTensorTimePos = -1; | |||
| // Validates the input rank and type, checks that mask_param does not exceed the length of the | |||
| // selected dimension, then delegates the masking to RandomMaskAlongAxis. | |||
| // Inputs that are not float64 are first cast to float32; float64 inputs are passed through. | |||
| Status MaskAlongAxisIIDOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| RETURN_IF_NOT_OK(ValidateLowRank("MaskAlongAxisIID", input, kDefaultAudioDim, "<..., freq, time>")); | |||
| RETURN_IF_NOT_OK(ValidateTensorType("MaskAlongAxisIID", input->type().IsNumeric(), "[int, float, double]", | |||
| input->type().ToString())); | |||
| TensorShape input_shape = input->shape(); | |||
| if (axis_ == kFrequencyAxis) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| input_shape[kTensorFrequencyPos] >= mask_param_, | |||
| "MaskAlongAxisIID: mask_param should be less than or equal to the length of frequency dimension."); | |||
| } else if (axis_ == kTimeAxis) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| input_shape[kTensorTimePos] >= mask_param_, | |||
| "MaskAlongAxisIID: mask_param should be less than or equal to the length of time dimension."); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("MaskAlongAxisIID: only support Frequency and Time masking, axis should be 1 or 2."); | |||
| } | |||
| std::shared_ptr<Tensor> input_tensor; | |||
| if (input->type() != DataType::DE_FLOAT64) { | |||
| RETURN_IF_NOT_OK(TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32))); | |||
| } else { | |||
| // NOTE(review): the float64 path shares the caller's tensor; confirm RandomMaskAlongAxis does not | |||
| // modify its input in place. | |||
| input_tensor = input; | |||
| } | |||
| return RandomMaskAlongAxis(input_tensor, output, mask_param_, mask_value_, axis_, rnd_); | |||
| } | |||
| // Reports the output type: float64 for float64 input, float32 for every other numeric input. | |||
| // Non-numeric input types are rejected through ValidateTensorType. | |||
| Status MaskAlongAxisIIDOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) { | |||
| RETURN_IF_NOT_OK(TensorOp::OutputType(inputs, outputs)); | |||
| const DataType &in_type = inputs[0]; | |||
| RETURN_IF_NOT_OK( | |||
| ValidateTensorType("MaskAlongAxisIID", in_type.IsNumeric(), "[int, float, double]", in_type.ToString())); | |||
| const bool is_double = (in_type == DataType(DataType::DE_FLOAT64)); | |||
| outputs[0] = DataType(is_double ? DataType::DE_FLOAT64 : DataType::DE_FLOAT32); | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,60 @@ | |||
| /** | |||
| * Copyright 2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_MASK_ALONG_AXIS_IID_OP_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_MASK_ALONG_AXIS_IID_OP_H_ | |||
| #include <memory> | |||
| #include <random> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "minddata/dataset/core/tensor.h" | |||
| #include "minddata/dataset/kernels/tensor_op.h" | |||
| #include "minddata/dataset/util/random.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| /// \brief Runtime kernel of MaskAlongAxisIID; owns its own std::mt19937 generator. | |||
| class MaskAlongAxisIIDOp : public TensorOp { | |||
| public: | |||
| /// \brief Constructor. The generator is seeded from GetSeed() and the op is marked non-deterministic. | |||
| /// \param[in] mask_param Number of columns to be masked, will be uniformly sampled from [0, mask_param], | |||
| /// must be non negative. | |||
| /// \param[in] mask_value Value to assign to the masked columns. | |||
| /// \param[in] axis Axis to apply masking on (1 for frequency and 2 for time). | |||
| MaskAlongAxisIIDOp(int32_t mask_param, float mask_value, int32_t axis) | |||
| : mask_param_(mask_param), mask_value_(mask_value), axis_(axis), rnd_(GetSeed()) { | |||
| is_deterministic_ = false; | |||
| } | |||
| /// \brief Destructor. | |||
| ~MaskAlongAxisIIDOp() override = default; | |||
| /// \brief Name of the op (kMaskAlongAxisIIDOp). | |||
| std::string Name() const override { return kMaskAlongAxisIIDOp; } | |||
| /// \brief Determine the output data type from the input data type. | |||
| Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override; | |||
| /// \brief Apply the mask to one input tensor. | |||
| Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override; | |||
| private: | |||
| int32_t mask_param_; | |||
| float mask_value_; | |||
| int32_t axis_; | |||
| std::mt19937 rnd_; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_MASK_ALONG_AXIS_IID_OP_H_ | |||
| @@ -616,6 +616,30 @@ class MS_API Magphase final : public TensorTransform { | |||
| std::shared_ptr<Data> data_; | |||
| }; | |||
| /// \brief MaskAlongAxisIID TensorTransform. | |||
| /// \note Apply a mask along axis. | |||
| /// Exported with MS_API like the neighbouring audio transforms so the class is visible to library users. | |||
| class MS_API MaskAlongAxisIID final : public TensorTransform { | |||
| public: | |||
| /// \brief Constructor. | |||
| /// \param[in] mask_param Number of columns to be masked, will be uniformly sampled from [0, mask_param], | |||
| /// must be non negative. | |||
| /// \param[in] mask_value Value to assign to the masked columns. | |||
| /// \param[in] axis Axis to apply masking on (1 for frequency and 2 for time). | |||
| MaskAlongAxisIID(int32_t mask_param, float mask_value, int32_t axis); | |||
| /// \brief Destructor. | |||
| ~MaskAlongAxisIID() = default; | |||
| protected: | |||
| /// \brief Function to convert TensorTransform object into a TensorOperation object. | |||
| /// \return Shared pointer to TensorOperation object. | |||
| std::shared_ptr<TensorOperation> Parse() override; | |||
| private: | |||
| // Opaque parameter storage; defined in the implementation file. | |||
| struct Data; | |||
| std::shared_ptr<Data> data_; | |||
| }; | |||
| /// \brief MelScale TensorTransform. | |||
| /// \notes Convert normal STFT to STFT at the Mel scale. | |||
| class MS_API MelScale final : public TensorTransform { | |||
| @@ -171,6 +171,7 @@ constexpr char kHighpassBiquadOp[] = "HighpassBiquadOp"; | |||
| constexpr char kLFilterOp[] = "LFilterOp"; | |||
| constexpr char kLowpassBiquadOp[] = "LowpassBiquadOp"; | |||
| constexpr char kMagphaseOp[] = "MagphaseOp"; | |||
| constexpr char kMaskAlongAxisIIDOp[] = "MaskAlongAxisIIDOp"; | |||
| constexpr char kMelScaleOp[] = "MelScaleOp"; | |||
| constexpr char kMuLawDecodingOp[] = "MuLawDecodingOp"; | |||
| constexpr char kMuLawEncodingOp[] = "MuLawEncodingOp"; | |||
| @@ -29,9 +29,10 @@ from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_ | |||
| check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_compute_deltas, \ | |||
| check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \ | |||
| check_dither, check_equalizer_biquad, check_fade, check_flanger, check_gain, check_highpass_biquad, \ | |||
| check_lfilter, check_lowpass_biquad, check_magphase, check_masking, check_mel_scale, check_mu_law_coding, \ | |||
| check_overdrive, check_phase_vocoder, check_phaser, check_riaa_biquad, check_sliding_window_cmn, \ | |||
| check_spectral_centroid, check_spectrogram, check_time_stretch, check_treble_biquad, check_vol | |||
| check_lfilter, check_lowpass_biquad, check_magphase, check_mask_along_axis_iid, check_masking, check_mel_scale, \ | |||
| check_mu_law_coding, check_overdrive, check_phase_vocoder, check_phaser, check_riaa_biquad, \ | |||
| check_sliding_window_cmn, check_spectral_centroid, check_spectrogram, check_time_stretch, check_treble_biquad, \ | |||
| check_vol | |||
| class AudioTensorOperation(TensorOperation): | |||
| @@ -949,6 +950,37 @@ class Magphase(AudioTensorOperation): | |||
| return cde.MagphaseOperation(self.power) | |||
| class MaskAlongAxisIID(AudioTensorOperation): | |||
| """ | |||
| Apply a mask along `axis`. Mask will be applied from indices `[mask_start, mask_start + mask_width)`, where | |||
| `mask_width` is sampled from `uniform[0, mask_param]`, and `mask_start` from `uniform[0, max_length - mask_width]`, | |||
| `max_length` is the number of columns of the specified axis of the spectrogram. | |||
| Args: | |||
| mask_param (int): Number of columns to be masked, will be uniformly sampled from | |||
| [0, mask_param], must be non negative. | |||
| mask_value (float): Value to assign to the masked columns. | |||
| axis (int): Axis to apply masking on (1 for frequency and 2 for time). | |||
| Examples: | |||
| >>> import numpy as np | |||
| >>> | |||
| >>> waveform = np.random.random([1, 20, 20]) | |||
| >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) | |||
| >>> transforms = [audio.MaskAlongAxisIID(5, 0.5, 2)] | |||
| >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) | |||
| """ | |||
| @check_mask_along_axis_iid | |||
| def __init__(self, mask_param, mask_value, axis): | |||
| self.mask_param = mask_param | |||
| self.mask_value = mask_value | |||
| self.axis = axis | |||
| def parse(self): | |||
| return cde.MaskAlongAxisIIDOperation(self.mask_param, self.mask_value, self.axis) | |||
| DE_C_MEL_TYPE = {MelType.SLANEY: cde.MelType.DE_MEL_TYPE_SLANEY, | |||
| MelType.HTK: cde.MelType.DE_MEL_TYPE_HTK} | |||
| @@ -321,6 +321,22 @@ def check_lowpass_biquad(method): | |||
| return new_method | |||
| def check_mask_along_axis_iid(method): | |||
| """Wrapper method to check the parameters of MaskAlongAxisIID.""" | |||
| @wraps(method) | |||
| def new_method(self, *args, **kwargs): | |||
| [mask_param, mask_value, axis], _ = parse_user_args(method, *args, **kwargs) | |||
| # mask_param: must be an int accepted by check_non_negative_int32. | |||
| type_check(mask_param, (int,), "mask_param") | |||
| check_non_negative_int32(mask_param, "mask_param") | |||
| # mask_value: int or float that passes check_float32. | |||
| type_check(mask_value, (int, float,), "mask_value") | |||
| check_float32(mask_value, "mask_value") | |||
| # axis: must be an int equal to 1 or 2. | |||
| type_check(axis, (int,), "axis") | |||
| check_value(axis, [1, 2], "axis") | |||
| return method(self, *args, **kwargs) | |||
| return new_method | |||
| def check_mu_law_coding(method): | |||
| """Wrapper method to check the parameters of MuLawDecoding and MuLawEncoding""" | |||
| @@ -2341,3 +2341,107 @@ TEST_F(MindDataTestPipeline, TestPhaseVocoderWrongArgs) { | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_EQ(iter, nullptr); | |||
| } | |||
| /// Feature: MaskAlongAxisIID | |||
| /// Description: test MaskAlongAxisIID pipeline | |||
| /// Expectation: the returned result is as expected | |||
| TEST_F(MindDataTestPipeline, TestMaskAlongAxisIIDPipeline) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskAlongAxisIIDPipeline."; | |||
| std::shared_ptr<SchemaObj> schema = Schema(); | |||
| ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {1, 1, 200, 200})); | |||
| std::shared_ptr<Dataset> ds = RandomData(50, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| ds = ds->SetNumWorkers(4); | |||
| EXPECT_NE(ds, nullptr); | |||
| int mask_param = 40; | |||
| float mask_value = 1.0; | |||
| int axis = 1; | |||
| auto mask_op = audio::MaskAlongAxisIID(mask_param, mask_value, axis); | |||
| ds = ds->Map({mask_op}); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset. | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| // All parameters are valid here, so a usable iterator is required before it is dereferenced below. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| ASSERT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| std::vector<int64_t> expected = {1, 1, 200, 200}; | |||
| int i = 0; | |||
| // Every row must keep the input shape and float32 type. | |||
| while (row.size() != 0) { | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 4); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 50); | |||
| iter->Stop(); | |||
| } | |||
| /// Feature: MaskAlongAxisIID | |||
| /// Description: test MaskAlongAxisIID with a negative mask_param | |||
| /// Expectation: iterator creation fails and nullptr is returned | |||
| TEST_F(MindDataTestPipeline, TestMaskAlongAxisIIDInvalidMaskParam) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskAlongAxisIIDInvalidMaskParam."; | |||
| std::shared_ptr<SchemaObj> schema = Schema(); | |||
| ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {1, 1, 20, 20})); | |||
| std::shared_ptr<Dataset> ds = RandomData(50, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| ds = ds->SetNumWorkers(4); | |||
| EXPECT_NE(ds, nullptr); | |||
| // A negative mask_param is invalid. | |||
| int mask_param = -10; | |||
| float mask_value = 1.0; | |||
| int axis = 2; | |||
| audio::MaskAlongAxisIID mask_op(mask_param, mask_value, axis); | |||
| ds = ds->Map({mask_op}); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Creating the iterator builds and launches the Execution Tree; the test expects this to fail | |||
| // for the invalid parameter and yield a nullptr iterator. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_EQ(iter, nullptr); | |||
| } | |||
| /// Feature: MaskAlongAxisIID | |||
| /// Description: test MaskAlongAxisIID with an unsupported axis | |||
| /// Expectation: iterator creation fails and nullptr is returned | |||
| TEST_F(MindDataTestPipeline, TestMaskAlongAxisInvaildAxis) { | |||
| MS_LOG(INFO) << "MindDataTestPipeline-TestMaskAlongAxisInvaildAxis."; | |||
| std::shared_ptr<SchemaObj> schema = Schema(); | |||
| ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {1, 1, 20, 20})); | |||
| std::shared_ptr<Dataset> ds = RandomData(50, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| ds = ds->SetNumWorkers(4); | |||
| EXPECT_NE(ds, nullptr); | |||
| // The axis value is invalid (only 1 and 2 are accepted). | |||
| int mask_param = 10; | |||
| float mask_value = 1.0; | |||
| int axis = 0; | |||
| audio::MaskAlongAxisIID mask_op(mask_param, mask_value, axis); | |||
| ds = ds->Map({mask_op}); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Creating the iterator builds and launches the Execution Tree; the test expects this to fail | |||
| // for the invalid parameter and yield a nullptr iterator. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_EQ(iter, nullptr); | |||
| } | |||
| @@ -1036,6 +1036,23 @@ TEST_F(MindDataTestExecute, TestOverdriveBasicWithEager) { | |||
| EXPECT_TRUE(s01.IsOk()); | |||
| } | |||
| /// Feature: MaskAlongAxisIID | |||
| /// Description: run MaskAlongAxisIID in eager mode on a 1x1x4x4 float tensor | |||
| /// Expectation: the transform returns an OK status | |||
| TEST_F(MindDataTestExecute, TestMaskAlongAxisIID) { | |||
| MS_LOG(INFO) << "Doing MindDataTestExecute-TestMaskAlongAxisIID."; | |||
| // Build the input tensor from 16 literal values. | |||
| const std::vector<float> values({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, | |||
| 2.0f, 1.0f, 4.0f, 3.0f, 2.0f, 1.0f}); | |||
| TensorShape shape = TensorShape({1, 1, 4, 4}); | |||
| std::shared_ptr<Tensor> input; | |||
| ASSERT_OK(Tensor::CreateFromVector(values, shape, &input)); | |||
| auto input_tensor = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input)); | |||
| // mask_param = 3, mask_value = 9.0, axis = 2; the result overwrites input_tensor. | |||
| std::shared_ptr<TensorTransform> mask_transform = std::make_shared<audio::MaskAlongAxisIID>(3, 9.0, 2); | |||
| mindspore::dataset::Execute transform({mask_transform}); | |||
| Status status = transform(input_tensor, &input_tensor); | |||
| EXPECT_TRUE(status.IsOk()); | |||
| } | |||
| /// Feature: Overdrive | |||
| /// Description: test invalid parameter of Overdrive | |||
| /// Expectation: throw exception correctly | |||
| @@ -0,0 +1,124 @@ | |||
| # Copyright 2022 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| import copy | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.audio.transforms as audio | |||
| from mindspore import log as logger | |||
| BATCH = 2 | |||
| CHANNEL = 2 | |||
| FREQ = 10 | |||
| TIME = 10 | |||
| def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True): | |||
| """ | |||
| Check that data_me matches data_expected within rtol / atol. | |||
| If data_expected contains NaN, np.allclose must hold outright; otherwise a failed | |||
| np.allclose is handed to count_unequal_element for the detailed assertion. | |||
| """ | |||
| if np.any(np.isnan(data_expected)): | |||
| assert np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan) | |||
| elif not np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan): | |||
| count_unequal_element(data_expected, data_me, rtol, atol) | |||
| def count_unequal_element(data_expected, data_me, rtol, atol): | |||
| """ | |||
| Assert that both arrays have the same shape and that the fraction of elements whose | |||
| absolute error exceeds atol + |data_expected| * rtol is smaller than rtol. | |||
| """ | |||
| assert data_expected.shape == data_me.shape | |||
| total_count = len(data_expected.flatten()) | |||
| error = np.abs(data_expected - data_me) | |||
| # Boolean mask of the elements that fall outside the tolerance. | |||
| greater = np.greater(error, atol + np.abs(data_expected) * rtol) | |||
| loss_count = np.count_nonzero(greater) | |||
| assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format( | |||
| data_expected[greater], data_me[greater], error[greater]) | |||
| def gen(shape): | |||
| """ | |||
| Yield one 1-tuple holding a float32 array of the given shape, drawn after seeding NumPy with 0. | |||
| """ | |||
| np.random.seed(0) | |||
| data = np.random.random(shape) | |||
| yield (np.array(data, dtype=np.float32),) | |||
| def test_mask_along_axis_iid_eager(): | |||
| """ | |||
| Feature: MaskAlongAxisIID | |||
| Description: mindspore eager mode with normal testcase | |||
| Expectation: the returned result is as expected | |||
| """ | |||
| logger.info("test MaskAlongAxisIID op, eager") | |||
| # Case 1: only the output shape is checked. | |||
| spectrogram_01 = next(gen((BATCH, CHANNEL, FREQ, TIME)))[0] | |||
| output_01 = audio.MaskAlongAxisIID(mask_param=8, mask_value=5.0, axis=1)(spectrogram_01) | |||
| assert output_01.shape == (BATCH, CHANNEL, FREQ, TIME) | |||
| # Case 2: with mask_param=0 the output is compared against an untouched copy of the input. | |||
| spectrogram_02 = next(gen((BATCH, CHANNEL, FREQ, TIME)))[0] | |||
| expect_output = copy.deepcopy(spectrogram_02) | |||
| output_02 = audio.MaskAlongAxisIID(mask_param=0, mask_value=5.0, axis=1)(spectrogram_02) | |||
| allclose_nparray(output_02, expect_output, 0.0001, 0.0001) | |||
| def test_mask_along_axis_iid_pipeline(): | |||
| """ | |||
| Feature: MaskAlongAxisIID | |||
| Description: mindspore pipeline mode with normal testcase | |||
| Expectation: the returned result is as expected | |||
| """ | |||
| logger.info("test MaskAlongAxisIID op, pipeline") | |||
| generator = gen([BATCH, CHANNEL, FREQ, TIME]) | |||
| data1 = ds.GeneratorDataset(source=generator, column_names=["multi_dimensional_data"]) | |||
| transforms = [audio.MaskAlongAxisIID(mask_param=8, mask_value=5.0, axis=2)] | |||
| data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"]) | |||
| # Only the shape of each mapped row is verified. | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| out_put = item["multi_dimensional_data"] | |||
| assert out_put.shape == (BATCH, CHANNEL, FREQ, TIME) | |||
| def test_mask_along_axis_iid_invalid_input(): | |||
| """ | |||
| Feature: MaskAlongAxisIID | |||
| Description: mindspore eager mode with invalid input | |||
| Expectation: the returned result is as expected | |||
| """ | |||
| def test_invalid_param(test_name, mask_param, mask_value, axis, error, error_msg): | |||
| """ | |||
| Construct MaskAlongAxisIID with the given arguments and check that `error` is raised | |||
| and that `error_msg` is contained in the exception text. | |||
| """ | |||
| logger.info("Test MaskAlongAxisIID with wrong params: {0}".format(test_name)) | |||
| with pytest.raises(error) as error_info: | |||
| audio.MaskAlongAxisIID(mask_param, mask_value, axis) | |||
| assert error_msg in str(error_info.value) | |||
| # mask_param of the wrong type, then out of range. | |||
| test_invalid_param("invalid mask_param", 1.0, 1.0, 1, TypeError, | |||
| "Argument mask_param with value 1.0 is not of type [<class 'int'>], but got <class 'float'>.") | |||
| test_invalid_param("invalid mask_param", -1, 1.0, 1, ValueError, | |||
| "Input mask_param is not within the required interval of [0, 2147483647].") | |||
| # axis of the wrong type, then outside [1, 2] on both sides. | |||
| test_invalid_param("invalid axis", 5, 1.0, 5.0, TypeError, | |||
| "Argument axis with value 5.0 is not of type [<class 'int'>], but got <class 'float'>.") | |||
| test_invalid_param("invalid axis", 5, 1.0, 0, ValueError, | |||
| "Input axis is not within the required interval of [1, 2].") | |||
| test_invalid_param("invalid axis", 5, 1.0, 3, ValueError, | |||
| "Input axis is not within the required interval of [1, 2].") | |||
| # Run the three test functions directly when the file is executed as a script. | |||
| if __name__ == "__main__": | |||
| test_mask_along_axis_iid_eager() | |||
| test_mask_along_axis_iid_invalid_input() | |||
| test_mask_along_axis_iid_pipeline() | |||