Merge pull request !27487 from 杨旭华/SpectralCentroidOptags/v1.6.0
| @@ -47,6 +47,7 @@ | |||
| #include "minddata/dataset/audio/ir/kernels/phaser_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/riaa_biquad_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/sliding_window_cmn_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/spectral_centroid_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/spectrogram_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/time_masking_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h" | |||
| @@ -584,6 +585,32 @@ struct Spectrogram::Data { | |||
| bool onesided_; | |||
| }; | |||
| // SpectralCentroid Transform Operation. | |||
| struct SpectralCentroid::Data { | |||
| Data(int32_t sample_rate, int32_t n_fft, int32_t win_length, int32_t hop_length, int32_t pad, WindowType window) | |||
| : sample_rate_(sample_rate), | |||
| n_fft_(n_fft), | |||
| win_length_(win_length), | |||
| hop_length_(hop_length), | |||
| pad_(pad), | |||
| window_(window) {} | |||
| int32_t sample_rate_; | |||
| int32_t n_fft_; | |||
| int32_t win_length_; | |||
| int32_t hop_length_; | |||
| int32_t pad_; | |||
| WindowType window_; | |||
| }; | |||
| SpectralCentroid::SpectralCentroid(int32_t sample_rate, int32_t n_fft, int32_t win_length, int32_t hop_length, | |||
| int32_t pad, WindowType window) | |||
| : data_(std::make_shared<Data>(sample_rate, n_fft, win_length, hop_length, pad, window)) {} | |||
| std::shared_ptr<TensorOperation> SpectralCentroid::Parse() { | |||
| return std::make_shared<SpectralCentroidOperation>(data_->sample_rate_, data_->n_fft_, data_->win_length_, | |||
| data_->hop_length_, data_->pad_, data_->window_); | |||
| } | |||
| Spectrogram::Spectrogram(int32_t n_fft, int32_t win_length, int32_t hop_length, int32_t pad, WindowType window, | |||
| float power, bool normalized, bool center, BorderType pad_mode, bool onesided) | |||
| : data_(std::make_shared<Data>(n_fft, win_length, hop_length, pad, window, power, normalized, center, pad_mode, | |||
| @@ -51,6 +51,7 @@ | |||
| #include "minddata/dataset/audio/ir/kernels/phaser_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/riaa_biquad_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/sliding_window_cmn_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/spectral_centroid_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/spectrogram_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/time_masking_ir.h" | |||
| #include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h" | |||
| @@ -450,6 +451,19 @@ PYBIND_REGISTER(WindowType, 0, ([](const py::module *m) { | |||
| .export_values(); | |||
| })); | |||
| PYBIND_REGISTER( | |||
| SpectralCentroidOperation, 1, ([](const py::module *m) { | |||
| (void) | |||
| py::class_<audio::SpectralCentroidOperation, TensorOperation, std::shared_ptr<audio::SpectralCentroidOperation>>( | |||
| *m, "SpectralCentroidOperation") | |||
| .def(py::init([](int sample_rate, int n_fft, int win_length, int hop_length, int pad, WindowType window) { | |||
| auto spectral_centroid = | |||
| std::make_shared<audio::SpectralCentroidOperation>(sample_rate, n_fft, win_length, hop_length, pad, window); | |||
| THROW_IF_ERROR(spectral_centroid->ValidateParams()); | |||
| return spectral_centroid; | |||
| })); | |||
| })); | |||
| PYBIND_REGISTER( | |||
| SpectrogramOperation, 1, ([](const py::module *m) { | |||
| (void)py::class_<audio::SpectrogramOperation, TensorOperation, std::shared_ptr<audio::SpectrogramOperation>>( | |||
| @@ -33,6 +33,7 @@ add_library(audio-ir-kernels OBJECT | |||
| phaser_ir.cc | |||
| riaa_biquad_ir.cc | |||
| sliding_window_cmn_ir.cc | |||
| spectral_centroid_ir.cc | |||
| spectrogram_ir.cc | |||
| time_masking_ir.cc | |||
| time_stretch_ir.cc | |||
| @@ -0,0 +1,67 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/audio/ir/kernels/spectral_centroid_ir.h" | |||
| #include "minddata/dataset/audio/ir/validators.h" | |||
| #include "minddata/dataset/audio/kernels/spectral_centroid_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace audio { | |||
| // SpectralCentroidOperation | |||
| SpectralCentroidOperation::SpectralCentroidOperation(int32_t sample_rate, int32_t n_fft, int32_t win_length, | |||
| int32_t hop_length, int32_t pad, WindowType window) | |||
| : sample_rate_(sample_rate), | |||
| n_fft_(n_fft), | |||
| win_length_(win_length), | |||
| hop_length_(hop_length), | |||
| pad_(pad), | |||
| window_(window) {} | |||
| Status SpectralCentroidOperation::ValidateParams() { | |||
| RETURN_IF_NOT_OK(ValidateIntScalarPositive("SpectralCentroid", "sample_rate", sample_rate_)); | |||
| RETURN_IF_NOT_OK(ValidateIntScalarPositive("SpectralCentroid", "n_fft", n_fft_)); | |||
| RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("SpectralCentroid", "win_length", win_length_)); | |||
| RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("SpectralCentroid", "hop_length", hop_length_)); | |||
| RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("SpectralCentroid", "pad", pad_)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| win_length_ <= n_fft_, "SpectralCentroid: win_length must be less than or equal to n_fft, but got win_length: " + | |||
| std::to_string(win_length_) + ", n_fft: " + std::to_string(n_fft_)); | |||
| return Status::OK(); | |||
| } | |||
| std::shared_ptr<TensorOp> SpectralCentroidOperation::Build() { | |||
| int32_t win_length = (win_length_ == 0) ? n_fft_ : win_length_; | |||
| int32_t hop_length = (hop_length_ == 0) ? win_length / 2 : hop_length_; | |||
| std::shared_ptr<SpectralCentroidOp> tensor_op = | |||
| std::make_shared<SpectralCentroidOp>(sample_rate_, n_fft_, win_length, hop_length, pad_, window_); | |||
| return tensor_op; | |||
| } | |||
| Status SpectralCentroidOperation::to_json(nlohmann::json *out_json) { | |||
| nlohmann::json args; | |||
| args["sample_rate"] = sample_rate_; | |||
| args["n_fft"] = n_fft_; | |||
| args["win_length"] = win_length_; | |||
| args["hop_length"] = hop_length_; | |||
| args["pad"] = pad_; | |||
| args["window"] = window_; | |||
| *out_json = args; | |||
| return Status::OK(); | |||
| } | |||
| } // namespace audio | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,57 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_SPECTRAL_CENTROID_IR_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_SPECTRAL_CENTROID_IR_H_ | |||
| #include <memory> | |||
| #include <string> | |||
| #include "include/api/status.h" | |||
| #include "minddata/dataset/kernels/ir/tensor_operation.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace audio { | |||
| constexpr char kSpectralCentroidOperation[] = "SpectralCentroid"; | |||
| class SpectralCentroidOperation : public TensorOperation { | |||
| public: | |||
| SpectralCentroidOperation(int32_t sample_rate, int32_t n_fft, int32_t win_length, int32_t hop_length, int32_t pad, | |||
| WindowType window); | |||
| ~SpectralCentroidOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| Status ValidateParams() override; | |||
| std::string Name() const override { return kSpectralCentroidOperation; } | |||
| Status to_json(nlohmann::json *out_json) override; | |||
| private: | |||
| int32_t sample_rate_; | |||
| int32_t n_fft_; | |||
| int32_t win_length_; | |||
| int32_t hop_length_; | |||
| int32_t pad_; | |||
| WindowType window_; | |||
| }; | |||
| } // namespace audio | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_SPECTRAL_CENTROID_IR_H_ | |||
| @@ -34,6 +34,7 @@ add_library(audio-kernels OBJECT | |||
| phaser_op.cc | |||
| riaa_biquad_op.cc | |||
| sliding_window_cmn_op.cc | |||
| spectral_centroid_op.cc | |||
| spectrogram_op.cc | |||
| time_masking_op.cc | |||
| time_stretch_op.cc | |||
| @@ -1511,6 +1511,84 @@ Status Spectrogram(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> | |||
| } | |||
| } | |||
| template <typename T> | |||
| Status SpectralCentroidImpl(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int sample_rate, | |||
| int n_fft, int win_length, int hop_length, int pad, WindowType window) { | |||
| std::shared_ptr<Tensor> output_tensor; | |||
| std::shared_ptr<Tensor> spectrogram_tensor; | |||
| if (input->type() == DataType::DE_FLOAT64) { | |||
| SpectrogramImpl<double>(input, &spectrogram_tensor, pad, window, n_fft, hop_length, win_length, 1.0, false, true, | |||
| BorderType::kReflect, true); | |||
| } else { | |||
| SpectrogramImpl<float>(input, &spectrogram_tensor, pad, window, n_fft, hop_length, win_length, 1.0, false, true, | |||
| BorderType::kReflect, true); | |||
| } | |||
| std::shared_ptr<Tensor> freqs; | |||
| // sample_rate / TWO is half of sample_rate and n_fft / TWO is half of n_fft | |||
| RETURN_IF_NOT_OK(Linspace<T>(&freqs, 0, sample_rate / TWO, 1 + n_fft / TWO)); | |||
| auto itr_freq = freqs->begin<T>(); | |||
| int num = freqs->Size(); | |||
| TensorShape spectrogram_shape = spectrogram_tensor->shape(); | |||
| int waveform = spectrogram_shape[-1]; | |||
| int channals = spectrogram_shape[-2]; | |||
| std::vector output_shape = spectrogram_shape.AsVector(); | |||
| output_shape[output_shape.size() - TWO] = 1; | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{output_shape}, input->type(), &output_tensor)); | |||
| Eigen::MatrixXd freqs_r = Eigen::MatrixXd::Zero(num, 1); | |||
| for (int i = 0; i < num; ++i) { | |||
| freqs_r(i, 0) = *itr_freq; | |||
| itr_freq++; | |||
| } | |||
| int k_num = spectrogram_tensor->Size() / (waveform * channals); | |||
| std::vector<Eigen::MatrixXd> specgram; | |||
| std::vector<Eigen::MatrixXd> specgram_result; | |||
| std::vector<Eigen::MatrixXd> specgram_sum; | |||
| Eigen::MatrixXd tmp = Eigen::MatrixXd::Zero(channals, waveform); | |||
| auto itr_spectrogram = spectrogram_tensor->begin<T>(); | |||
| for (int k = 0; k < k_num; k++) { | |||
| for (int i = 0; i < channals; ++i) { | |||
| for (int j = 0; j < waveform; ++j) { | |||
| tmp(i, j) = *itr_spectrogram; | |||
| itr_spectrogram++; | |||
| } | |||
| } | |||
| specgram.push_back(tmp); | |||
| specgram_sum.push_back(specgram[k].colwise().sum()); | |||
| } | |||
| for (int k = 0; k < k_num; k++) { | |||
| for (int i = 0; i < channals; ++i) { | |||
| for (int j = 0; j < waveform; ++j) { | |||
| tmp(i, j) = freqs_r(i, 0) * specgram[k](i, j); | |||
| } | |||
| } | |||
| specgram_result.push_back((tmp).colwise().sum()); | |||
| } | |||
| auto itr_output = output_tensor->begin<T>(); | |||
| for (int k = 0; k < k_num; k++) { | |||
| for (int i = 0; i < waveform; ++i) { | |||
| *itr_output = specgram_result[k](0, i) / specgram_sum[k](0, i); | |||
| itr_output++; | |||
| } | |||
| } | |||
| *output = output_tensor; | |||
| return Status::OK(); | |||
| } | |||
| Status SpectralCentroid(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int sample_rate, | |||
| int n_fft, int win_length, int hop_length, int pad, WindowType window) { | |||
| RETURN_IF_NOT_OK(ValidateLowRank("SpectralCentroid", input, kMinAudioDim, "<..., time>")); | |||
| RETURN_IF_NOT_OK(ValidateTensorNumeric("SpectralCentroid", input)); | |||
| std::shared_ptr<Tensor> input_tensor; | |||
| if (input->type() != DataType::DE_FLOAT64) { | |||
| RETURN_IF_NOT_OK(TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32))); | |||
| return SpectralCentroidImpl<float>(input_tensor, output, sample_rate, n_fft, win_length, hop_length, pad, window); | |||
| } else { | |||
| input_tensor = input; | |||
| return SpectralCentroidImpl<double>(input_tensor, output, sample_rate, n_fft, win_length, hop_length, pad, window); | |||
| } | |||
| } | |||
| Status ComputeDeltas(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t win_length, | |||
| const BorderType &mode) { | |||
| RETURN_IF_NOT_OK(ValidateLowRank("ComputeDeltas", input, kDefaultAudioDim, "<..., freq, time>")); | |||
| @@ -333,6 +333,19 @@ Status Spectrogram(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> | |||
| int n_fft, int hop_length, int win_length, float power, bool normalized, bool center, | |||
| BorderType pad_mode, bool onesided); | |||
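| /// \brief Compute the spectral centroid of each STFT frame of an audio signal. | |||
| /// \param[in] input Tensor of shape <..., time>. | |||
| /// \param[out] output Tensor with one centroid value per frame (the spectrogram shape with its frequency dimension set to 1). | |||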
| /// \param[in] sample_rate The sample rate of input tensor. | |||
| /// \param[in] n_fft Size of FFT, creates n_fft / 2 + 1 bins. | |||
| /// \param[in] win_length Window size. | |||
| /// \param[in] hop_length Length of hop between STFT windows. | |||
| /// \param[in] pad Two sided padding of signal. | |||
| /// \param[in] window A function to create a window tensor that is applied/multiplied to each frame/window. | |||
| /// \return Status code. | |||
| Status SpectralCentroid(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int sample_rate, | |||
| int n_fft, int win_length, int hop_length, int pad, WindowType window); | |||
| /// \brief Stretch STFT in time at a given rate, without changing the pitch. | |||
| /// \param input: Tensor of shape <..., freq, time>. | |||
| /// \param rate: Stretch factor. | |||
| @@ -0,0 +1,40 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/audio/kernels/spectral_centroid_op.h" | |||
| #include "minddata/dataset/audio/kernels/audio_utils.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| Status SpectralCentroidOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| return SpectralCentroid(input, output, sample_rate_, n_fft_, win_length_, hop_length_, pad_, window_); | |||
| } | |||
| Status SpectralCentroidOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) { | |||
| RETURN_IF_NOT_OK(TensorOp::OutputType(inputs, outputs)); | |||
| RETURN_IF_NOT_OK( | |||
| ValidateTensorType("SpectralCentroid", inputs[0].IsNumeric(), "[int, float, double]", inputs[0].ToString())); | |||
| if (inputs[0] == DataType(DataType::DE_FLOAT64)) { | |||
| outputs[0] = DataType(DataType::DE_FLOAT64); | |||
| } else { | |||
| outputs[0] = DataType(DataType::DE_FLOAT32); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_SPECTRAL_CENTROID_OP_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_SPECTRAL_CENTROID_OP_H_ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "minddata/dataset/core/tensor.h" | |||
| #include "minddata/dataset/kernels/tensor_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| class SpectralCentroidOp : public TensorOp { | |||
| public: | |||
| SpectralCentroidOp(int32_t sample_rate, int32_t n_fft, int32_t win_length, int32_t hop_length, int32_t pad, | |||
| WindowType window) | |||
| : sample_rate_(sample_rate), | |||
| n_fft_(n_fft), | |||
| win_length_(win_length), | |||
| hop_length_(hop_length), | |||
| pad_(pad), | |||
| window_(window) {} | |||
| ~SpectralCentroidOp() = default; | |||
| Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override; | |||
| Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override; | |||
| std::string Name() const override { return kSpectralCentroidOp; }; | |||
| private: | |||
| int32_t sample_rate_; | |||
| int32_t n_fft_; | |||
| int32_t win_length_; | |||
| int32_t hop_length_; | |||
| int32_t pad_; | |||
| WindowType window_; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_SPECTRAL_CENTROID_OP_H_ | |||
| @@ -758,6 +758,39 @@ class MS_API SlidingWindowCmn final : public TensorTransform { | |||
| std::shared_ptr<Data> data_; | |||
| }; | |||
| /// \brief Create a spectral centroid from an audio signal. | |||
| class MS_API SpectralCentroid : public TensorTransform { | |||
| public: | |||
| /// \brief Constructor. | |||
| /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz). | |||
| /// \param[in] n_fft Size of FFT, creates n_fft / 2 + 1 bins (Default: 400). | |||
| /// \param[in] win_length Window size (Default: 0, will use n_fft). | |||
| /// \param[in] hop_length Length of hop between STFT windows (Default: 0, will use win_length / 2). | |||
| /// \param[in] pad Two sided padding of signal (Default: 0). | |||
| /// \param[in] window Window function that is applied/multiplied to each frame/window, | |||
| /// which can be WindowType::kBartlett, WindowType::kBlackman, WindowType::kHamming, | |||
| /// WindowType::kHann or WindowType::kKaiser (Default: WindowType::kHann). | |||
| SpectralCentroid(int sample_rate, int32_t n_fft = 400, int32_t win_length = 0, int32_t hop_length = 0, | |||
| int32_t pad = 0, WindowType window = WindowType::kHann); | |||
| ~SpectralCentroid() = default; | |||
| protected: | |||
| /// \brief Function to convert TensorTransform object into a TensorOperation object. | |||
| /// \return Shared pointer to TensorOperation object. | |||
| std::shared_ptr<TensorOperation> Parse() override; | |||
| private: | |||
| int32_t sample_rate_; | |||
| int32_t n_fft_; | |||
| int32_t win_length_; | |||
| int32_t hop_length_; | |||
| int32_t pad_; | |||
| WindowType window_; | |||
| struct Data; | |||
| std::shared_ptr<Data> data_; | |||
| }; | |||
| /// \brief Create a spectrogram from an audio signal. | |||
| class MS_API Spectrogram : public TensorTransform { | |||
| public: | |||
| @@ -177,6 +177,7 @@ constexpr char kOverdriveOp[] = "OverdriveOp"; | |||
| constexpr char kPhaserOp[] = "PhaserOp"; | |||
| constexpr char kRiaaBiquadOp[] = "RiaaBiquadOp"; | |||
| constexpr char kSlidingWindowCmnOp[] = "SlidingWindowCmnOp"; | |||
| constexpr char kSpectralCentroidOp[] = "SpectralCentroidOp"; | |||
| constexpr char kSpectrogramOp[] = "SpectrogramOp"; | |||
| constexpr char kTimeMaskingOp[] = "TimeMaskingOp"; | |||
| constexpr char kTimeStretchOp[] = "TimeStretchOp"; | |||
| @@ -29,8 +29,8 @@ from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_ | |||
| check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \ | |||
| check_dither, check_equalizer_biquad, check_fade, check_flanger, check_gain, check_highpass_biquad, \ | |||
| check_lfilter, check_lowpass_biquad, check_magphase, check_masking, check_mu_law_coding, check_overdrive, \ | |||
| check_phaser, check_riaa_biquad, check_sliding_window_cmn, check_spectrogram, check_time_stretch, \ | |||
| check_treble_biquad, check_vol | |||
| check_phaser, check_riaa_biquad, check_sliding_window_cmn, check_spectral_centroid, check_spectrogram, \ | |||
| check_time_stretch, check_treble_biquad, check_vol | |||
| class AudioTensorOperation(TensorOperation): | |||
| @@ -1019,6 +1019,43 @@ DE_C_WINDOW_TYPE = {WindowType.BARTLETT: cde.WindowType.DE_BARTLETT, | |||
| WindowType.KAISER: cde.WindowType.DE_KAISER} | |||
class SpectralCentroid(TensorOperation):
    """
    Create a spectral centroid from an audio signal.

    Args:
        sample_rate (int): Sampling rate of the waveform, e.g. 44100 (Hz).
        n_fft (int, optional): Size of FFT, creates n_fft // 2 + 1 bins (default=400).
        win_length (int, optional): Window size (default=None, will use n_fft).
        hop_length (int, optional): Length of hop between STFT windows (default=None, will use win_length // 2).
        pad (int, optional): Two sided padding of signal (default=0).
        window (WindowType, optional): Window function that is applied/multiplied to each frame/window,
            which can be WindowType.BARTLETT, WindowType.BLACKMAN, WindowType.HAMMING, WindowType.HANN
            or WindowType.KAISER (default=WindowType.HANN).

    Examples:
        >>> import numpy as np
        >>>
        >>> waveform = np.random.random([5, 10, 20])
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.SpectralCentroid(44100)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
    """

    @check_spectral_centroid
    def __init__(self, sample_rate, n_fft=400, win_length=None, hop_length=None, pad=0, window=WindowType.HANN):
        self.sample_rate = sample_rate
        self.n_fft = n_fft
        # A falsy win_length / hop_length (None or 0) selects the derived default.
        self.win_length = win_length or n_fft
        self.hop_length = hop_length or self.win_length // 2
        self.pad = pad
        self.window = window

    def parse(self):
        # Translate the Python-side window enum into its C++ counterpart before building the operation.
        c_window = DE_C_WINDOW_TYPE[self.window]
        return cde.SpectralCentroidOperation(self.sample_rate, self.n_fft, self.win_length, self.hop_length,
                                             self.pad, c_window)
| class Spectrogram(TensorOperation): | |||
| """ | |||
| Create a spectrogram from an audio signal. | |||
| @@ -655,3 +655,32 @@ def check_compute_deltas(method): | |||
| return method(self, *args, **kwargs) | |||
| return new_method | |||
def check_spectral_centroid(method):
    """Wrapper method to check the parameters of SpectralCentroid."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        params, _ = parse_user_args(method, *args, **kwargs)
        sample_rate, n_fft, win_length, hop_length, pad, window = params

        # Always-present arguments; the order of the checks decides which error is reported first.
        type_check(sample_rate, (int,), "sample_rate")
        check_non_negative_int32(sample_rate, "sample_rate")
        type_check(pad, (int,), "pad")
        check_non_negative_int32(pad, "pad")
        type_check(window, (WindowType,), "window")
        type_check(n_fft, (int,), "n_fft")
        check_pos_int32(n_fft, "n_fft")

        # Optional arguments are validated only when the caller supplied them.
        if win_length is not None:
            type_check(win_length, (int,), "win_length")
            check_pos_int32(win_length, "win_length")
            if win_length > n_fft:
                raise ValueError(
                    "Input win_length should be no more than n_fft, but got win_length: {0} and n_fft: {1}.".format(
                        win_length, n_fft))
        if hop_length is not None:
            type_check(hop_length, (int,), "hop_length")
            check_pos_int32(hop_length, "hop_length")

        return method(self, *args, **kwargs)

    return new_method
| @@ -55,12 +55,12 @@ TEST_F(MindDataTestPipeline, TestRiaaBiquadBasicSampleRate44100) { | |||
| int i = 0; | |||
| while (row.size() != 0) { | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 50); | |||
| @@ -94,12 +94,12 @@ TEST_F(MindDataTestPipeline, TestRiaaBiquadBasicSampleRate48000) { | |||
| int i = 0; | |||
| while (row.size() != 0) { | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 50); | |||
| @@ -133,12 +133,12 @@ TEST_F(MindDataTestPipeline, TestRiaaBiquadBasicSampleRate88200) { | |||
| int i = 0; | |||
| while (row.size() != 0) { | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 50); | |||
| @@ -172,12 +172,12 @@ TEST_F(MindDataTestPipeline, TestRiaaBiquadBasicSampleRate96000) { | |||
| int i = 0; | |||
| while (row.size() != 0) { | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 50); | |||
| @@ -255,6 +255,131 @@ TEST_F(MindDataTestPipeline, TestSlidingWindowCmnWrongArgs) { | |||
| EXPECT_EQ(iter_2, nullptr); | |||
| } | |||
| /// Feature: SpectralCentroid. | |||
| /// Description: test pipeline. | |||
| /// Expectation: success. | |||
| TEST_F(MindDataTestPipeline, TestSpectralCentroidBasic) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpectralCentroidBasic."; | |||
| std::shared_ptr<SchemaObj> schema = Schema(); | |||
| ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {1, 60})); | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectral_centroid = audio::SpectralCentroid({44100, 8, 8, 4, 1, WindowType::kHann}); | |||
| auto ds1 = ds->Map({spectral_centroid}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| std::shared_ptr<Iterator> iter = ds1->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 8); | |||
| iter->Stop(); | |||
| } | |||
| /// Feature: SpectralCentroid. | |||
| /// Description: test pipeline. | |||
| /// Expectation: success. | |||
| TEST_F(MindDataTestPipeline, TestSpectralCentroidDefault) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpectralCentroidDefault."; | |||
| std::shared_ptr<SchemaObj> schema = Schema(); | |||
| ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {1, 60})); | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectral_centroid = audio::SpectralCentroid({44100}); | |||
| auto ds1 = ds->Map({spectral_centroid}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| std::shared_ptr<Iterator> iter = ds1->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 8); | |||
| iter->Stop(); | |||
| } | |||
| /// Feature: SpectralCentroid. | |||
| /// Description: test some invalid parameters. | |||
| /// Expectation: success. | |||
| TEST_F(MindDataTestPipeline, TestSpectralCentroidWrongArgs) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSpectralCentroidWrongArgs."; | |||
| std::shared_ptr<SchemaObj> schema = Schema(); | |||
| // Original waveform | |||
| ASSERT_OK(schema->add_column("col", mindspore::DataType::kNumberTypeFloat32, {1, 50})); | |||
| std::shared_ptr<Dataset> ds = RandomData(50, schema); | |||
| std::shared_ptr<Dataset> ds01; | |||
| std::shared_ptr<Dataset> ds02; | |||
| std::shared_ptr<Dataset> ds03; | |||
| std::shared_ptr<Dataset> ds04; | |||
| std::shared_ptr<Dataset> ds05; | |||
| EXPECT_NE(ds, nullptr); | |||
| // Check n_fft | |||
| MS_LOG(INFO) << "n_fft is zero."; | |||
| auto spectral_centroid_op_1 = audio::SpectralCentroid({44100, 0, 8, 4, 1, WindowType::kHann}); | |||
| ds01 = ds->Map({spectral_centroid_op_1}); | |||
| EXPECT_NE(ds01, nullptr); | |||
| std::shared_ptr<Iterator> iter01 = ds01->CreateIterator(); | |||
| EXPECT_EQ(iter01, nullptr); | |||
| // Check win_length | |||
| MS_LOG(INFO) << "win_length is -1."; | |||
| auto spectral_centroid_op_2 = audio::SpectralCentroid({44100, 8, -1, 4, 1, WindowType::kHann}); | |||
| ds02 = ds->Map({spectral_centroid_op_2}); | |||
| EXPECT_NE(ds02, nullptr); | |||
| std::shared_ptr<Iterator> iter02 = ds02->CreateIterator(); | |||
| EXPECT_EQ(iter02, nullptr); | |||
| // Check hop_length | |||
| MS_LOG(INFO) << "hop_length is -1."; | |||
| auto spectral_centroid_op_3 = audio::SpectralCentroid({44100, 8, 8, -1, 1, WindowType::kHann}); | |||
| ds03 = ds->Map({spectral_centroid_op_3}); | |||
| EXPECT_NE(ds03, nullptr); | |||
| std::shared_ptr<Iterator> iter03 = ds03->CreateIterator(); | |||
| EXPECT_EQ(iter03, nullptr); | |||
| // Check pad | |||
| MS_LOG(INFO) << "pad is -1."; | |||
| auto spectral_centroid_op_4 = audio::SpectralCentroid({44100, 8, 8, 4, -1, WindowType::kHann}); | |||
| ds04 = ds->Map({spectral_centroid_op_4}); | |||
| EXPECT_NE(ds04, nullptr); | |||
| std::shared_ptr<Iterator> iter04 = ds04->CreateIterator(); | |||
| EXPECT_EQ(iter04, nullptr); | |||
| // Check sample_rate | |||
| MS_LOG(INFO) << "sample_rate is -1."; | |||
| auto spectral_centroid_op_5 = audio::SpectralCentroid({-1, 8, 8, 4, 8, WindowType::kHann}); | |||
| ds05 = ds->Map({spectral_centroid_op_5}); | |||
| EXPECT_NE(ds05, nullptr); | |||
| std::shared_ptr<Iterator> iter05 = ds04->CreateIterator(); | |||
| EXPECT_EQ(iter05, nullptr); | |||
| } | |||
| /// Feature: Spectrogram. | |||
| /// Description: test pipeline. | |||
| /// Expectation: success. | |||
| @@ -266,8 +391,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramDefault) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({40, 40, 20, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, | |||
| true}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({40, 40, 20, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, true}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -298,8 +423,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramOnesidedFalse) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({40, 40, 20, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, | |||
| false}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({40, 40, 20, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, false}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -330,8 +455,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramCenterFalse) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({40, 40, 20, 0, WindowType::kHann, 2.0, false, false, BorderType::kReflect, | |||
| true}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({40, 40, 20, 0, WindowType::kHann, 2.0, false, false, BorderType::kReflect, true}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -362,8 +487,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramNormalizedTrue) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({40, 40, 20, 0, WindowType::kHann, 2.0, true, true, BorderType::kReflect, | |||
| true}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({40, 40, 20, 0, WindowType::kHann, 2.0, true, true, BorderType::kReflect, true}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -394,8 +519,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramWindowHamming) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({40, 40, 20, 0, WindowType::kHamming, 2.0, false, true, | |||
| BorderType::kReflect, true}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({40, 40, 20, 0, WindowType::kHamming, 2.0, false, true, BorderType::kReflect, true}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -426,8 +551,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramPadmodeEdge) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({40, 40, 20, 0, WindowType::kHamming, 2.0, false, true, | |||
| BorderType::kEdge, true}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({40, 40, 20, 0, WindowType::kHamming, 2.0, false, true, BorderType::kEdge, true}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -458,8 +583,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramPower0) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({40, 40, 20, 0, WindowType::kHamming, 0, false, true, BorderType::kReflect, | |||
| true}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({40, 40, 20, 0, WindowType::kHamming, 0, false, true, BorderType::kReflect, true}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -490,8 +615,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramNfft50) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({50, 40, 20, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, | |||
| true}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({50, 40, 20, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, true}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -522,8 +647,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramPad10) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({40, 40, 20, 10, WindowType::kHann, 2.0, false, true, BorderType::kReflect, | |||
| true}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({40, 40, 20, 10, WindowType::kHann, 2.0, false, true, BorderType::kReflect, true}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -554,8 +679,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramWinlength30) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({40, 30, 20, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, | |||
| true}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({40, 30, 20, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, true}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -586,8 +711,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramHoplength30) { | |||
| std::shared_ptr<Dataset> ds = RandomData(8, schema); | |||
| EXPECT_NE(ds, nullptr); | |||
| auto spectrogram = audio::Spectrogram({40, 40, 30, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, | |||
| true}); | |||
| auto spectrogram = | |||
| audio::Spectrogram({40, 40, 30, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, true}); | |||
| auto ds1 = ds->Map({spectrogram}, {"waveform"}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| @@ -627,8 +752,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramWrongArgs) { | |||
| // Check n_fft | |||
| MS_LOG(INFO) << "n_fft is zero."; | |||
| auto spectrogram_op_01 = audio::Spectrogram({0, 40, 20, 0, WindowType::kHann, 2.0, false, true, | |||
| BorderType::kReflect, true}); | |||
| auto spectrogram_op_01 = | |||
| audio::Spectrogram({0, 40, 20, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, true}); | |||
| ds01 = ds->Map({spectrogram_op_01}); | |||
| EXPECT_NE(ds01, nullptr); | |||
| @@ -637,8 +762,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramWrongArgs) { | |||
| // Check win_length | |||
| MS_LOG(INFO) << "win_length is -1."; | |||
| auto spectrogram_op_02 = audio::Spectrogram({40, -1, 20, 0, WindowType::kHann, 2.0, false, true, | |||
| BorderType::kReflect, true}); | |||
| auto spectrogram_op_02 = | |||
| audio::Spectrogram({40, -1, 20, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, true}); | |||
| ds02 = ds->Map({spectrogram_op_02}); | |||
| EXPECT_NE(ds02, nullptr); | |||
| @@ -647,8 +772,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramWrongArgs) { | |||
| // Check hop_length | |||
| MS_LOG(INFO) << "hop_length is -1."; | |||
| auto spectrogram_op_03 = audio::Spectrogram({40, 40, -1, 0, WindowType::kHann, 2.0, false, true, | |||
| BorderType::kReflect, true}); | |||
| auto spectrogram_op_03 = | |||
| audio::Spectrogram({40, 40, -1, 0, WindowType::kHann, 2.0, false, true, BorderType::kReflect, true}); | |||
| ds03 = ds->Map({spectrogram_op_03}); | |||
| EXPECT_NE(ds03, nullptr); | |||
| @@ -657,8 +782,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramWrongArgs) { | |||
| // Check power | |||
| MS_LOG(INFO) << "power is -1."; | |||
| auto spectrogram_op_04 = audio::Spectrogram({40, 40, 20, 0, WindowType::kHann, -1, false, true, | |||
| BorderType::kReflect, true}); | |||
| auto spectrogram_op_04 = | |||
| audio::Spectrogram({40, 40, 20, 0, WindowType::kHann, -1, false, true, BorderType::kReflect, true}); | |||
| ds04 = ds->Map({spectrogram_op_04}); | |||
| EXPECT_NE(ds04, nullptr); | |||
| @@ -667,8 +792,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramWrongArgs) { | |||
| // Check pad | |||
| MS_LOG(INFO) << "pad is -1."; | |||
| auto spectrogram_op_05 = audio::Spectrogram({40, 40, 20, -1, WindowType::kHann, 2.0, false, true, | |||
| BorderType::kReflect, true}); | |||
| auto spectrogram_op_05 = | |||
| audio::Spectrogram({40, 40, 20, -1, WindowType::kHann, 2.0, false, true, BorderType::kReflect, true}); | |||
| ds05 = ds->Map({spectrogram_op_05}); | |||
| EXPECT_NE(ds05, nullptr); | |||
| @@ -677,8 +802,8 @@ TEST_F(MindDataTestPipeline, TestSpectrogramWrongArgs) { | |||
| // Check n_fft and win_length | |||
| MS_LOG(INFO) << "n_fft is 40, win_length is 50."; | |||
| auto spectrogram_op_06 = audio::Spectrogram({40, 50, 20, -1, WindowType::kHann, 2.0, false, true, | |||
| BorderType::kReflect, true}); | |||
| auto spectrogram_op_06 = | |||
| audio::Spectrogram({40, 50, 20, -1, WindowType::kHann, 2.0, false, true, BorderType::kReflect, true}); | |||
| ds06 = ds->Map({spectrogram_op_06}); | |||
| EXPECT_NE(ds06, nullptr); | |||
| @@ -841,12 +966,12 @@ TEST_F(MindDataTestPipeline, TestTrebleBiquadBasic) { | |||
| int i = 0; | |||
| while (row.size() != 0) { | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| auto col = row["waveform"]; | |||
| ASSERT_EQ(col.Shape(), expected); | |||
| ASSERT_EQ(col.Shape().size(), 2); | |||
| ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| i++; | |||
| } | |||
| EXPECT_EQ(i, 50); | |||
| @@ -1176,12 +1176,9 @@ TEST_F(MindDataTestExecute, TestPhaserBasicWithEager) { | |||
| /// Expectation: throw exception correctly | |||
| TEST_F(MindDataTestExecute, TestPhaserInputArgWithEager) { | |||
| MS_LOG(INFO) << "Doing MindDataTestExecute-TestPhaserInputArgWithEager"; | |||
| std::vector<double> labels = { | |||
| 0.271, 1.634, 9.246, 0.108, | |||
| 1.138, 1.156, 3.394, 1.55, | |||
| 3.614, 1.8402, 0.718, 4.599, | |||
| 5.64, 2.510620117187500000e-02, 1.38, 5.825, | |||
| 4.1906, 5.28, 1.052, 9.36}; | |||
| std::vector<double> labels = {0.271, 1.634, 9.246, 0.108, 1.138, 1.156, 3.394, | |||
| 1.55, 3.614, 1.8402, 0.718, 4.599, 5.64, 2.510620117187500000e-02, | |||
| 1.38, 5.825, 4.1906, 5.28, 1.052, 9.36}; | |||
| std::shared_ptr<Tensor> input; | |||
| ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({4, 5}), &input)); | |||
| @@ -1995,11 +1992,11 @@ TEST_F(MindDataTestExecute, TestCharNGramParam) { | |||
| // Create expected output. | |||
| std::shared_ptr<Tensor> de_expected01; | |||
| std::vector<float> expected01 = {-0.840079,-0.0270003,-0.833472,0.588367,-0.210012}; | |||
| std::vector<float> expected01 = {-0.840079, -0.0270003, -0.833472, 0.588367, -0.210012}; | |||
| ASSERT_OK(Tensor::CreateFromVector(expected01, &de_expected01)); | |||
| auto ms_expected01 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected01)); | |||
| std::shared_ptr<Tensor> de_expected02; | |||
| std::vector<float> expected02 = {-1.34122,0.0442693,-0.48697,0.662939,-0.367669}; | |||
| std::vector<float> expected02 = {-1.34122, 0.0442693, -0.48697, 0.662939, -0.367669}; | |||
| ASSERT_OK(Tensor::CreateFromVector(expected02, &de_expected02)); | |||
| auto ms_expected02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected02)); | |||
| @@ -2059,7 +2056,7 @@ TEST_F(MindDataTestExecute, TestToVectorsParamForCharNGram) { | |||
| ASSERT_OK(Tensor::CreateFromVector(expected02, &de_expected02)); | |||
| auto ms_expected02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected02)); | |||
| std::shared_ptr<Tensor> de_expected03; | |||
| std::vector<float> expected03 = {-0.840079,-0.0270003,-0.833472,0.588367,-0.210012}; | |||
| std::vector<float> expected03 = {-0.840079, -0.0270003, -0.833472, 0.588367, -0.210012}; | |||
| ASSERT_OK(Tensor::CreateFromVector(expected03, &de_expected03)); | |||
| auto ms_expected03 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected03)); | |||
| @@ -2229,10 +2226,43 @@ TEST_F(MindDataTestExecute, TestSpectrogramEager) { | |||
| std::vector<double> waveform = {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1}; | |||
| ASSERT_OK(Tensor::CreateFromVector(waveform, TensorShape({1, (long)waveform.size()}), &test_input_tensor)); | |||
| auto input_tensor = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(test_input_tensor)); | |||
| std::shared_ptr<TensorTransform> spectrogram = std::make_shared<audio::Spectrogram>(8, 8, 4, 0, WindowType::kHann, | |||
| 2., false, true, | |||
| BorderType::kReflect, true); | |||
| std::shared_ptr<TensorTransform> spectrogram = | |||
| std::make_shared<audio::Spectrogram>(8, 8, 4, 0, WindowType::kHann, 2., false, true, BorderType::kReflect, true); | |||
| auto transform = Execute({spectrogram}); | |||
| Status rc = transform({input_tensor}, &input_tensor); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| } | |||
| /// Feature: SpectralCentroid. | |||
| /// Description: test SpectralCentroid in eager mode. | |||
| /// Expectation: the data is processed successfully. | |||
| TEST_F(MindDataTestExecute, TestSpectralCentroidEager) { | |||
| MS_LOG(INFO) << "Doing MindDataTestExecute-SpectralCentroidEager."; | |||
| std::shared_ptr<Tensor> test_input_tensor; | |||
| std::vector<double> waveform = {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1}; | |||
| ASSERT_OK(Tensor::CreateFromVector(waveform, TensorShape({1, (long)waveform.size()}), &test_input_tensor)); | |||
| auto input_tensor = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(test_input_tensor)); | |||
| std::shared_ptr<TensorTransform> spectral_centroid = | |||
| std::make_shared<audio::SpectralCentroid>(44100, 8, 8, 4, 1, WindowType::kHann); | |||
| auto transform = Execute({spectral_centroid}); | |||
| Status rc = transform({input_tensor}, &input_tensor); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| } | |||
| /// Feature: SpectralCentroid. | |||
| /// Description: test wrong input args of SpectralCentroid in eager mode. | |||
| /// Expectation: Expectation: throw exception correctly | |||
| TEST_F(MindDataTestExecute, TestSpectralCentroidWithWrongArg) { | |||
| MS_LOG(INFO) << "Doing MindDataTestExecute-TestSpectralCentroidWithWrongArg."; | |||
| std::shared_ptr<Tensor> test_input_tensor; | |||
| std::vector<double> waveform = {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1}; | |||
| ASSERT_OK(Tensor::CreateFromVector(waveform, TensorShape({1, (long)waveform.size()}), &test_input_tensor)); | |||
| auto input_tensor = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(test_input_tensor)); | |||
| // Check sample_rate | |||
| MS_LOG(INFO) << "sample_rate is zero."; | |||
| std::shared_ptr<TensorTransform> spectral_centroid = | |||
| std::make_shared<audio::SpectralCentroid>(0, 8, 8, 4, 1, WindowType::kHann); | |||
| auto transform = Execute({spectral_centroid}); | |||
| Status rc = transform({input_tensor}, &input_tensor); | |||
| EXPECT_FALSE(rc.IsOk()); | |||
| } | |||
| @@ -0,0 +1,122 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Testing SpectralCentroid Python API | |||
| """ | |||
| import numpy as np | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.audio.transforms as audio | |||
| from mindspore import log as logger | |||
def count_unequal_element(data_expected, data_me, rtol, atol):
    """
    Precision calculation func.

    Asserts that the fraction of elements whose absolute error exceeds
    ``atol + abs(data_expected) * rtol`` is smaller than ``rtol``.

    Args:
        data_expected (numpy.ndarray): Reference values; the relative tolerance is scaled by these.
        data_me (numpy.ndarray): Values under test, must have the same shape as data_expected.
        rtol (float): Relative tolerance, also used as the bound on the allowed mismatch ratio.
        atol (float): Absolute tolerance.

    Raises:
        AssertionError: If the shapes differ or too many elements are out of tolerance.
    """
    assert data_expected.shape == data_me.shape
    total_count = data_expected.size
    if total_count == 0:
        # Two empty arrays of identical shape have nothing to compare; return early
        # instead of dividing by zero below.
        return
    error = np.abs(data_expected - data_me)
    greater = np.greater(error, atol + np.abs(data_expected) * rtol)
    loss_count = np.count_nonzero(greater)
    assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
        data_expected[greater], data_me[greater], error[greater])
def test_spectral_centroid_pipeline():
    """
    Feature: mindspore pipeline mode normal testcase: spectral_centroid op.
    Description: input audio signal to test pipeline.
    Expectation: success.
    """
    logger.info("test_spectral_centroid_pipeline")

    wav = [[[1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]]]
    dataset = ds.NumpySlicesDataset(wav, column_names=["audio"], shuffle=False)
    out = audio.SpectralCentroid(sample_rate=44100, n_fft=8)
    dataset = dataset.map(operations=out, input_columns=["audio"], output_columns=["SpectralCentroid"],
                          column_order=['SpectralCentroid'])
    result = np.array([[[4436.1182, 3580.0718, 2902.4917, 3334.8962, 5199.8350, 6284.4814,
                         3580.0718, 2895.5659]]])
    for data1 in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        # The reference values are the "expected" argument and the operator output is the
        # data under test, so the tolerance is scaled by the reference values.
        count_unequal_element(result, data1["SpectralCentroid"], 0.0001, 0.0001)
def test_spectral_centroid_eager():
    """
    Feature: mindspore eager mode normal testcase: spectral_centroid op.
    Description: input audio signal to test eager.
    Expectation: success.
    """
    logger.info("test_spectral_centroid_eager")

    wav = np.array([[1.2, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5.5, 6.5]])
    spectral_centroid_op = audio.SpectralCentroid(sample_rate=48000, n_fft=8)
    out = spectral_centroid_op(wav)
    result = np.array([[[5276.65022959, 3896.67543098, 3159.17400004, 3629.81957922,
                         5659.68456649, 6840.25126846, 3896.67543098, 3316.97434286]]])
    # The reference values are the "expected" argument and the operator output is the
    # data under test, so the tolerance is scaled by the reference values.
    count_unequal_element(result, out, 0.0001, 0.0001)
def test_spectral_centroid_param():
    """
    Feature: test spectral_centroid invalid parameter.
    Description: test some invalid parameters.
    Expectation: every invalid argument raises the expected error with the expected message.
    """

    def expect_error(expected_error, expected_message, **kwargs):
        """Build SpectralCentroid with kwargs and fail unless it raises expected_error with expected_message."""
        try:
            _ = audio.SpectralCentroid(**kwargs)
        except expected_error as error:
            logger.info("Got an exception in SpectralCentroid: {}".format(str(error)))
            assert expected_message in str(error)
        else:
            # Without this branch a missing validation would let the test pass silently.
            raise AssertionError(
                "SpectralCentroid({}) did not raise {}".format(kwargs, expected_error.__name__))

    expect_error(ValueError, "Input sample_rate is not within the required interval of [0, 2147483647].",
                 sample_rate=-1)
    expect_error(ValueError, "Input n_fft is not within the required interval of [1, 2147483647].",
                 sample_rate=48000, n_fft=-1)
    expect_error(ValueError, "Input n_fft is not within the required interval of [1, 2147483647].",
                 sample_rate=48000, n_fft=0)
    expect_error(ValueError, "Input win_length is not within the required interval of [1, 2147483647].",
                 sample_rate=48000, win_length=-1)
    expect_error(TypeError, "Argument win_length with value s is not of type [<class 'int'>], but got <class 'str'>.",
                 sample_rate=48000, win_length="s")
    expect_error(ValueError, "Input hop_length is not within the required interval of [1, 2147483647].",
                 sample_rate=48000, hop_length=-1)
    expect_error(ValueError, "Input hop_length is not within the required interval of [1, 2147483647].",
                 sample_rate=48000, hop_length=-100)
    expect_error(ValueError, "Input win_length should be no more than n_fft, but got win_length: 300 and n_fft: 200.",
                 sample_rate=48000, win_length=300, n_fft=200)
if __name__ == "__main__":
    # Run the SpectralCentroid test cases in order when the file is executed directly.
    for _test_case in (test_spectral_centroid_pipeline,
                       test_spectral_centroid_eager,
                       test_spectral_centroid_param):
        _test_case()