You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

utils.py 5.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """
  16. Enum for audio ops.
  17. """
  18. from enum import Enum
  19. import mindspore._c_dataengine as cde
  20. class FadeShape(str, Enum):
  21. """
  22. Fade Shapes.
  23. Possible enumeration values are: FadeShape.EXPONENTIAL, FadeShape.HALFSINE, FadeShape.LINEAR,
  24. FadeShape.LOGARITHMIC, FadeShape.QUARTERSINE.
  25. - FadeShape.EXPONENTIAL: means the fade shape is exponential mode.
  26. - FadeShape.HALFSINE: means the fade shape is half_sine mode.
  27. - FadeShape.LINEAR: means the fade shape is linear mode.
  28. - FadeShape.LOGARITHMIC: means the fade shape is logarithmic mode.
  29. - FadeShape.QUARTERSINE: means the fade shape is quarter_sine mode.
  30. """
  31. LINEAR: str = "linear"
  32. EXPONENTIAL: str = "exponential"
  33. LOGARITHMIC: str = "logarithmic"
  34. QUARTERSINE: str = "quarter_sine"
  35. HALFSINE: str = "half_sine"
  36. class GainType(str, Enum):
  37. """"
  38. Gain Types.
  39. Possible enumeration values are: GainType.AMPLITUDE, GainType.DB, GainType.POWER.
  40. - GainType.AMPLITUDE: means input gain type is amplitude.
  41. - GainType.DB: means input gain type is decibel.
  42. - GainType.POWER: means input gain type is power.
  43. """
  44. POWER: str = "power"
  45. AMPLITUDE: str = "amplitude"
  46. DB: str = "db"
  47. class Interpolation(str, Enum):
  48. """
  49. Interpolation Type.
  50. Possible enumeration values are: Interpolation.LINEAR, Interpolation.QUADRATIC.
  51. - Interpolation.LINEAR: means input interpolation type is linear.
  52. - Interpolation.QUADRATIC: means input interpolation type is quadratic.
  53. """
  54. LINEAR: str = "linear"
  55. QUADRATIC: str = "quadratic"
  56. class Modulation(str, Enum):
  57. """
  58. Modulation Type.
  59. Possible enumeration values are: Modulation.SINUSOIDAL, Modulation.TRIANGULAR.
  60. - Modulation.SINUSOIDAL: means input modulation type is sinusoidal.
  61. - Modulation.TRIANGULAR: means input modulation type is triangular.
  62. """
  63. SINUSOIDAL: str = "sinusoidal"
  64. TRIANGULAR: str = "triangular"
  65. class ScaleType(str, Enum):
  66. """
  67. Scale Types.
  68. Possible enumeration values are: ScaleType.MAGNITUDE, ScaleType.POWER.
  69. - ScaleType.MAGNITUDE: means the scale of input audio is magnitude.
  70. - ScaleType.POWER: means the scale of input audio is power.
  71. """
  72. POWER: str = "power"
  73. MAGNITUDE: str = "magnitude"
  74. class NormMode(str, Enum):
  75. """
  76. Norm Types.
  77. Possible enumeration values are: NormMode.NONE, NormMode.ORTHO.
  78. - NormMode.NONE: means the mode of input audio is none.
  79. - NormMode.ORTHO: means the mode of input audio is ortho.
  80. """
  81. NONE: str = "none"
  82. ORTHO: str = "ortho"
  83. DE_C_NORMMODE_TYPE = {NormMode.NONE: cde.NormMode.DE_NORMMODE_NONE,
  84. NormMode.ORTHO: cde.NormMode.DE_NORMMODE_ORTHO}
  85. def CreateDct(n_mfcc, n_mels, norm=NormMode.NONE):
  86. """
  87. Create a DCT transformation matrix with shape (n_mels, n_mfcc), normalized depending on norm.
  88. Args:
  89. n_mfcc (int): Number of mfc coefficients to retain, the value must be greater than 0.
  90. n_mels (int): Number of mel filterbanks, the value must be greater than 0.
  91. norm (NormMode): Normalization mode, can be NormMode.NONE or NormMode.ORTHO (default=NormMode.NONE).
  92. Returns:
  93. numpy.ndarray, the transformation matrix, to be right-multiplied to row-wise data of size (n_mels, n_mfcc).
  94. Examples:
  95. >>> dct = audio.CreateDct(100, 200, audio.NormMode.NONE)
  96. """
  97. if not isinstance(n_mfcc, int):
  98. raise TypeError("n_mfcc with value {0} is not of type {1}, but got {2}.".format(
  99. n_mfcc, int, type(n_mfcc)))
  100. if not isinstance(n_mels, int):
  101. raise TypeError("n_mels with value {0} is not of type {1}, but got {2}.".format(
  102. n_mels, int, type(n_mels)))
  103. if not isinstance(norm, NormMode):
  104. raise TypeError("norm with value {0} is not of type {1}, but got {2}.".format(
  105. norm, NormMode, type(norm)))
  106. if n_mfcc <= 0:
  107. raise ValueError("n_mfcc must be greater than 0, but got {0}.".format(n_mfcc))
  108. if n_mels <= 0:
  109. raise ValueError("n_mels must be greater than 0, but got {0}.".format(n_mels))
  110. return cde.CreateDct(n_mfcc, n_mels, DE_C_NORMMODE_TYPE[norm]).as_array()
  111. class BorderType(str, Enum):
  112. """
  113. Padding Mode, BorderType Type.
  114. Possible enumeration values are: BorderType.CONSTANT, BorderType.EDGE, BorderType.REFLECT, BorderType.SYMMETRIC.
  115. - BorderType.CONSTANT: means it fills the border with constant values.
  116. - BorderType.EDGE: means it pads with the last value on the edge.
  117. - BorderType.REFLECT: means it reflects the values on the edge omitting the last value of edge.
  118. - BorderType.SYMMETRIC: means it reflects the values on the edge repeating the last value of edge.
  119. Note: This class derived from class str to support json serializable.
  120. """
  121. CONSTANT: str = "constant"
  122. EDGE: str = "edge"
  123. REFLECT: str = "reflect"
  124. SYMMETRIC: str = "symmetric"