# Copyright 2024 Tencent
# SPDX-License-Identifier: BSD-3-Clause

import os
import sys

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
from packaging import version


def _spectrogram_variants(waveform):
    """Run the four spectrogram configurations covered by this test.

    The same configurations are applied to every input so that both test
    inputs exercise identical operator settings.

    Args:
        waveform: tensor passed as the first argument of
            ``torchaudio.functional.spectrogram``.

    Returns:
        A 4-tuple ``(s0, s1, s2, s3)`` with one result per configuration,
        in the order they are computed below.
    """
    # n_fft=64 with a shorter (44-sample) Hann window, centered,
    # normalized='window', magnitude output (power=1).
    s0 = torchaudio.functional.spectrogram(
        waveform, n_fft=64, window=torch.hann_window(44), win_length=44,
        hop_length=16, pad=0, center=True, normalized='window', power=1)

    if version.parse(torchaudio.__version__) < version.parse('0.11.0'):
        # return_complex=False with power=None, skip it
        # (torchaudio older than 0.11.0 falls back to power=1 here).
        s1_power = 1
    else:
        s1_power = None
    # n_fft=128 with a full-length Hann window, not centered, one-sided,
    # unnormalized; power is chosen by the version check above.
    s1 = torchaudio.functional.spectrogram(
        waveform, n_fft=128, window=torch.hann_window(128), win_length=128,
        hop_length=3, pad=0, center=False, onesided=True, normalized=False,
        power=s1_power)

    # n_fft=512 with a 256-sample Hamming window, centered with
    # pad_mode='constant', one-sided, normalized='frame_length', power=2.
    s2 = torchaudio.functional.spectrogram(
        waveform, n_fft=512, window=torch.hamming_window(256), win_length=256,
        hop_length=128, pad=0, center=True, pad_mode='constant', onesided=True,
        normalized='frame_length', power=2)

    # n_fft=512 with a full-length Hamming window, pad=32, centered,
    # two-sided (onesided=False), unnormalized, power=2.
    s3 = torchaudio.functional.spectrogram(
        waveform, n_fft=512, window=torch.hamming_window(512), win_length=512,
        hop_length=128, pad=32, center=True, onesided=False, normalized=False,
        power=2)

    return s0, s1, s2, s3


class Model(nn.Module):
    """Parameter-free module applying the spectrogram variants to two inputs."""

    def __init__(self):
        super(Model, self).__init__()

    def forward(self, x, y):
        """Return ``(x0, x1, x2, x3, y0, y1, y2, y3)``.

        The first four entries are the spectrogram variants of ``x`` and the
        last four are the same variants of ``y``.
        """
        return _spectrogram_variants(x) + _spectrogram_variants(y)


def test():
    """Trace the model, convert it with pnnx and compare the outputs.

    Returns:
        True when the pnnx inference returns as many outputs as the reference
        model and every pair matches under ``torch.allclose`` with
        ``rtol=1e-4`` and ``atol=1e-4``; False otherwise.
    """
    net = Model()
    net.eval()

    torch.manual_seed(0)
    x = torch.rand(3, 2560)
    y = torch.rand(1000)

    a = net(x, y)

    # export torchscript
    mod = torch.jit.trace(net, (x, y))
    mod.save("test_torchaudio_F_spectrogram.pt")

    # torchscript to pnnx
    os.system("../src/pnnx test_torchaudio_F_spectrogram.pt inputshape=[3,2560],[1000]")

    # pnnx inference
    # NOTE(review): this module is presumably generated by the pnnx command
    # above, which is why it is imported here rather than at file level.
    import test_torchaudio_F_spectrogram_pnnx
    b = test_torchaudio_F_spectrogram_pnnx.test_inference()

    # zip() stops at the shorter sequence, so a missing output would otherwise
    # go unnoticed; require the same number of outputs first.
    if len(a) != len(b):
        return False

    return all(torch.allclose(a0, b0, 1e-4, 1e-4) for a0, b0 in zip(a, b))


if __name__ == "__main__":
    # sys.exit instead of the site-provided exit() helper, which is not
    # guaranteed to exist (e.g. when Python runs with -S).
    sys.exit(0 if test() else 1)