From e92cfd20ac6e08023fed80dba73f282cd5ef68e2 Mon Sep 17 00:00:00 2001 From: RogueException Date: Sat, 8 Apr 2017 17:12:10 -0300 Subject: [PATCH] Audio bugfixes and improvements. --- src/Discord.Net.Core/Audio/IAudioClient.cs | 34 +++-------- .../Audio/AudioClient.cs | 34 ++++------- .../Audio/Opus/OpusConverter.cs | 46 +++++++-------- .../Audio/Opus/OpusDecoder.cs | 28 ++++----- .../Audio/Opus/OpusEncoder.cs | 58 +++++-------------- .../Audio/Streams/BufferedWriteStream.cs | 8 +-- .../Audio/Streams/OpusDecodeStream.cs | 8 +-- .../Audio/Streams/OpusEncodeStream.cs | 36 +++++++----- .../Audio/Streams/RTPWriteStream.cs | 9 +-- 9 files changed, 98 insertions(+), 163 deletions(-) diff --git a/src/Discord.Net.Core/Audio/IAudioClient.cs b/src/Discord.Net.Core/Audio/IAudioClient.cs index a292c5aa8..c1c31af73 100644 --- a/src/Discord.Net.Core/Audio/IAudioClient.cs +++ b/src/Discord.Net.Core/Audio/IAudioClient.cs @@ -22,31 +22,13 @@ namespace Discord.Audio Task StopAsync(); - /// - /// Creates a new outgoing stream accepting Opus-encoded data. - /// - /// Samples per frame. Must be 120, 240, 480, 960, 1920 or 2880, representing 2.5, 5, 10, 20, 40 or 60 milliseconds respectively. - /// - AudioOutStream CreateOpusStream(int samplesPerFrame, int bufferMillis = 1000); - /// - /// Creates a new outgoing stream accepting Opus-encoded data. This is a direct stream with no internal timer. - /// - /// Samples per frame. Must be 120, 240, 480, 960, 1920 or 2880, representing 2.5, 5, 10, 20, 40 or 60 milliseconds respectively. - /// - AudioOutStream CreateDirectOpusStream(int samplesPerFrame); - /// - /// Creates a new outgoing stream accepting PCM (raw) data. - /// - /// Samples per frame. Must be 120, 240, 480, 960, 1920 or 2880, representing 2.5, 5, 10, 20, 40 or 60 milliseconds respectively. - /// - /// - AudioOutStream CreatePCMStream(AudioApplication application, int samplesPerFrame, int channels = 2, int? bitrate = null, int bufferMillis = 1000); - /// - /// Creates a new direct outgoing stream accepting PCM (raw) data. This is a direct stream with no internal timer. - /// - /// Samples per frame. Must be 120, 240, 480, 960, 1920 or 2880, representing 2.5, 5, 10, 20, 40 or 60 milliseconds respectively. - /// - /// - AudioOutStream CreateDirectPCMStream(AudioApplication application, int samplesPerFrame, int channels = 2, int? bitrate = null); + /// Creates a new outgoing stream accepting Opus-encoded data. + AudioOutStream CreateOpusStream(int bufferMillis = 1000); + /// Creates a new outgoing stream accepting Opus-encoded data. This is a direct stream with no internal timer. + AudioOutStream CreateDirectOpusStream(); + /// Creates a new outgoing stream accepting PCM (raw) data. + AudioOutStream CreatePCMStream(AudioApplication application, int? bitrate = null, int bufferMillis = 1000); + /// Creates a new direct outgoing stream accepting PCM (raw) data. This is a direct stream with no internal timer. + AudioOutStream CreateDirectPCMStream(AudioApplication application, int? bitrate = null); } } diff --git a/src/Discord.Net.WebSocket/Audio/AudioClient.cs b/src/Discord.Net.WebSocket/Audio/AudioClient.cs index 39814f9bf..c497b2632 100644 --- a/src/Discord.Net.WebSocket/Audio/AudioClient.cs +++ b/src/Discord.Net.WebSocket/Audio/AudioClient.cs @@ -139,43 +139,33 @@ namespace Discord.Audio await Discord.ApiClient.SendVoiceStateUpdateAsync(Guild.Id, null, false, false).ConfigureAwait(false); } - public AudioOutStream CreateOpusStream(int samplesPerFrame, int bufferMillis) + public AudioOutStream CreateOpusStream(int bufferMillis) { - CheckSamplesPerFrame(samplesPerFrame); var outputStream = new OutputStream(ApiClient); var sodiumEncrypter = new SodiumEncryptStream( outputStream, this); - var rtpWriter = new RTPWriteStream(sodiumEncrypter, samplesPerFrame, _ssrc); - return new BufferedWriteStream(rtpWriter, this, samplesPerFrame, bufferMillis, _connection.CancelToken, _audioLogger); + var rtpWriter = new RTPWriteStream(sodiumEncrypter, _ssrc); + return new BufferedWriteStream(rtpWriter, this, bufferMillis, _connection.CancelToken, _audioLogger); } - public AudioOutStream CreateDirectOpusStream(int samplesPerFrame) + public AudioOutStream CreateDirectOpusStream() { - CheckSamplesPerFrame(samplesPerFrame); var outputStream = new OutputStream(ApiClient); var sodiumEncrypter = new SodiumEncryptStream(outputStream, this); - return new RTPWriteStream(sodiumEncrypter, samplesPerFrame, _ssrc); + return new RTPWriteStream(sodiumEncrypter, _ssrc); } - public AudioOutStream CreatePCMStream(AudioApplication application, int samplesPerFrame, int channels, int? bitrate, int bufferMillis) + public AudioOutStream CreatePCMStream(AudioApplication application, int? bitrate, int bufferMillis) { - CheckSamplesPerFrame(samplesPerFrame); var outputStream = new OutputStream(ApiClient); var sodiumEncrypter = new SodiumEncryptStream(outputStream, this); - var rtpWriter = new RTPWriteStream(sodiumEncrypter, samplesPerFrame, _ssrc); - var bufferedStream = new BufferedWriteStream(rtpWriter, this, samplesPerFrame, bufferMillis, _connection.CancelToken, _audioLogger); - return new OpusEncodeStream(bufferedStream, channels, samplesPerFrame, bitrate ?? (96 * 1024), application); + var rtpWriter = new RTPWriteStream(sodiumEncrypter, _ssrc); + var bufferedStream = new BufferedWriteStream(rtpWriter, this, bufferMillis, _connection.CancelToken, _audioLogger); + return new OpusEncodeStream(bufferedStream, bitrate ?? (96 * 1024), application); } - public AudioOutStream CreateDirectPCMStream(AudioApplication application, int samplesPerFrame, int channels, int? bitrate) + public AudioOutStream CreateDirectPCMStream(AudioApplication application, int? bitrate) { - CheckSamplesPerFrame(samplesPerFrame); var outputStream = new OutputStream(ApiClient); var sodiumEncrypter = new SodiumEncryptStream(outputStream, this); - var rtpWriter = new RTPWriteStream(sodiumEncrypter, samplesPerFrame, _ssrc); - return new OpusEncodeStream(rtpWriter, channels, samplesPerFrame, bitrate ?? (96 * 1024), application); - } - private void CheckSamplesPerFrame(int samplesPerFrame) - { - if (samplesPerFrame != 120 && samplesPerFrame != 240 && samplesPerFrame != 480 && - samplesPerFrame != 960 && samplesPerFrame != 1920 && samplesPerFrame != 2880) - throw new ArgumentException("Value must be 120, 240, 480, 960, 1920 or 2880", nameof(samplesPerFrame)); + var rtpWriter = new RTPWriteStream(sodiumEncrypter, _ssrc); + return new OpusEncodeStream(rtpWriter, bitrate ?? (96 * 1024), application); } internal async Task CreateInputStreamAsync(ulong userId) diff --git a/src/Discord.Net.WebSocket/Audio/Opus/OpusConverter.cs b/src/Discord.Net.WebSocket/Audio/Opus/OpusConverter.cs index 95874cdf1..f802d65ad 100644 --- a/src/Discord.Net.WebSocket/Audio/Opus/OpusConverter.cs +++ b/src/Discord.Net.WebSocket/Audio/Opus/OpusConverter.cs @@ -6,37 +6,22 @@ namespace Discord.Audio { protected IntPtr _ptr; - /// Gets the bit rate of this converter. - public const int BitsPerSample = sizeof(short) * 8; - /// Gets the bytes per sample. - public const int SampleSize = (BitsPerSample / 8) * MaxChannels; - /// Gets the maximum amount of channels this encoder supports. - public const int MaxChannels = 2; + public const int SamplingRate = 48000; + public const int Channels = 2; + public const int FrameMillis = 20; - /// Gets the input sampling rate of this converter. - public int SamplingRate { get; } - /// Gets the number of samples per second for this stream. - public int Channels { get; } + public const int SampleBytes = sizeof(short) * Channels; - protected OpusConverter(int samplingRate, int channels) - { - if (samplingRate != 8000 && samplingRate != 12000 && - samplingRate != 16000 && samplingRate != 24000 && - samplingRate != 48000) - throw new ArgumentOutOfRangeException(nameof(samplingRate)); - if (channels != 1 && channels != 2) - throw new ArgumentOutOfRangeException(nameof(channels)); - - SamplingRate = samplingRate; - Channels = channels; - } + public const int FrameSamples = SamplingRate / 1000 * FrameMillis; + public const int FrameSamplesPerChannel = SamplingRate / 1000 * FrameMillis; + public const int FrameBytes = FrameSamples * SampleBytes; - private bool disposedValue = false; // To detect redundant calls + protected bool _isDisposed = false; protected virtual void Dispose(bool disposing) { - if (!disposedValue) - disposedValue = true; + if (!_isDisposed) + _isDisposed = true; } ~OpusConverter() { @@ -47,5 +32,16 @@ namespace Discord.Audio Dispose(true); GC.SuppressFinalize(this); } + + protected static void CheckError(int result) + { + if (result < 0) + throw new Exception($"Opus Error: {(OpusError)result}"); + } + protected static void CheckError(OpusError error) + { + if ((int)error < 0) + throw new Exception($"Opus Error: {error}"); + } } } diff --git a/src/Discord.Net.WebSocket/Audio/Opus/OpusDecoder.cs b/src/Discord.Net.WebSocket/Audio/Opus/OpusDecoder.cs index 605cd3467..c5c16dff6 100644 --- a/src/Discord.Net.WebSocket/Audio/Opus/OpusDecoder.cs +++ b/src/Discord.Net.WebSocket/Audio/Opus/OpusDecoder.cs @@ -14,37 +14,29 @@ namespace Discord.Audio [DllImport("opus", EntryPoint = "opus_decoder_ctl", CallingConvention = CallingConvention.Cdecl)] private static extern int DecoderCtl(IntPtr st, OpusCtl request, int value); - public OpusDecoder(int samplingRate, int channels) - : base(samplingRate, channels) + public OpusDecoder() { - OpusError error; - _ptr = CreateDecoder(samplingRate, channels, out error); - if (error != OpusError.OK) - throw new Exception($"Opus Error: {error}"); + _ptr = CreateDecoder(SamplingRate, Channels, out var error); + CheckError(error); } - /// Produces PCM samples from Opus-encoded audio. - /// PCM samples to decode. - /// Offset of the frame in input. - /// Buffer to store the decoded frame. public unsafe int DecodeFrame(byte[] input, int inputOffset, int inputCount, byte[] output, int outputOffset) { int result = 0; fixed (byte* inPtr = input) fixed (byte* outPtr = output) - result = Decode(_ptr, inPtr + inputOffset, inputCount, outPtr + outputOffset, (output.Length - outputOffset) / SampleSize, 0); //TODO: Enable FEC - - if (result < 0) - throw new Exception($"Opus Error: {(OpusError)result}"); - return result * SampleSize; + result = Decode(_ptr, inPtr + inputOffset, inputCount, outPtr + outputOffset, FrameBytes / SampleBytes, 1); + CheckError(result); + return FrameBytes; } protected override void Dispose(bool disposing) { - if (_ptr != IntPtr.Zero) + if (!_isDisposed) { - DestroyDecoder(_ptr); - _ptr = IntPtr.Zero; + if (_ptr != IntPtr.Zero) + DestroyDecoder(_ptr); + base.Dispose(disposing); } } } diff --git a/src/Discord.Net.WebSocket/Audio/Opus/OpusEncoder.cs b/src/Discord.Net.WebSocket/Audio/Opus/OpusEncoder.cs index c85e21834..a12854d69 100644 --- a/src/Discord.Net.WebSocket/Audio/Opus/OpusEncoder.cs +++ b/src/Discord.Net.WebSocket/Audio/Opus/OpusEncoder.cs @@ -12,14 +12,12 @@ namespace Discord.Audio [DllImport("opus", EntryPoint = "opus_encode", CallingConvention = CallingConvention.Cdecl)] private static extern int Encode(IntPtr st, byte* pcm, int frame_size, byte* data, int max_data_bytes); [DllImport("opus", EntryPoint = "opus_encoder_ctl", CallingConvention = CallingConvention.Cdecl)] - private static extern int EncoderCtl(IntPtr st, OpusCtl request, int value); + private static extern OpusError EncoderCtl(IntPtr st, OpusCtl request, int value); - /// Gets the coding mode of the encoder. public AudioApplication Application { get; } public int BitRate { get;} - public OpusEncoder(int samplingRate, int channels, int bitrate, AudioApplication application) - : base(samplingRate, channels) + public OpusEncoder(int bitrate, AudioApplication application) { if (bitrate < 1 || bitrate > DiscordVoiceAPIClient.MaxBitrate) throw new ArgumentOutOfRangeException(nameof(bitrate)); @@ -47,57 +45,31 @@ namespace Discord.Audio throw new ArgumentOutOfRangeException(nameof(application)); } - OpusError error; - _ptr = CreateEncoder(samplingRate, channels, (int)opusApplication, out error); - if (error != OpusError.OK) - throw new Exception($"Opus Error: {error}"); - - var result = EncoderCtl(_ptr, OpusCtl.SetSignal, (int)opusSignal); - if (result < 0) - throw new Exception($"Opus Error: {(OpusError)result}"); - - result = EncoderCtl(_ptr, OpusCtl.SetPacketLossPercent, 30); //%% - if (result < 0) - throw new Exception($"Opus Error: {(OpusError)result}"); - - result = EncoderCtl(_ptr, OpusCtl.SetInbandFEC, 1); //True - if (result < 0) - throw new Exception($"Opus Error: {(OpusError)result}"); - - result = EncoderCtl(_ptr, OpusCtl.SetBitrate, bitrate); - if (result < 0) - throw new Exception($"Opus Error: {(OpusError)result}"); - - /*if (application == AudioApplication.Music) - { - result = EncoderCtl(_ptr, OpusCtl.SetBandwidth, 1105); - if (result < 0) - throw new Exception($"Opus Error: {(OpusError)result}"); - }*/ + _ptr = CreateEncoder(SamplingRate, Channels, (int)opusApplication, out var error); + CheckError(error); + CheckError(EncoderCtl(_ptr, OpusCtl.SetSignal, (int)opusSignal)); + CheckError(EncoderCtl(_ptr, OpusCtl.SetPacketLossPercent, 30)); //% + CheckError(EncoderCtl(_ptr, OpusCtl.SetInbandFEC, 1)); //True + CheckError(EncoderCtl(_ptr, OpusCtl.SetBitrate, bitrate)); } - /// Produces Opus encoded audio from PCM samples. - /// PCM samples to encode. - /// Buffer to store the encoded frame. - /// Length of the frame contained in outputBuffer. - public unsafe int EncodeFrame(byte[] input, int inputOffset, int inputCount, byte[] output, int outputOffset) + public unsafe int EncodeFrame(byte[] input, int inputOffset, byte[] output, int outputOffset) { int result = 0; fixed (byte* inPtr = input) fixed (byte* outPtr = output) - result = Encode(_ptr, inPtr + inputOffset, inputCount / SampleSize, outPtr + outputOffset, output.Length - outputOffset); - - if (result < 0) - throw new Exception($"Opus Error: {(OpusError)result}"); + result = Encode(_ptr, inPtr + inputOffset, FrameSamplesPerChannel, outPtr + outputOffset, output.Length - outputOffset); + CheckError(result); return result; } protected override void Dispose(bool disposing) { - if (_ptr != IntPtr.Zero) + if (!_isDisposed) { - DestroyEncoder(_ptr); - _ptr = IntPtr.Zero; + if (_ptr != IntPtr.Zero) + DestroyEncoder(_ptr); + base.Dispose(disposing); } } } diff --git a/src/Discord.Net.WebSocket/Audio/Streams/BufferedWriteStream.cs b/src/Discord.Net.WebSocket/Audio/Streams/BufferedWriteStream.cs index d603f61a5..1764fa66a 100644 --- a/src/Discord.Net.WebSocket/Audio/Streams/BufferedWriteStream.cs +++ b/src/Discord.Net.WebSocket/Audio/Streams/BufferedWriteStream.cs @@ -38,14 +38,14 @@ namespace Discord.Audio.Streams private bool _isPreloaded; private int _silenceFrames; - public BufferedWriteStream(AudioStream next, IAudioClient client, int samplesPerFrame, int bufferMillis, CancellationToken cancelToken, int maxFrameSize = 1500) - : this(next, client as AudioClient, samplesPerFrame, bufferMillis, cancelToken, null, maxFrameSize) { } - internal BufferedWriteStream(AudioStream next, AudioClient client, int samplesPerFrame, int bufferMillis, CancellationToken cancelToken, Logger logger, int maxFrameSize = 1500) + public BufferedWriteStream(AudioStream next, IAudioClient client, int bufferMillis, CancellationToken cancelToken, int maxFrameSize = 1500) + : this(next, client as AudioClient, bufferMillis, cancelToken, null, maxFrameSize) { } + internal BufferedWriteStream(AudioStream next, AudioClient client, int bufferMillis, CancellationToken cancelToken, Logger logger, int maxFrameSize = 1500) { //maxFrameSize = 1275 was too limiting at 128kbps,2ch,60ms _next = next; _client = client; - _ticksPerFrame = samplesPerFrame / 48; + _ticksPerFrame = OpusEncoder.FrameSamples / 48; _logger = logger; _queueLength = (bufferMillis + (_ticksPerFrame - 1)) / _ticksPerFrame; //Round up diff --git a/src/Discord.Net.WebSocket/Audio/Streams/OpusDecodeStream.cs b/src/Discord.Net.WebSocket/Audio/Streams/OpusDecodeStream.cs index c46e16cd3..96c809cca 100644 --- a/src/Discord.Net.WebSocket/Audio/Streams/OpusDecodeStream.cs +++ b/src/Discord.Net.WebSocket/Audio/Streams/OpusDecodeStream.cs @@ -9,14 +9,14 @@ namespace Discord.Audio.Streams public const int SampleRate = OpusEncodeStream.SampleRate; private readonly AudioStream _next; - private readonly byte[] _buffer; private readonly OpusDecoder _decoder; + private readonly byte[] _buffer; - public OpusDecodeStream(AudioStream next, int channels = OpusConverter.MaxChannels, int bufferSize = 5760 * 2 * sizeof(short)) + public OpusDecodeStream(AudioStream next) { _next = next; - _buffer = new byte[bufferSize]; - _decoder = new OpusDecoder(SampleRate, channels); + _buffer = new byte[OpusConverter.FrameBytes]; + _decoder = new OpusDecoder(); } public override async Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) diff --git a/src/Discord.Net.WebSocket/Audio/Streams/OpusEncodeStream.cs b/src/Discord.Net.WebSocket/Audio/Streams/OpusEncodeStream.cs index ac6284c91..2a3c03a47 100644 --- a/src/Discord.Net.WebSocket/Audio/Streams/OpusEncodeStream.cs +++ b/src/Discord.Net.WebSocket/Audio/Streams/OpusEncodeStream.cs @@ -12,18 +12,13 @@ namespace Discord.Audio.Streams private readonly AudioStream _next; private readonly OpusEncoder _encoder; private readonly byte[] _buffer; - - private int _frameSize; - private byte[] _partialFrameBuffer; private int _partialFramePos; - public OpusEncodeStream(AudioStream next, int channels, int samplesPerFrame, int bitrate, AudioApplication application, int bufferSize = 4000) + public OpusEncodeStream(AudioStream next, int bitrate, AudioApplication application) { _next = next; - _encoder = new OpusEncoder(SampleRate, channels, bitrate, application); - _frameSize = samplesPerFrame * channels * 2; - _buffer = new byte[bufferSize]; - _partialFrameBuffer = new byte[_frameSize]; + _encoder = new OpusEncoder(bitrate, application); + _buffer = new byte[OpusConverter.FrameBytes]; } public override async Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -31,20 +26,31 @@ namespace Discord.Audio.Streams //Assume threadsafe while (count > 0) { - if (_partialFramePos + count >= _frameSize) + if (_partialFramePos == 0 && count >= OpusConverter.FrameBytes) + { + //We have enough data and no partial frames. Pass the buffer directly to the encoder + int encFrameSize = _encoder.EncodeFrame(buffer, offset, _buffer, 0); + await _next.WriteAsync(_buffer, 0, encFrameSize, cancellationToken).ConfigureAwait(false); + + offset += OpusConverter.FrameBytes; + count -= OpusConverter.FrameBytes; + } + else if (_partialFramePos + count >= OpusConverter.FrameBytes) { - int partialSize = _frameSize - _partialFramePos; - Buffer.BlockCopy(buffer, offset, _partialFrameBuffer, _partialFramePos, partialSize); + //We have enough data to complete a previous partial frame. + int partialSize = OpusConverter.FrameBytes - _partialFramePos; + Buffer.BlockCopy(buffer, offset, _buffer, _partialFramePos, partialSize); + int encFrameSize = _encoder.EncodeFrame(_buffer, 0, _buffer, 0); + await _next.WriteAsync(_buffer, 0, encFrameSize, cancellationToken).ConfigureAwait(false); + offset += partialSize; count -= partialSize; _partialFramePos = 0; - - int encFrameSize = _encoder.EncodeFrame(_partialFrameBuffer, 0, _frameSize, _buffer, 0); - await _next.WriteAsync(_buffer, 0, encFrameSize, cancellationToken).ConfigureAwait(false); } else { - Buffer.BlockCopy(buffer, offset, _partialFrameBuffer, _partialFramePos, count); + //Not enough data to build a complete frame, store this part for later + Buffer.BlockCopy(buffer, offset, _buffer, _partialFramePos, count); _partialFramePos += count; break; } diff --git a/src/Discord.Net.WebSocket/Audio/Streams/RTPWriteStream.cs b/src/Discord.Net.WebSocket/Audio/Streams/RTPWriteStream.cs index b8d58c997..40d6f21f5 100644 --- a/src/Discord.Net.WebSocket/Audio/Streams/RTPWriteStream.cs +++ b/src/Discord.Net.WebSocket/Audio/Streams/RTPWriteStream.cs @@ -9,15 +9,12 @@ namespace Discord.Audio.Streams { private readonly AudioStream _next; private readonly byte[] _header; - private int _samplesPerFrame; - private uint _ssrc, _timestamp = 0; - protected readonly byte[] _buffer; + private uint _ssrc, _timestamp = 0; - public RTPWriteStream(AudioStream next, int samplesPerFrame, uint ssrc, int bufferSize = 4000) + public RTPWriteStream(AudioStream next, uint ssrc, int bufferSize = 4000) { _next = next; - _samplesPerFrame = samplesPerFrame; _ssrc = ssrc; _buffer = new byte[bufferSize]; _header = new byte[24]; @@ -38,7 +35,7 @@ namespace Discord.Audio.Streams if (_header[3]++ == byte.MaxValue) _header[2]++; - _timestamp += (uint)_samplesPerFrame; + _timestamp += (uint)OpusEncoder.FrameSamples; _header[4] = (byte)(_timestamp >> 24); _header[5] = (byte)(_timestamp >> 16); _header[6] = (byte)(_timestamp >> 8);