You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

VoiceWebSocket.cs 16 kB

10 years ago
10 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. using Discord.API;
  2. using Discord.Audio;
  3. using Discord.Audio.Opus;
  4. using Discord.Audio.Sodium;
  5. using Newtonsoft.Json;
  6. using Newtonsoft.Json.Linq;
  7. using System;
  8. using System.Collections.Concurrent;
  9. using System.Collections.Generic;
  10. using System.Diagnostics;
  11. using System.Linq;
  12. using System.Net;
  13. using System.Net.Sockets;
  14. using System.Text;
  15. using System.Threading;
  16. using System.Threading.Tasks;
  17. namespace Discord.Net.WebSockets
  18. {
  19. public partial class VoiceWebSocket : WebSocket
  20. {
  // Size, in bytes, of the buffers allocated for encoded Opus frames.
  private const int MaxOpusSize = 4000;
  // Mode names matched against the server's Ready payload and echoed back in SendSelectProtocol.
  private const string EncryptedMode = "xsalsa20_poly1305";
  private const string UnencryptedMode = "plain";

  //private readonly Random _rand;
  private readonly int _targetAudioBufferLength;
  private readonly ConcurrentDictionary<uint, OpusDecoder> _decoders;
  private readonly DiscordAudioClient _audioClient;
  private readonly AudioServiceConfig _config;

  private OpusEncoder _encoder;
  // Our own SSRC, assigned from the Ready payload.
  private uint _ssrc;
  // Maps the SSRC found in incoming voice packets to a user id.
  private ConcurrentDictionary<uint, long> _ssrcMapping;
  private VoiceBuffer _sendBuffer;
  private UdpClient _udp;
  // Remote voice endpoint; incoming packets from any other endpoint are ignored.
  private IPEndPoint _endpoint;
  private bool _isEncrypted;
  private byte[] _secretKey;
  private byte[] _encodingBuffer;
  private ushort _sequence;
  private long? _serverId;
  private long? _channelId;
  private string _encryptionMode;
  private int _ping;
  private Thread _sendThread;
  private Thread _receiveThread;

  /// <summary>Id of the server this voice connection belongs to, if set.</summary>
  public long? ServerId
  {
      get { return _serverId; }
      internal set { _serverId = value; }
  }

  /// <summary>Id of the channel this voice connection is attached to, if set.</summary>
  public long? ChannelId
  {
      get { return _channelId; }
      internal set { _channelId = value; }
  }

  /// <summary>Value computed from the most recent heartbeat reply.</summary>
  public int Ping
  {
      get { return _ping; }
  }

  /// <summary>Buffer holding PCM frames that are waiting to be encoded and sent.</summary>
  internal VoiceBuffer OutputBuffer
  {
      get { return _sendBuffer; }
  }
  /// <summary>
  /// Creates the voice socket and allocates the encoder, the outgoing frame buffer
  /// and the lookup tables used while a connection is active.
  /// </summary>
  /// <param name="client">Owning client; its audio service supplies the configuration.</param>
  /// <param name="audioClient">Audio client whose token is used when identifying.</param>
  /// <param name="logger">Logger passed through to the base socket.</param>
  public VoiceWebSocket(DiscordClient client, DiscordAudioClient audioClient, Logger logger)
      : base(client, logger)
  {
      _audioClient = audioClient;
      _config = client.Audio().Config;

      // Plain allocations that only depend on constants or the config.
      _targetAudioBufferLength = _config.BufferLength / 20; //20 ms frames
      _encodingBuffer = new byte[MaxOpusSize];
      _decoders = new ConcurrentDictionary<uint, OpusDecoder>();
      _ssrcMapping = new ConcurrentDictionary<uint, long>();

      // The send buffer is sized in whole frames, rounded up, from the encoder's frame length.
      _encoder = new OpusEncoder(48000, _config.Channels, 20, _config.Bitrate, OpusApplication.Audio);
      double bufferedFrames = _config.BufferLength / (double)_encoder.FrameLength;
      _sendBuffer = new VoiceBuffer((int)Math.Ceiling(bufferedFrames), _encoder.FrameSize);
  }
  /// <summary>Starts the connection sequence by delegating to <c>BeginConnect</c>.</summary>
  public async Task Connect()
  {
      var pendingConnect = BeginConnect();
      await pendingConnect.ConfigureAwait(false);
  }
  /// <summary>
  /// Waits for the configured reconnect delay and then retries <see cref="Connect"/>
  /// until it succeeds or the parent cancel token is triggered. Cancellation ends the
  /// method quietly; any other failure is logged and retried after the failed-reconnect delay.
  /// </summary>
  public async Task Reconnect()
  {
      try
      {
          var parentToken = ParentCancelToken.Value;
          await Task.Delay(_client.Config.ReconnectDelay, parentToken).ConfigureAwait(false);

          bool connected = false;
          while (!connected && !parentToken.IsCancellationRequested)
          {
              try
              {
                  await Connect().ConfigureAwait(false);
                  connected = true;
              }
              catch (OperationCanceledException)
              {
                  // Let cancellation reach the outer handler instead of being retried.
                  throw;
              }
              catch (Exception ex)
              {
                  _logger.Error("Reconnect failed", ex);
                  //Net is down? We can keep trying to reconnect until the user runs Disconnect()
                  await Task.Delay(_client.Config.FailedReconnectDelay, parentToken).ConfigureAwait(false);
              }
          }
      }
      catch (OperationCanceledException)
      {
          // Cancelled while waiting or connecting: nothing left to do.
      }
  }
  /// <summary>Stops the task manager and returns the task it produces.</summary>
  public Task Disconnect()
  {
      return _taskManager.Stop();
  }
  /// <summary>
  /// Opens the UDP socket, starts the voice threads, sends the identify command and
  /// hands the socket's background tasks to the task manager.
  /// </summary>
  protected override async Task Run()
  {
      _udp = new UdpClient(new IPEndPoint(IPAddress.Any, 0));
      List<Task> tasks = new List<Task>();

      if ((_config.Mode & AudioMode.Outgoing) != 0)
      {
          _sendThread = new Thread(new ThreadStart(() => SendVoiceAsync(_cancelToken)));
          _sendThread.IsBackground = true;
          _sendThread.Start();
      }

      // The receive loop is always required: it is the only code that reads the 70-byte
      // discovery reply and calls SendSelectProtocol, without which the connection can
      // never be established. When Incoming is disabled the loop returns by itself right
      // after that reply, so outgoing-only connections pay for it only briefly.
      _receiveThread = new Thread(new ThreadStart(() => ReceiveVoiceAsync(_cancelToken)));
      _receiveThread.IsBackground = true;
      _receiveThread.Start();

      SendIdentify();

#if !DOTNET5_4
      tasks.Add(WatcherAsync());
#endif
      tasks.AddRange(_engine.GetTasks(_cancelToken));
      tasks.Add(HeartbeatAsync(_cancelToken));
      await _taskManager.Start(tasks, _cancelTokenSource).ConfigureAwait(false);
  }
  /// <summary>
  /// Waits for the voice threads to finish, disposes every cached decoder, clears the
  /// pending PCM frames and releases the UDP socket before running the base cleanup.
  /// </summary>
  protected override Task Cleanup()
  {
      _sendThread?.Join();
      _receiveThread?.Join();
      _sendThread = null;
      _receiveThread = null;

      foreach (var pair in _decoders)
      {
          OpusDecoder decoder;
          if (_decoders.TryRemove(pair.Key, out decoder))
              decoder.Dispose();
      }

      ClearPCMFrames();

      // Dispose the socket explicitly instead of only dropping the reference: on
      // DOTNET5_4 builds WatcherAsync is compiled out, so nothing else closes it.
      // Going through IDisposable works on every target, and disposing a socket that
      // the watcher already closed is harmless.
      var udp = _udp;
      _udp = null;
      if (udp != null)
          ((IDisposable)udp).Dispose();

      return base.Cleanup();
  }
  /// <summary>
  /// Receive loop run on its own thread. Until the socket is connected it waits for the
  /// 70-byte discovery reply and forwards the address it contains via
  /// <see cref="SendSelectProtocol"/>; afterwards it parses incoming voice packets,
  /// decrypts them when encryption is on and raises them through <c>RaiseOnPacket</c>.
  /// Packets that are malformed or not voice data are skipped rather than ending the loop.
  /// </summary>
  /// <param name="cancelToken">Token that ends the loop when cancelled.</param>
  private void ReceiveVoiceAsync(CancellationToken cancelToken)
  {
      var closeTask = cancelToken.Wait();
      try
      {
          byte[] packet, decodingBuffer = null, nonce = null, result;
          int packetLength, resultOffset, resultLength;
          IPEndPoint endpoint = new IPEndPoint(IPAddress.Any, 0);

          // Decoding buffers are only needed when incoming audio is enabled.
          if ((_config.Mode & AudioMode.Incoming) != 0)
          {
              decodingBuffer = new byte[MaxOpusSize];
              nonce = new byte[24];
          }

          while (!cancelToken.IsCancellationRequested)
          {
              Thread.Sleep(1);
              if (_udp.Available <= 0)
                  continue;

#if !DOTNET5_4
              packet = _udp.Receive(ref endpoint);
#else
              //TODO: Is this really the only way to end a Receive call in DOTNET5_4?
              var receiveTask = _udp.ReceiveAsync();
              var task = Task.WhenAny(closeTask, receiveTask).Result;
              if (task == closeTask)
                  break;
              var udpPacket = receiveTask.Result;
              packet = udpPacket.Buffer;
              endpoint = udpPacket.RemoteEndPoint;
#endif
              packetLength = packet.Length;

              // Ignore empty datagrams and anything not sent by the voice endpoint.
              if (packetLength == 0 || !endpoint.Equals(_endpoint))
                  continue;

              if (_state != ConnectionState.Connected)
              {
                  // Still handshaking: only the 70-byte discovery reply is of interest.
                  // Anything else is skipped so a stray packet cannot end the thread.
                  if (packetLength != 70)
                      continue;

                  // Bytes 4..67 hold a null-padded address string, bytes 68..69 the port (low byte first).
                  string ip = Encoding.UTF8.GetString(packet, 4, 70 - 6).TrimEnd('\0');
                  int port = packet[68] | packet[69] << 8;
                  SendSelectProtocol(ip, port);

                  if ((_config.Mode & AudioMode.Incoming) == 0)
                      return; //We dont need this thread anymore
                  continue;
              }

              //Parse RTP Data
              // Packets that are not voice data are dropped instead of terminating the
              // receive loop, matching how a failed decrypt is handled below.
              if (packetLength < 12) continue;
              if (packet[0] != 0x80) continue; //Flags
              if (packet[1] != 0x78) continue; //Payload Type

              uint ssrc = (uint)((packet[8] << 24) |
                                 (packet[9] << 16) |
                                 (packet[10] << 8) |
                                 (packet[11] << 0));

              //Decrypt
              if (_isEncrypted)
              {
                  if (packetLength < 28) //12 + 16 (RTP + Poly1305 MAC)
                      continue;

                  // resultLength bytes are later read from decodingBuffer, so a payload
                  // larger than the buffer cannot be handled safely; drop it.
                  if (packetLength - 28 > decodingBuffer.Length)
                      continue;

                  // The nonce is the 12-byte header; the remaining nonce bytes stay zero.
                  Buffer.BlockCopy(packet, 0, nonce, 0, 12);
                  int ret = SecretBox.Decrypt(packet, 12, packetLength - 12, decodingBuffer, nonce, _secretKey);
                  if (ret != 0)
                      continue;

                  result = decodingBuffer;
                  resultOffset = 0;
                  resultLength = packetLength - 28;
              }
              else //Plain
              {
                  result = packet;
                  resultOffset = 12;
                  resultLength = packetLength - 12;
              }

              /*if (_logLevel >= LogMessageSeverity.Debug)
                  RaiseOnLog(LogMessageSeverity.Debug, $"Received {buffer.Length - 12} bytes.");*/

              // Only packets whose SSRC has been mapped to a user are raised.
              long userId;
              if (_ssrcMapping.TryGetValue(ssrc, out userId))
                  RaiseOnPacket(userId, _channelId.Value, result, resultOffset, resultLength);
          }
      }
      catch (OperationCanceledException) { }
      catch (InvalidOperationException) { } //Includes ObjectDisposedException
  }
  /// <summary>
  /// Send loop run on its own thread. It waits until the socket is connected, then
  /// repeatedly pops a PCM frame from the send buffer, encodes it (and encrypts it when
  /// encryption is on) and sends it once per frame interval. A small ping packet is
  /// sent roughly every five seconds. Socket errors on either send are logged and the
  /// loop keeps running.
  /// </summary>
  /// <param name="cancelToken">Token that ends the loop when cancelled.</param>
  /// <exception cref="InvalidOperationException">
  /// Raised internally when the ping packet cannot be encrypted; it is swallowed by the
  /// method's own handler, which ends the loop.
  /// </exception>
  private void SendVoiceAsync(CancellationToken cancelToken)
  {
      try
      {
          // Nothing can be sent until the handshake has finished.
          while (!cancelToken.IsCancellationRequested && _state != ConnectionState.Connected)
              Thread.Sleep(1);
          if (cancelToken.IsCancellationRequested)
              return;

          byte[] frame = new byte[_encoder.FrameSize];
          byte[] encodedFrame = new byte[MaxOpusSize];
          byte[] voicePacket, pingPacket, nonce = null;
          uint timestamp = 0;
          double nextTicks = 0.0, nextPingTicks = 0.0;
          long ticksPerSeconds = Stopwatch.Frequency;
          double ticksPerMillisecond = Stopwatch.Frequency / 1000.0;
          double ticksPerFrame = ticksPerMillisecond * _encoder.FrameLength;
          uint samplesPerFrame = (uint)_encoder.SamplesPerFrame;
          Stopwatch sw = Stopwatch.StartNew();

          if (_isEncrypted)
          {
              nonce = new byte[24];
              voicePacket = new byte[MaxOpusSize + 12 + 16];
          }
          else
              voicePacket = new byte[MaxOpusSize + 12];

          pingPacket = new byte[8];
          pingPacket[0] = 0x80; //Flags;
          pingPacket[1] = 0xC9; //Payload Type
          pingPacket[2] = 0x00; //Length
          pingPacket[3] = 0x01; //Length (1*8 bytes)
          pingPacket[4] = (byte)((_ssrc >> 24) & 0xFF);
          pingPacket[5] = (byte)((_ssrc >> 16) & 0xFF);
          pingPacket[6] = (byte)((_ssrc >> 8) & 0xFF);
          pingPacket[7] = (byte)((_ssrc >> 0) & 0xFF);
          if (_isEncrypted)
          {
              // The ping never changes, so it is encrypted once up front. encodedFrame is
              // borrowed as scratch space and the nonce is cleared again afterwards.
              Buffer.BlockCopy(pingPacket, 0, nonce, 0, 8);
              int ret = SecretBox.Encrypt(pingPacket, 8, encodedFrame, 0, nonce, _secretKey);
              if (ret != 0)
                  throw new InvalidOperationException("Failed to encrypt ping packet");
              pingPacket = new byte[pingPacket.Length + 16];
              Buffer.BlockCopy(encodedFrame, 0, pingPacket, 0, pingPacket.Length);
              Array.Clear(nonce, 0, nonce.Length);
          }

          // Constant parts of the voice packet header; sequence and timestamp are
          // filled in per frame.
          int rtpPacketLength = 0;
          voicePacket[0] = 0x80; //Flags;
          voicePacket[1] = 0x78; //Payload Type
          voicePacket[8] = (byte)((_ssrc >> 24) & 0xFF);
          voicePacket[9] = (byte)((_ssrc >> 16) & 0xFF);
          voicePacket[10] = (byte)((_ssrc >> 8) & 0xFF);
          voicePacket[11] = (byte)((_ssrc >> 0) & 0xFF);

          if (_isEncrypted)
              Buffer.BlockCopy(voicePacket, 0, nonce, 0, 12);

          bool hasFrame = false;
          while (!cancelToken.IsCancellationRequested)
          {
              // Prepare the next packet ahead of its send time.
              if (!hasFrame && _sendBuffer.Pop(frame))
              {
                  ushort sequence = unchecked(_sequence++);
                  voicePacket[2] = (byte)((sequence >> 8) & 0xFF);
                  voicePacket[3] = (byte)((sequence >> 0) & 0xFF);
                  voicePacket[4] = (byte)((timestamp >> 24) & 0xFF);
                  voicePacket[5] = (byte)((timestamp >> 16) & 0xFF);
                  voicePacket[6] = (byte)((timestamp >> 8) & 0xFF);
                  voicePacket[7] = (byte)((timestamp >> 0) & 0xFF);

                  //Encode
                  int encodedLength = _encoder.EncodeFrame(frame, 0, encodedFrame);

                  //Encrypt
                  if (_isEncrypted)
                  {
                      Buffer.BlockCopy(voicePacket, 2, nonce, 2, 6); //Update nonce
                      int ret = SecretBox.Encrypt(encodedFrame, encodedLength, voicePacket, 12, nonce, _secretKey);
                      if (ret != 0)
                          continue; // Frame could not be encrypted: drop it and try the next one.
                      rtpPacketLength = encodedLength + 12 + 16;
                  }
                  else
                  {
                      Buffer.BlockCopy(encodedFrame, 0, voicePacket, 12, encodedLength);
                      rtpPacketLength = encodedLength + 12;
                  }

                  timestamp = unchecked(timestamp + samplesPerFrame);
                  hasFrame = true;
              }

              long currentTicks = sw.ElapsedTicks;
              double ticksToNextFrame = nextTicks - currentTicks;
              if (ticksToNextFrame <= 0.0)
              {
                  if (hasFrame)
                  {
                      try
                      {
                          _udp.Send(voicePacket, rtpPacketLength);
                      }
                      catch (SocketException ex)
                      {
                          _logger.Error("Failed to send UDP packet.", ex);
                      }
                      hasFrame = false;
                  }
                  nextTicks += ticksPerFrame;

                  //Is it time to send out another ping?
                  if (currentTicks > nextPingTicks)
                  {
                      // Guarded like the voice send above: SocketException is not covered by
                      // the outer handlers, so an unguarded failure here would escape the thread.
                      try
                      {
                          _udp.Send(pingPacket, pingPacket.Length);
                      }
                      catch (SocketException ex)
                      {
                          _logger.Error("Failed to send UDP packet.", ex);
                      }
                      nextPingTicks = currentTicks + 5 * ticksPerSeconds;
                  }
              }
              else
              {
                  if (hasFrame)
                  {
                      // A packet is ready: sleep for the whole milliseconds left until it is due.
                      int time = (int)Math.Floor(ticksToNextFrame / ticksPerMillisecond);
                      if (time > 0)
                          Thread.Sleep(time);
                  }
                  else
                      Thread.Sleep(1); //Give as much time to the encrypter as possible
              }
          }
      }
      catch (OperationCanceledException) { }
      catch (InvalidOperationException) { } //Includes ObjectDisposedException
  }
  345. #if !DOTNET5_4
  // Returns a task that closes the UDP socket once the cancel token has been triggered.
  // UdpClient offers no way to pass a cancel token to its calls, so closing the socket
  // is what unblocks them.
  private Task WatcherAsync()
  {
      var cancelled = _cancelToken.Wait();
      return cancelled.ContinueWith(_ => _udp.Close());
  }
  353. #endif
  /// <summary>
  /// Handles one JSON message from the voice gateway after letting the base class
  /// process it: sets up the UDP connection on Ready, records the heartbeat reply,
  /// stores the secret key on SessionDescription and forwards Speaking updates.
  /// Unknown opcodes are logged as warnings.
  /// </summary>
  /// <param name="json">Raw JSON text of the message.</param>
  /// <exception cref="InvalidOperationException">
  /// Encryption is enabled but the server does not offer the expected mode, or no IPv4
  /// address could be resolved for the voice host.
  /// </exception>
  protected override async Task ProcessMessage(string json)
  {
      await base.ProcessMessage(json).ConfigureAwait(false);

      var msg = JsonConvert.DeserializeObject<WebSocketMessage>(json);
      var opCode = (VoiceOpCodes)msg.Operation;
      switch (opCode)
      {
          case VoiceOpCodes.Ready:
              {
                  if (_state != ConnectionState.Connected)
                  {
                      var payload = (msg.Payload as JToken).ToObject<VoiceReadyEvent>(_serializer);
                      _heartbeatInterval = payload.HeartbeatInterval;
                      _ssrc = payload.SSRC;

                      // The UDP socket is bound to IPAddress.Any (IPv4), so an IPv4 address
                      // is required. Taking the first DNS result blindly could yield an IPv6
                      // address, or null when nothing resolves.
                      var addresses = await Dns.GetHostAddressesAsync(Host.Replace("wss://", "")).ConfigureAwait(false);
                      var address = addresses.FirstOrDefault(a => a.AddressFamily == AddressFamily.InterNetwork);
                      if (address == null)
                          throw new InvalidOperationException("Unable to resolve an IPv4 address for the voice server.");
                      _endpoint = new IPEndPoint(address, payload.Port);

                      if (_config.EnableEncryption)
                      {
                          if (payload.Modes.Contains(EncryptedMode))
                          {
                              _encryptionMode = EncryptedMode;
                              _isEncrypted = true;
                          }
                          else
                              throw new InvalidOperationException("Unexpected encryption format.");
                      }
                      else
                      {
                          _encryptionMode = UnencryptedMode;
                          _isEncrypted = false;
                      }

                      _udp.Connect(_endpoint);

                      _sequence = 0;// (ushort)_rand.Next(0, ushort.MaxValue);
                      //No thread issue here because the send loop doesn't start until _state is Connected

                      // Discovery request: 70 bytes, our SSRC in the first four, the rest zero.
                      // The reply is handled by the receive loop.
                      byte[] packet = new byte[70];
                      packet[0] = (byte)((_ssrc >> 24) & 0xFF);
                      packet[1] = (byte)((_ssrc >> 16) & 0xFF);
                      packet[2] = (byte)((_ssrc >> 8) & 0xFF);
                      packet[3] = (byte)((_ssrc >> 0) & 0xFF);
                      await _udp.SendAsync(packet, 70).ConfigureAwait(false);
                  }
              }
              break;
          case VoiceOpCodes.Heartbeat:
              {
                  long time = EpochTime.GetMilliseconds();
                  var payload = (long)msg.Payload;
                  // NOTE(review): if the payload echoes the time the heartbeat was sent, this
                  // difference is negative and the operands look swapped - confirm against
                  // HeartbeatCommand before changing.
                  _ping = (int)(payload - time);
                  //TODO: Use this to estimate latency
              }
              break;
          case VoiceOpCodes.SessionDescription:
              {
                  var payload = (msg.Payload as JToken).ToObject<JoinServerEvent>(_serializer);
                  _secretKey = payload.SecretKey;
                  SendIsTalking(true);
                  EndConnect();
              }
              break;
          case VoiceOpCodes.Speaking:
              {
                  var payload = (msg.Payload as JToken).ToObject<IsTalkingEvent>(_serializer);
                  RaiseIsSpeaking(payload.UserId, payload.IsSpeaking);
              }
              break;
          default:
              if (_logger.Level >= LogSeverity.Warning)
                  _logger.Warning($"Unknown Opcode: {opCode}");
              break;
      }
  }
  /// <summary>Pushes PCM data onto the send buffer.</summary>
  /// <param name="data">Array holding the PCM data.</param>
  /// <param name="bytes">Byte count passed to the buffer along with <paramref name="data"/>.</param>
  public void SendPCMFrames(byte[] data, int bytes) => _sendBuffer.Push(data, bytes, _cancelToken);
  /// <summary>Clears the send buffer.</summary>
  public void ClearPCMFrames() => _sendBuffer.Clear(_cancelToken);
  /// <summary>Blocks on the send buffer's <c>Wait</c> call using the socket's cancel token.</summary>
  public void WaitForQueue() => _sendBuffer.Wait(_cancelToken);
  /// <summary>
  /// Returns a task that waits on the connected event for up to <paramref name="timeout"/>.
  /// The task fails with <see cref="TimeoutException"/> when the event is not signalled
  /// in time; when the wait is cancelled, the task manager's <c>ThrowException</c> is invoked.
  /// </summary>
  /// <param name="timeout">Timeout value passed to the event's <c>Wait</c> call.</param>
  public Task WaitForConnection(int timeout)
  {
      Action waitForSignal = () =>
      {
          bool signalled;
          try
          {
              signalled = _connectedEvent.Wait(timeout, _cancelToken);
          }
          catch (OperationCanceledException)
          {
              _taskManager.ThrowException();
              return;
          }

          if (!signalled)
              throw new TimeoutException();
      };
      return Task.Run(waitForSignal);
  }
  /// <summary>
  /// Queues an identify command carrying the server id, session id, audio token and user id.
  /// </summary>
  public void SendIdentify()
  {
      var identify = new IdentifyCommand
      {
          Payload =
          {
              ServerId = _serverId.Value,
              SessionId = _client.SessionId,
              Token = _audioClient.Token,
              UserId = _client.UserId.Value
          }
      };
      QueueMessage(identify);
  }
  /// <summary>
  /// Queues a select-protocol command for "udp" with the given address and port and
  /// the encryption mode currently in use.
  /// </summary>
  /// <param name="externalIp">Address placed in the command's socket data.</param>
  /// <param name="externalPort">Port placed in the command's socket data.</param>
  public void SendSelectProtocol(string externalIp, int externalPort)
  {
      var selectProtocol = new SelectProtocolCommand
      {
          Payload =
          {
              Protocol = "udp",
              SocketData =
              {
                  Address = externalIp,
                  Mode = _encryptionMode,
                  Port = externalPort
              }
          }
      };
      QueueMessage(selectProtocol);
  }
  /// <summary>Queues a speaking command with the given flag and a delay of zero.</summary>
  /// <param name="value">Value placed in the command's <c>IsSpeaking</c> field.</param>
  public void SendIsTalking(bool value)
  {
      var speaking = new SpeakingCommand
      {
          Payload =
          {
              IsSpeaking = value,
              Delay = 0
          }
      };
      QueueMessage(speaking);
  }
  /// <summary>Queues a new heartbeat command.</summary>
  public override void SendHeartbeat()
  {
      var heartbeat = new HeartbeatCommand();
      QueueMessage(heartbeat);
  }
  481. }
  482. }