diff --git a/examples/HelloWorld/src/Program.cs b/examples/HelloWorld/src/Program.cs index 5ff4fb1..09826ff 100644 --- a/examples/HelloWorld/src/Program.cs +++ b/examples/HelloWorld/src/Program.cs @@ -4,6 +4,7 @@ using System.IO; using System.Threading.Tasks; using DSharpPlus.VoiceLink.Enums; +using DSharpPlus.VoiceLink.VoiceEncrypters; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Serilog; diff --git a/src/DSharpPlus.VoiceLink/DSharpPlus.VoiceLink.csproj b/src/DSharpPlus.VoiceLink/DSharpPlus.VoiceLink.csproj index 642339b..81937bb 100644 --- a/src/DSharpPlus.VoiceLink/DSharpPlus.VoiceLink.csproj +++ b/src/DSharpPlus.VoiceLink/DSharpPlus.VoiceLink.csproj @@ -16,7 +16,6 @@ - @@ -26,4 +25,7 @@ - \ No newline at end of file + + + + diff --git a/src/DSharpPlus.VoiceLink/Opus/OpusDecoder.cs b/src/DSharpPlus.VoiceLink/Opus/OpusDecoder.cs index b6d21e1..8851b9d 100644 --- a/src/DSharpPlus.VoiceLink/Opus/OpusDecoder.cs +++ b/src/DSharpPlus.VoiceLink/Opus/OpusDecoder.cs @@ -31,15 +31,22 @@ public unsafe void Init(OpusSampleRate sampleRate, int channels) } } - /// - public unsafe int Decode(ReadOnlySpan data, ref Span pcm, int frameSize, bool decodeFec) + /// + public unsafe int Decode(ReadOnlySpan data, Span pcm, bool decodeFec) { int decodedLength; fixed (OpusDecoder* pinned = &this) fixed (byte* dataPointer = data) - fixed (short* pcmPointer = pcm) + fixed (byte* pcmPointer = pcm) { - decodedLength = OpusNativeMethods.Decode(pinned, dataPointer, data.Length, pcmPointer, frameSize, decodeFec ? 1 : 0); + decodedLength = OpusNativeMethods.Decode( + pinned, + dataPointer, + data.Length, + pcmPointer, + OpusNativeMethods.PacketGetNbFrames(dataPointer, data.Length), + decodeFec ? 1 : 0 + ); } // Less than zero means an error occurred @@ -48,16 +55,17 @@ public unsafe int Decode(ReadOnlySpan data, ref Span pcm, int frame throw new OpusException((OpusErrorCode)decodedLength); } - return decodedLength * sizeof(short) * 2; // Multiplied by the sample size, which is size of short times the number of channels + // Multiplied by the sample size, which is size of short times the number of channels + return decodedLength * sizeof(short) * 2; } /// - public unsafe int DecodeFloat(ReadOnlySpan data, ref Span pcm, int frameSize, bool decodeFec) + public unsafe int DecodeFloat(ReadOnlySpan data, Span pcm, int frameSize, bool decodeFec) { int decodedLength; fixed (OpusDecoder* pinned = &this) fixed (byte* dataPointer = data) - fixed (float* pcmPointer = pcm) + fixed (byte* pcmPointer = pcm) { decodedLength = OpusNativeMethods.DecodeFloat(pinned, dataPointer, data.Length, pcmPointer, frameSize, decodeFec ? 1 : 0); } diff --git a/src/DSharpPlus.VoiceLink/Opus/OpusException.cs b/src/DSharpPlus.VoiceLink/Opus/OpusException.cs index 5ca0e17..5ef7b76 100644 --- a/src/DSharpPlus.VoiceLink/Opus/OpusException.cs +++ b/src/DSharpPlus.VoiceLink/Opus/OpusException.cs @@ -31,7 +31,7 @@ public sealed class OpusException : Exception OpusErrorCode.Unimplemented => "Error -5: Invalid/unsupported request number.", OpusErrorCode.InvalidState => "Error -6: An encoder or decoder structure is invalid or already freed.", OpusErrorCode.AllocFail => "Error -7: Memory allocation has failed.", - _ => "Unknown error." + _ => $"Error {(int)errorCode}: Unknown error." }; } } diff --git a/src/DSharpPlus.VoiceLink/Opus/OpusNativeMethods.Decoder.cs b/src/DSharpPlus.VoiceLink/Opus/OpusNativeMethods.Decoder.cs index 65f9343..85b7bbe 100644 --- a/src/DSharpPlus.VoiceLink/Opus/OpusNativeMethods.Decoder.cs +++ b/src/DSharpPlus.VoiceLink/Opus/OpusNativeMethods.Decoder.cs @@ -43,11 +43,11 @@ internal static partial class OpusNativeMethods /// Flag (0 or 1) to request that any in-band forward error correction data be decoded. If no such data is available, the frame is decoded as if it were lost. /// Number of decoded samples or an [LibraryImport("opus", EntryPoint = "opus_decode")] - public static unsafe partial int Decode(OpusDecoder* decoder, byte* data, int length, short* pcm, int frameSize, int decodeFec); + public static unsafe partial int Decode(OpusDecoder* decoder, byte* data, int length, byte* pcm, int frameSize, int decodeFec); /// [LibraryImport("opus", EntryPoint = "opus_decode_float")] - public static unsafe partial int DecodeFloat(OpusDecoder* decoder, byte* data, int length, float* pcm, int frameSize, int decodeFec); + public static unsafe partial int DecodeFloat(OpusDecoder* decoder, byte* data, int length, byte* pcm, int frameSize, int decodeFec); /// /// Perform a CTL function on an Opus decoder. diff --git a/src/DSharpPlus.VoiceLink/Rtp/RtcpHeader.cs b/src/DSharpPlus.VoiceLink/Rtp/RtcpHeader.cs new file mode 100644 index 0000000..fa6e884 --- /dev/null +++ b/src/DSharpPlus.VoiceLink/Rtp/RtcpHeader.cs @@ -0,0 +1,57 @@ +using System; +using System.Buffers.Binary; + +namespace DSharpPlus.VoiceLink.Rtp +{ + public readonly record struct RtcpHeader + { + /// + /// Gets the version of the RTCP header. + /// + public int Version { get; init; } + + /// + /// Gets whether the RTCP header has padding. + /// + public int Padding { get; init; } + + /// + /// Gets the report count of the RTCP header. + /// + public int ReportCount { get; init; } + + /// + /// Gets the packet type of the RTCP header. + /// + public int PacketType { get; init; } + + /// + /// Gets the length of the RTCP header. + /// + public int Length { get; init; } + + /// + /// Gets the SSRC of the RTCP header. + /// + public uint Ssrc { get; init; } + + public RtcpHeader(ReadOnlySpan data) + { + if (data.Length < 8) + { + throw new ArgumentException("The source buffer must have a minimum of 8 bytes for it to be a RTCP header.", nameof(data)); + } + else if (data[1] != 201) + { + throw new ArgumentException("The source buffer must contain a RTCP receiver report.", nameof(data)); + } + + Version = data[0] >> 6; + Padding = (data[0] >> 5) & 0b00000001; + ReportCount = data[0] & 0b00011111; + PacketType = data[1]; + Length = BinaryPrimitives.ReadUInt16BigEndian(data[2..4]); + Ssrc = BinaryPrimitives.ReadUInt32BigEndian(data[4..8]); + } + } +} diff --git a/src/DSharpPlus.VoiceLink/Rtp/RtcpReceiverReportPacket.cs b/src/DSharpPlus.VoiceLink/Rtp/RtcpReceiverReportPacket.cs new file mode 100644 index 0000000..642feef --- /dev/null +++ b/src/DSharpPlus.VoiceLink/Rtp/RtcpReceiverReportPacket.cs @@ -0,0 +1,31 @@ +using System; +using System.Collections.Generic; + +namespace DSharpPlus.VoiceLink.Rtp +{ + public readonly record struct RtcpReceiverReportPacket + { + /// + /// Gets the header of the RTCP packet. + /// + public RtcpHeader Header { get; init; } + + /// + /// Gets the report blocks of the RTCP packet. + /// + public IReadOnlyList ReportBlocks { get; init; } + + public RtcpReceiverReportPacket(RtcpHeader header, ReadOnlySpan data) + { + List reportBlocks = new(header.ReportCount); + for (int i = 0; i < header.ReportCount; i++) + { + reportBlocks.Add(new RtcpReportBlock(data)); + data = data[24..]; + } + + Header = header; + ReportBlocks = reportBlocks; + } + } +} diff --git a/src/DSharpPlus.VoiceLink/Rtp/RtcpReportBlock.cs b/src/DSharpPlus.VoiceLink/Rtp/RtcpReportBlock.cs new file mode 100644 index 0000000..01c0e4a --- /dev/null +++ b/src/DSharpPlus.VoiceLink/Rtp/RtcpReportBlock.cs @@ -0,0 +1,27 @@ +using System; +using System.Buffers.Binary; + +namespace DSharpPlus.VoiceLink.Rtp +{ + public readonly record struct RtcpReportBlock + { + public ushort SynchronizationSource { get; } + public ushort FractionLost { get; } + public uint CumulativePacketsLost { get; } + public uint ExtendedHighestSequenceNumberReceived { get; } + public uint InterarrivalJitter { get; } + public uint LastSenderReport { get; } + public uint DelaySinceLastSenderReport { get; } + + public RtcpReportBlock(ReadOnlySpan data) + { + SynchronizationSource = BinaryPrimitives.ReadUInt16BigEndian(data); + FractionLost = data[2]; + CumulativePacketsLost = BinaryPrimitives.ReadUInt32BigEndian(data[3..]); + ExtendedHighestSequenceNumberReceived = BinaryPrimitives.ReadUInt32BigEndian(data[7..]); + InterarrivalJitter = BinaryPrimitives.ReadUInt32BigEndian(data[11..]); + LastSenderReport = BinaryPrimitives.ReadUInt32BigEndian(data[15..]); + DelaySinceLastSenderReport = BinaryPrimitives.ReadUInt32BigEndian(data[19..]); + } + } +} diff --git a/src/DSharpPlus.VoiceLink/Rtp/RtcpUtilities.cs b/src/DSharpPlus.VoiceLink/Rtp/RtcpUtilities.cs new file mode 100644 index 0000000..a81f9a0 --- /dev/null +++ b/src/DSharpPlus.VoiceLink/Rtp/RtcpUtilities.cs @@ -0,0 +1,28 @@ +using System; + +namespace DSharpPlus.VoiceLink.Rtp +{ + public static class RtcpUtilities + { + /// + /// Determines if the given buffer contains a valid RTCP header. + /// + /// The data to reference. + /// Whether the data contains a valid RTCP header. + public static bool IsRtcpReceiverReport(ReadOnlySpan source) => source.Length >= 8 && source[1] == 201; + + public static RtcpHeader DecodeHeader(ReadOnlySpan source) + { + if (source.Length < 8) + { + throw new ArgumentException("The source buffer must have a minimum of 8 bytes for it to be a RTCP header.", nameof(source)); + } + else if (source[1] != 201) + { + throw new ArgumentException("The source buffer must contain a RTCP receiver report.", nameof(source)); + } + + return new RtcpHeader(source); + } + } +} diff --git a/src/DSharpPlus.VoiceLink/Rtp/RtpHeader.cs b/src/DSharpPlus.VoiceLink/Rtp/RtpHeader.cs index 55cc9bd..adfd17f 100644 --- a/src/DSharpPlus.VoiceLink/Rtp/RtpHeader.cs +++ b/src/DSharpPlus.VoiceLink/Rtp/RtpHeader.cs @@ -1,9 +1,29 @@ namespace DSharpPlus.VoiceLink.Rtp { - public readonly struct RtpHeader + public readonly record struct RtpHeader { + public byte FirstMetadata { get; init; } + public byte SecondMetadata { get; init; } public ushort Sequence { get; init; } public uint Timestamp { get; init; } public uint Ssrc { get; init; } + + // The version is the first two bits of the first byte. + public byte Version => (byte)(FirstMetadata & 0b11000000); + + // The padding bit is the third bit of the first byte. + public bool HasPadding => (FirstMetadata & 0b00100000) != 0; + + // The extension bit is the fourth bit of the first byte. + public bool HasExtension => (FirstMetadata & 0b00010000) != 0; + + // The CSRC count is the last four bits of the first byte. + public byte CsrcCount => (byte)((FirstMetadata & 0b00001111) >> 4); + + // The marker bit is the first bit of the second byte. + public bool HasMarker => (SecondMetadata & 0b10000000) != 0; + + // The payload type is the last seven bits of the second byte. + public byte PayloadType => (byte)(SecondMetadata & 0b01111111); } } diff --git a/src/DSharpPlus.VoiceLink/Rtp/RtpUtilities.cs b/src/DSharpPlus.VoiceLink/Rtp/RtpUtilities.cs index cf23e8c..6bd696a 100644 --- a/src/DSharpPlus.VoiceLink/Rtp/RtpUtilities.cs +++ b/src/DSharpPlus.VoiceLink/Rtp/RtpUtilities.cs @@ -11,6 +11,7 @@ public static class RtpUtilities public const byte VersionWithExtension = 0x90; public const byte Version = 0x80; public const byte PayloadType = 0x78; + public static readonly byte[] RtpExtensionOneByte = [190, 222]; /// /// Determines if the given buffer contains a valid RTP header. @@ -67,6 +68,8 @@ public static RtpHeader DecodeHeader(ReadOnlySpan source) return new RtpHeader() { + FirstMetadata = source[0], + SecondMetadata = source[1], Sequence = BinaryPrimitives.ReadUInt16BigEndian(source[2..4]), Timestamp = BinaryPrimitives.ReadUInt32BigEndian(source[4..8]), Ssrc = BinaryPrimitives.ReadUInt32BigEndian(source[8..12]) diff --git a/src/DSharpPlus.VoiceLink/VoiceEncrypters/IVoiceEncrypter.cs b/src/DSharpPlus.VoiceLink/VoiceEncrypters/IVoiceEncrypter.cs index f90aaea..6540ad7 100644 --- a/src/DSharpPlus.VoiceLink/VoiceEncrypters/IVoiceEncrypter.cs +++ b/src/DSharpPlus.VoiceLink/VoiceEncrypters/IVoiceEncrypter.cs @@ -10,7 +10,8 @@ public interface IVoiceEncrypter int GetEncryptedSize(int length); int GetDecryptedSize(int length); - bool Encrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target); - bool Decrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target); + bool TryEncryptOpusPacket(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target); + bool TryDecryptOpusPacket(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target); + bool TryDecryptReportPacket(ReadOnlySpan data, ReadOnlySpan key, Span target) => false; } } diff --git a/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305.cs b/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305.cs index 16581b1..8f85259 100644 --- a/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305.cs +++ b/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305.cs @@ -15,7 +15,7 @@ public sealed record XSalsa20Poly1305 : IVoiceEncrypter public int GetEncryptedSize(int length) => length + SodiumXSalsa20Poly1305.MacSize; public int GetDecryptedSize(int length) => length - SodiumXSalsa20Poly1305.MacSize; - public bool Encrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) + public bool TryEncryptOpusPacket(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) { if (data.Length < SodiumXSalsa20Poly1305.MacSize) { @@ -35,10 +35,10 @@ public bool Encrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOn target[..12].CopyTo(nonce); // Encrypt the data - return SodiumXSalsa20Poly1305.Encrypt(data, key, nonce, target[24..]) == 0; + return SodiumXSalsa20Poly1305.Encrypt(data, key, nonce, target[12..]) == 0; } - public bool Decrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) + public bool TryDecryptOpusPacket(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) { if (data.Length < SodiumXSalsa20Poly1305.MacSize) { diff --git a/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305Lite.cs b/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305Lite.cs index 2394596..d199c97 100644 --- a/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305Lite.cs +++ b/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305Lite.cs @@ -23,7 +23,7 @@ public sealed record XSalsa20Poly1305Lite : IVoiceEncrypter public int GetEncryptedSize(int length) => length + SodiumXSalsa20Poly1305.MacSize; public int GetDecryptedSize(int length) => length - SodiumXSalsa20Poly1305.MacSize; - public bool Encrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) + public bool TryEncryptOpusPacket(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) { if (data.Length < SodiumXSalsa20Poly1305.MacSize) { @@ -47,7 +47,7 @@ public bool Encrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOn return SodiumXSalsa20Poly1305.Encrypt(data, key, nonce, target[16..]) == 0; } - public bool Decrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) + public bool TryDecryptOpusPacket(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) { if (data.Length < SodiumXSalsa20Poly1305.MacSize) { diff --git a/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305Suffix.cs b/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305Suffix.cs index bcad73a..076d61b 100644 --- a/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305Suffix.cs +++ b/src/DSharpPlus.VoiceLink/VoiceEncrypters/XSalsa20Poly1305Suffix.cs @@ -16,7 +16,7 @@ public sealed record XSalsa20Poly1305Suffix : IVoiceEncrypter public int GetEncryptedSize(int length) => length + SodiumXSalsa20Poly1305.MacSize; public int GetDecryptedSize(int length) => length - SodiumXSalsa20Poly1305.MacSize; - public bool Encrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) + public bool TryEncryptOpusPacket(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) { if (data.Length < SodiumXSalsa20Poly1305.MacSize) { @@ -40,7 +40,7 @@ public bool Encrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOn return SodiumXSalsa20Poly1305.Encrypt(data, key, nonce, target[36..]) == 0; } - public bool Decrypt(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) + public bool TryDecryptOpusPacket(VoiceLinkUser voiceLinkUser, ReadOnlySpan data, ReadOnlySpan key, Span target) { if (data.Length < SodiumXSalsa20Poly1305.MacSize) { diff --git a/src/DSharpPlus.VoiceLink/VoiceLinkConfiguration.cs b/src/DSharpPlus.VoiceLink/VoiceLinkConfiguration.cs index 953ab37..f9fe573 100644 --- a/src/DSharpPlus.VoiceLink/VoiceLinkConfiguration.cs +++ b/src/DSharpPlus.VoiceLink/VoiceLinkConfiguration.cs @@ -1,6 +1,4 @@ using System; -using System.Net; -using DSharpPlus.Net.WebSocket; using DSharpPlus.VoiceLink.VoiceEncrypters; using Microsoft.Extensions.DependencyInjection; @@ -11,15 +9,6 @@ public sealed record VoiceLinkConfiguration public IServiceCollection ServiceCollection { get; set; } = new ServiceCollection(); internal IServiceProvider ServiceProvider => _serviceProvider ??= ServiceCollection.BuildServiceProvider(); private IServiceProvider? _serviceProvider; - - public IWebProxy? Proxy { get; set; } - public WebSocketClientFactoryDelegate WebSocketClientFactory - { - internal get => _webSocketClientFactory; - set => _webSocketClientFactory = value is null ? throw new ArgumentNullException(nameof(value)) : value; - } - private WebSocketClientFactoryDelegate _webSocketClientFactory = WebSocketClient.CreateNew; - public int MaxHeartbeatQueueSize { get; set; } = 5; public IVoiceEncrypter VoiceEncrypter { get; set; } = new XSalsa20Poly1305(); } diff --git a/src/DSharpPlus.VoiceLink/VoiceLinkConnection.cs b/src/DSharpPlus.VoiceLink/VoiceLinkConnection.cs index 462f087..af9d8c3 100644 --- a/src/DSharpPlus.VoiceLink/VoiceLinkConnection.cs +++ b/src/DSharpPlus.VoiceLink/VoiceLinkConnection.cs @@ -7,8 +7,6 @@ using System.Linq; using System.Net.Sockets; using System.Net.WebSockets; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Text.Json; using System.Threading; using System.Threading.Tasks; @@ -20,6 +18,7 @@ using DSharpPlus.VoiceLink.EventArgs; using DSharpPlus.VoiceLink.Payloads; using DSharpPlus.VoiceLink.Rtp; +using DSharpPlus.VoiceLink.Sodium; using DSharpPlus.VoiceLink.VoiceEncrypters; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; @@ -140,12 +139,12 @@ public async ValueTask InitializeAsync(VoiceStateUpdateEventArgs voiceStateUpdat Query = $"v=4&encoding=json" }.Uri; - _logger.LogDebug("Connection {GuildId}: Connecting to {Endpoint}", Guild.Id, _endpoint); + _logger.LogDebug("Connection {GuildId}: Connecting to {Endpoint}...", Guild.Id, _endpoint); await _webSocket.ConnectAsync(_endpoint, cancellationToken); - _logger.LogDebug("Connection {GuildId}: Connected to {Endpoint}", Guild.Id, _endpoint); + _logger.LogDebug("Connection {GuildId}: Connected to {Endpoint}.", Guild.Id, _endpoint); // Start receiving events - _logger.LogDebug("Connection {GuildId}: Starting voice gateway loop", Guild.Id); + _logger.LogDebug("Connection {GuildId}: Starting voice gateway loop...", Guild.Id); _ = StartVoiceGatewayAsync(); // Wait until we can start sending data. @@ -159,12 +158,11 @@ static VoiceOpCode ParseVoiceOpCode(ReadResult readResult) Utf8JsonReader utf8JsonReader = new(readResult.Buffer); while (utf8JsonReader.Read()) { - if (utf8JsonReader.TokenType != JsonTokenType.PropertyName || utf8JsonReader.GetString() != "op") + if (utf8JsonReader.TokenType != JsonTokenType.PropertyName || utf8JsonReader.GetString() != "op" || !utf8JsonReader.Read()) { continue; } - utf8JsonReader.Read(); return (VoiceOpCode)utf8JsonReader.GetInt32(); } @@ -178,7 +176,7 @@ static VoiceOpCode ParseVoiceOpCode(ReadResult readResult) await _webSocket.ReadAsync(_websocketPipe.Writer, _cancellationTokenSource.Token); ReadResult readResult = await _websocketPipe.Reader.ReadAsync(_cancellationTokenSource.Token); VoiceOpCode voiceOpCode = ParseVoiceOpCode(readResult); - _logger.LogTrace("Connection {GuildId}: Received {VoiceOpCode}", Guild.Id, voiceOpCode); + _logger.LogTrace("Connection {GuildId}: Received {VoiceOpCode}.", Guild.Id, voiceOpCode); // TODO: Maybe dictionary of delegates? // Dictionary> handlers = new(); @@ -187,11 +185,11 @@ static VoiceOpCode ParseVoiceOpCode(ReadResult readResult) { case VoiceOpCode.Hello: // Start heartbeat - _logger.LogDebug("Connection {GuildId}: Starting heartbeat", Guild.Id); + _logger.LogDebug("Connection {GuildId}: Starting heartbeat...", Guild.Id); _ = SendHeartbeatAsync((await _websocketPipe.Reader.ParseAsync>(readResult)).Data); // Send Identify - _logger.LogTrace("Connection {GuildId}: Sending identify", Guild.Id); + _logger.LogTrace("Connection {GuildId}: Sending identify...", Guild.Id); await _webSocket.SendAsync(new VoiceGatewayDispatch() { OpCode = VoiceOpCode.Identify, @@ -208,12 +206,12 @@ await _webSocket.SendAsync(new VoiceGatewayDispatch() VoiceReadyPayload voiceReadyPayload = (await _websocketPipe.Reader.ParseAsync>(readResult)).Data; // Insert our SSRC code - _logger.LogDebug("Connection {GuildId}: Bot's SSRC code is {Ssrc}", Guild.Id, voiceReadyPayload.Ssrc); + _logger.LogDebug("Connection {GuildId}: Bot's SSRC code is {Ssrc}.", Guild.Id, voiceReadyPayload.Ssrc); _speakers.Add(voiceReadyPayload.Ssrc, new(this, voiceReadyPayload.Ssrc, Member)); // Setup UDP while also doing ip discovery - _logger.LogDebug("Connection {GuildId}: Setting up UDP, sending ip discovery", Guild.Id); - byte[] ipDiscovery = new DiscordIPDiscovery(0x01, 70, voiceReadyPayload.Ssrc, string.Empty, default); + _logger.LogDebug("Connection {GuildId}: Setting up UDP, sending ip discovery...", Guild.Id); + byte[] ipDiscovery = new DiscordIpDiscoveryPacket(0x01, 70, voiceReadyPayload.Ssrc, string.Empty, default); _udpClient.Connect(voiceReadyPayload.Ip, voiceReadyPayload.Port); await _udpClient.SendAsync(ipDiscovery, _cancellationTokenSource.Token); @@ -224,9 +222,9 @@ await _webSocket.SendAsync(new VoiceGatewayDispatch() throw new InvalidOperationException("Received invalid IP Discovery Response."); } - DiscordIPDiscovery reply = result.Buffer; - _logger.LogDebug("Connection {GuildId}: Received ip discovery response {Reply}", Guild.Id, reply); - _logger.LogTrace("Connection {GuildId}: Sending select protocol", Guild.Id); + DiscordIpDiscoveryPacket reply = result.Buffer; + _logger.LogDebug("Connection {GuildId}: Received ip discovery response: {Reply}", Guild.Id, reply); + _logger.LogTrace("Connection {GuildId}: Sending select protocol...", Guild.Id); await _webSocket.SendAsync(new() { OpCode = VoiceOpCode.SelectProtocol, @@ -262,11 +260,11 @@ await _webSocket.SendAsync(new() } break; case VoiceOpCode.Resumed: - _logger.LogInformation("Connection {GuildId}: Resumed", Guild.Id); + _logger.LogInformation("Connection {GuildId}: Resumed.", Guild.Id); break; case VoiceOpCode.ClientConnected: VoiceUserJoinPayload voiceClientConnectedPayload = (await _websocketPipe.Reader.ParseAsync>(readResult)).Data; - _logger.LogDebug("Connection {GuildId}: User {UserId} connected", Guild.Id, voiceClientConnectedPayload.UserId); + _logger.LogDebug("Connection {GuildId}: User {UserId} connected.", Guild.Id, voiceClientConnectedPayload.UserId); await Extension._userConnected.InvokeAsync(Extension, new VoiceLinkUserEventArgs() { Connection = this, @@ -275,7 +273,7 @@ await _webSocket.SendAsync(new() break; case VoiceOpCode.ClientDisconnect: VoiceUserLeavePayload voiceClientDisconnectedPayload = (await _websocketPipe.Reader.ParseAsync>(readResult)).Data; - _logger.LogDebug("Connection {GuildId}: User {UserId} disconnected", Guild.Id, voiceClientDisconnectedPayload.UserId); + _logger.LogDebug("Connection {GuildId}: User {UserId} disconnected.", Guild.Id, voiceClientDisconnectedPayload.UserId); if (_speakers.FirstOrDefault(x => x.Value.Member.Id == voiceClientDisconnectedPayload.UserId) is KeyValuePair kvp) { _speakers.Remove(kvp.Key); @@ -289,8 +287,16 @@ await _webSocket.SendAsync(new() break; case VoiceOpCode.Speaking: VoiceSpeakingPayload voiceSpeakingPayload = (await _websocketPipe.Reader.ParseAsync>(readResult)).Data; - _logger.LogTrace("Connection {GuildId}: User {UserId} is speaking", Guild.Id, voiceSpeakingPayload.UserId); - _speakers.Add(voiceSpeakingPayload.Ssrc, new(this, voiceSpeakingPayload.Ssrc, await Guild.GetMemberAsync(voiceSpeakingPayload.UserId))); + _logger.LogTrace("Connection {GuildId}: User {UserId} is speaking.", Guild.Id, voiceSpeakingPayload.UserId); + if (!_speakers.TryGetValue(voiceSpeakingPayload.Ssrc, out VoiceLinkUser? voiceLinkUser)) + { + _speakers.Add(voiceSpeakingPayload.Ssrc, new(this, voiceSpeakingPayload.Ssrc, await Guild.GetMemberAsync(voiceSpeakingPayload.UserId))); + } + else + { + voiceLinkUser.Member = await Guild.GetMemberAsync(voiceSpeakingPayload.UserId); + } + await Extension._userSpeaking.InvokeAsync(Extension, new VoiceLinkUserSpeakingEventArgs() { Connection = this, @@ -307,14 +313,14 @@ await _webSocket.SendAsync(new() catch (VoiceLinkWebsocketClosedException) when (_webSocket.State is WebSocketState.Connecting) { // In theory this means that resuming failed. We should just restart the connection entirely. - _logger.LogWarning("Connection {GuildId}: Websocket closed, restarting the connection entirely.", Guild.Id); + _logger.LogWarning("Connection {GuildId}: Websocket closed, restarting the connection entirely...", Guild.Id); await ReconnectAsync(); return; } catch (VoiceLinkWebsocketClosedException) when (_webSocket.State is not WebSocketState.Open) { // Attempt to reconnect and resume. If that fails then restart the connection entirely. - _logger.LogWarning("Connection {GuildId}: Websocket closed, attempting to resume.", Guild.Id); + _logger.LogWarning("Connection {GuildId}: Websocket closed, attempting to resume...", Guild.Id); await _webSocket.ConnectAsync(_endpoint!, _cancellationTokenSource.Token); await _webSocket.SendAsync(new DiscordVoiceResumingCommand() { @@ -333,14 +339,14 @@ private async Task SendHeartbeatAsync(VoiceHelloPayload voiceHelloPayload) { if (_heartbeatQueue.Count > Extension.Configuration.MaxHeartbeatQueueSize) { - _logger.LogError("Connection {GuildId}: Heartbeat queue is too large ({MaxHeartbeat}), disconnecting and reconnecting.", Guild.Id, Extension.Configuration.MaxHeartbeatQueueSize); + _logger.LogError("Connection {GuildId}: Heartbeat queue is too large ({MaxHeartbeat}), disconnecting and reconnecting...", Guild.Id, Extension.Configuration.MaxHeartbeatQueueSize); await ReconnectAsync(); return; } long unixTimestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(); _heartbeatQueue.Enqueue(unixTimestamp); - _logger.LogTrace("Connection {GuildId}: Sending heartbeat {UnixTimestamp}", Guild.Id, unixTimestamp); + _logger.LogTrace("Connection {GuildId}: Sending heartbeat {UnixTimestamp}...", Guild.Id, unixTimestamp); await _webSocket.SendAsync(new VoiceGatewayDispatch() { @@ -358,32 +364,63 @@ private async Task ReceiveAudioLoopAsync() // This is a hotpath. Any modifications to this code should always // lead to equal or better performance. If you're not sure, don't touch it. UdpReceiveResult udpReceiveResult = await _udpClient.ReceiveAsync(_cancellationTokenSource.Token); - if (udpReceiveResult.Buffer.Length == 8) + + // RTP Header. Additionally packets with a length of 48 bytes are spammed to us, however we don't know what they are. + // We suspect them to be RTCP receiver reports, however we cannot find a way to decode them. + if (!RtpUtilities.IsRtpHeader(udpReceiveResult.Buffer)) { - // Keep alive packet - UdpPing = TimeSpan.FromMilliseconds(Unsafe.As(ref udpReceiveResult.Buffer[0]) - DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()); + if (!RtcpUtilities.IsRtcpReceiverReport(udpReceiveResult.Buffer)) + { + _logger.LogWarning("Connection {GuildId}: Received an unknown packet with a length of {Length} bytes. It is not an RTP Header or a keep alive packet. Skipping.", Guild.Id, udpReceiveResult.Buffer.Length); + continue; + } + + RtcpHeader header = RtcpUtilities.DecodeHeader(udpReceiveResult.Buffer); + Memory data = udpReceiveResult.Buffer; + Memory nonce = ArrayPool.Shared.Rent(24); + data[^4..].CopyTo(nonce); - // TODO: Maybe we should send a keep alive packet back? + Memory payload = data[8..^4]; + Memory result = ArrayPool.Shared.Rent(udpReceiveResult.Buffer.Length - SodiumXSalsa20Poly1305.MacSize); + if (SodiumXSalsa20Poly1305.Decrypt(payload.Span, _secretKey, nonce.Span, result.Span) != 0) + { + //_logger.LogWarning("Connection {GuildId}: Failed to decrypt rtcp receiver report packet, skipping.", Guild.Id); + continue; + } + + RtcpReceiverReportPacket receiverReportPacket = new(header, payload.Span); + //_logger.LogTrace("Connection {GuildId}: Received RTCP receiver report packet: {ReceiverReportPacket}", Guild.Id, receiverReportPacket); + ArrayPool.Shared.Return(nonce.ToArray()); + ArrayPool.Shared.Return(result.ToArray()); continue; } - // RTP Header - if (!RtpUtilities.IsRtpHeader(udpReceiveResult.Buffer)) + RtpHeader rtpHeader = RtpUtilities.DecodeHeader(udpReceiveResult.Buffer); + if (rtpHeader.PayloadType != 120) { - _logger.LogWarning("Connection {GuildId}: Received an unknown packet with a length of {Length} bytes. It is not an RTP Header or a keep alive packet. Skipping.", Guild.Id, udpReceiveResult.Buffer.Length); + _logger.LogWarning("Connection {GuildId}: Received an unknown packet with a payload type of {PayloadType}. Skipping.", Guild.Id, rtpHeader.PayloadType); continue; } - RtpHeader rtpHeader = RtpUtilities.DecodeHeader(udpReceiveResult.Buffer); - if (!_speakers.TryGetValue(rtpHeader.Ssrc, out VoiceLinkUser? voiceLinkUser)) + if (rtpHeader.HasMarker || rtpHeader.HasExtension) { - _logger.LogWarning("Connection {GuildId}: Received audio from unknown user {Ssrc}, skipping.", Guild.Id, rtpHeader.Ssrc); + // All clients send a marker bit when they first connect. For now we're just going to ignore this. continue; } + if (!_speakers.TryGetValue(rtpHeader.Ssrc, out VoiceLinkUser? voiceLinkUser)) + { + // Create a new user if we don't have one + // We're explicitly passing a null member, however the dev should never expect this to + // be null as the speaking event should always fire once we receive both the user and the ssrc. + // TL;DR, this is to ensure we never lose any audio data. + voiceLinkUser = new(this, rtpHeader.Ssrc, null!); + _speakers.Add(rtpHeader.Ssrc, voiceLinkUser); + } + // Decrypt the audio byte[] decryptedAudio = ArrayPool.Shared.Rent(_voiceEncrypter.GetDecryptedSize(udpReceiveResult.Buffer.Length)); - if (!_voiceEncrypter.Decrypt(voiceLinkUser, udpReceiveResult.Buffer, _secretKey, decryptedAudio.AsSpan())) + if (!_voiceEncrypter.TryDecryptOpusPacket(voiceLinkUser, udpReceiveResult.Buffer, _secretKey, decryptedAudio.AsSpan())) { _logger.LogWarning("Connection {GuildId}: Failed to decrypt audio from {Ssrc}, skipping.", Guild.Id, rtpHeader.Ssrc); continue; @@ -391,30 +428,29 @@ private async Task ReceiveAudioLoopAsync() // TODO: Handle FEC (Forward Error Correction) aka packet loss. // * https://tools.ietf.org/html/rfc5109 + bool hasDataLoss = voiceLinkUser.UpdateSequence(rtpHeader.Sequence); // Decode the audio - DecodeOpusAudio(decryptedAudio.AsSpan().TrimEnd((byte)'\0'), voiceLinkUser); + DecodeOpusAudio(decryptedAudio, voiceLinkUser, hasDataLoss); ArrayPool.Shared.Return(decryptedAudio); await voiceLinkUser._audioPipe.Writer.FlushAsync(_cancellationTokenSource.Token); - static void DecodeOpusAudio(ReadOnlySpan opusPacket, VoiceLinkUser voiceLinkUser) + static void DecodeOpusAudio(ReadOnlySpan opusPacket, VoiceLinkUser voiceLinkUser, bool hasPacketLoss = false) { // Calculate the frame size and buffer size - int sampleRate = 48000; // 48 kHz - double frameDuration = 0.020; // 20 milliseconds - int frameSize = (int)(sampleRate * frameDuration); // 960 samples - int bufferSize = frameSize * 2; // Stereo audio + const int sampleRate = 48000; // 48 kHz + const double frameDuration = 0.020; // 20 milliseconds + const int frameSize = (int)(sampleRate * frameDuration); // 960 samples + const int bufferSize = frameSize * 2; // Stereo audio // Allocate the buffer for the PCM data - Span pcmBuffer = new short[bufferSize]; + Span audioBuffer = voiceLinkUser._audioPipe.Writer.GetSpan(bufferSize); // Decode the Opus packet - voiceLinkUser._opusDecoder.Decode(opusPacket, ref pcmBuffer, frameSize, false); + voiceLinkUser._opusDecoder.Decode(opusPacket, audioBuffer, hasPacketLoss); // Write the audio to the pipe - Span audioBuffer = voiceLinkUser._audioPipe.Writer.GetSpan(bufferSize * sizeof(short)); - pcmBuffer.CopyTo(MemoryMarshal.Cast(audioBuffer)); - voiceLinkUser._audioPipe.Writer.Advance(bufferSize * sizeof(short)); + voiceLinkUser._audioPipe.Writer.Advance(bufferSize); } } } diff --git a/src/DSharpPlus.VoiceLink/VoiceLinkUser.cs b/src/DSharpPlus.VoiceLink/VoiceLinkUser.cs index 77b7f70..d220d59 100644 --- a/src/DSharpPlus.VoiceLink/VoiceLinkUser.cs +++ b/src/DSharpPlus.VoiceLink/VoiceLinkUser.cs @@ -10,16 +10,16 @@ namespace DSharpPlus.VoiceLink public sealed record VoiceLinkUser { public VoiceLinkConnection Connection { get; init; } - public uint Ssrc { get; internal set; } + public uint Ssrc { get; init; } public DiscordMember Member { get; internal set; } public VoiceSpeakingIndicators VoiceIndication { get; internal set; } = VoiceSpeakingIndicators.None; public PipeReader AudioPipe => _audioPipe.Reader; public Stream AudioStream => _audioPipe.Reader.AsStream(true); - internal Pipe _audioPipe { get; private set; } = new(); - internal OpusDecoder _opusDecoder { get; set; } = OpusDecoder.Create(OpusSampleRate.Opus48000Hz, 2); - internal ushort _lastSequence { get; set; } - internal uint _lastTimestamp { get; set; } + internal Pipe _audioPipe { get; init; } = new(); + internal OpusDecoder _opusDecoder { get; init; } = OpusDecoder.Create(OpusSampleRate.Opus48000Hz, 2); + internal ushort _lastSequence; + private bool _sequenceInitialized; public VoiceLinkUser(VoiceLinkConnection connection, uint ssrc, DiscordMember member) { @@ -27,5 +27,21 @@ public VoiceLinkUser(VoiceLinkConnection connection, uint ssrc, DiscordMember me Ssrc = ssrc; Member = member; } + + public bool UpdateSequence(ushort sequence) + { + bool hasPacketLoss = unchecked(++_lastSequence) != sequence; + if (hasPacketLoss) + { + _lastSequence = sequence; + if (!_sequenceInitialized) + { + _sequenceInitialized = true; + hasPacketLoss = false; + } + } + + return hasPacketLoss; + } } }