Skip to content

Commit

Permalink
Sloppily implement audio decoders
Browse files Browse the repository at this point in the history
  • Loading branch information
OoLunar committed Mar 20, 2024
1 parent a71a128 commit ae31a89
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 18 deletions.
11 changes: 11 additions & 0 deletions src/DSharpPlus.VoiceLink/AudioDecoders/IAudioDecoder.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
using System;

namespace DSharpPlus.VoiceLink.AudioDecoders
{
    /// <summary>
    /// Produces an <see cref="IAudioDecoder"/> instance from the supplied service provider.
    /// </summary>
    /// <param name="serviceProvider">The service provider made available to the factory.</param>
    /// <returns>The decoder created by the factory.</returns>
    public delegate IAudioDecoder AudioDecoderFactory(IServiceProvider serviceProvider);

    /// <summary>
    /// Turns a received audio payload into the bytes that are written to the output buffer.
    /// </summary>
    public interface IAudioDecoder
    {
        /// <summary>
        /// Gets the number of bytes a caller should reserve for the output buffer before calling
        /// <see cref="Decode(bool, ReadOnlySpan{byte}, Span{byte})"/>.
        /// </summary>
        /// <returns>The buffer size, in bytes.</returns>
        int GetMaxBufferSize();

        /// <summary>
        /// Decodes <paramref name="input"/> into <paramref name="output"/>.
        /// </summary>
        /// <param name="hasPacketLoss">Whether the caller detected packet loss before this payload.</param>
        /// <param name="input">The payload to decode.</param>
        /// <param name="output">The buffer that receives the decoded bytes.</param>
        /// <returns>The number of bytes written to <paramref name="output"/>.</returns>
        int Decode(bool hasPacketLoss, ReadOnlySpan<byte> input, Span<byte> output);
    }
}
18 changes: 18 additions & 0 deletions src/DSharpPlus.VoiceLink/AudioDecoders/OpusAudioDecoder.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using System;

namespace DSharpPlus.VoiceLink.AudioDecoders
{
/// <summary>
/// Pass-through decoder: the incoming payload is copied to the output buffer unchanged.
/// </summary>
public class OpusAudioDecoder : IAudioDecoder
{
    // Number of audio channels used when sizing the buffer.
    private const int CHANNELS = 2;

    // Largest frame accounted for when sizing the buffer.
    // NOTE(review): 5760 presumably corresponds to 120 ms at 48 kHz — confirm.
    private const int MAX_FRAME_SIZE = 5760;

    // Frame size * 2 * channel count.
    private const int MAX_BUFFER_SIZE = MAX_FRAME_SIZE * 2 * CHANNELS;

    /// <inheritdoc/>
    public int GetMaxBufferSize()
    {
        return MAX_BUFFER_SIZE;
    }

    /// <summary>
    /// Copies <paramref name="input"/> into <paramref name="output"/> without transforming it.
    /// </summary>
    /// <param name="hasPacketLoss">Not used by this decoder.</param>
    /// <param name="input">The payload to copy.</param>
    /// <param name="output">The destination buffer; must be at least as long as <paramref name="input"/>.</param>
    /// <returns>The number of bytes copied, i.e. the length of <paramref name="input"/>.</returns>
    /// <exception cref="ArgumentException"><paramref name="output"/> is shorter than <paramref name="input"/>.</exception>
    public int Decode(bool hasPacketLoss, ReadOnlySpan<byte> input, Span<byte> output)
    {
        int copiedBytes = input.Length;
        input.CopyTo(output);
        return copiedBytes;
    }
}
}
32 changes: 32 additions & 0 deletions src/DSharpPlus.VoiceLink/AudioDecoders/Pcm16BitAudioDecoder.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
using System;
using DSharpPlus.VoiceLink.Opus;

namespace DSharpPlus.VoiceLink.AudioDecoders
{
/// <summary>
/// Decodes Opus packets into 16-bit stereo PCM using an <see cref="OpusDecoder"/> configured for 48 kHz.
/// </summary>
public class Pcm16BitAudioDecoder : IAudioDecoder
{
    // 48 kHz
    private const int SAMPLE_RATE = 48000;

    // 20 milliseconds
    private const double FRAME_DURATION = 0.020;

    // 960 samples
    private const int FRAME_SIZE = (int)(SAMPLE_RATE * FRAME_DURATION);

    // Stereo audio + opus PCM units are 16 bits
    private const int BUFFER_SIZE = FRAME_SIZE * 2 * sizeof(short);

    /// <inheritdoc/>
    public int GetMaxBufferSize() => BUFFER_SIZE;

    // One decoder per instance, created for 48 kHz audio with two channels.
    private OpusDecoder _opusDecoder { get; init; } = OpusDecoder.Create(OpusSampleRate.Opus48000Hz, 2);

    /// <summary>
    /// Decodes a single Opus packet into <paramref name="output"/> as one 20 ms frame of 16-bit stereo PCM.
    /// </summary>
    /// <param name="hasPacketLoss">Forwarded to the Opus decoder as its final argument.</param>
    /// <param name="input">The Opus packet to decode.</param>
    /// <param name="output">The buffer that receives the PCM data; must hold at least <see cref="GetMaxBufferSize"/> bytes.</param>
    /// <returns>The number of bytes reported as written, which is always the full frame size in bytes.</returns>
    /// <exception cref="ArgumentException"><paramref name="output"/> is too small to hold a full frame.</exception>
    public int Decode(bool hasPacketLoss, ReadOnlySpan<byte> input, Span<byte> output)
    {
        // The decoder is asked for FRAME_SIZE samples and the caller is told BUFFER_SIZE bytes were
        // written, so a shorter buffer can never be valid. Fail up front, like the pass-through
        // decoder does when its destination is too short.
        if (output.Length < BUFFER_SIZE)
        {
            throw new ArgumentException($"The output buffer must be at least {BUFFER_SIZE} bytes long.", nameof(output));
        }

        // NOTE(review): any value returned by OpusDecoder.Decode is ignored here — confirm that every
        // packet decodes to exactly one full frame, otherwise the reported length is too large.
        _opusDecoder.Decode(input, output, FRAME_SIZE, hasPacketLoss);
        return BUFFER_SIZE;
    }
}
}
2 changes: 2 additions & 0 deletions src/DSharpPlus.VoiceLink/VoiceLinkConfiguration.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using DSharpPlus.VoiceLink.AudioDecoders;
using DSharpPlus.VoiceLink.VoiceEncrypters;
using Microsoft.Extensions.DependencyInjection;

Expand All @@ -11,5 +12,6 @@ public sealed record VoiceLinkConfiguration
private IServiceProvider? _serviceProvider;
public int MaxHeartbeatQueueSize { get; set; } = 5;
public IVoiceEncrypter VoiceEncrypter { get; set; } = new XSalsa20Poly1305();
public AudioDecoderFactory AudioDecoderFactory { get; set; } = _ => new Pcm16BitAudioDecoder();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ private static async ValueTask ReadyAsync(VoiceLinkConnection connection, ReadRe

// Insert our SSRC code
connection._logger.LogDebug("Connection {GuildId}: Bot's SSRC code is {Ssrc}.", connection.Guild.Id, voiceReadyPayload.Ssrc);
connection._speakers.Add(voiceReadyPayload.Ssrc, new(connection, voiceReadyPayload.Ssrc, connection.Member));
connection._speakers.Add(voiceReadyPayload.Ssrc, new(connection, voiceReadyPayload.Ssrc, connection.Member, connection._audioDecoderFactory(connection.Extension.Configuration.ServiceProvider)));

// Setup UDP while also doing ip discovery
connection._logger.LogDebug("Connection {GuildId}: Setting up UDP, sending ip discovery...", connection.Guild.Id);
Expand Down Expand Up @@ -174,7 +174,7 @@ private static async ValueTask SpeakingAsync(VoiceLinkConnection connection, Rea
// When we receive the speaking payload, we update the user's member object.
if (!connection._speakers.TryGetValue(voiceSpeakingPayload.Ssrc, out VoiceLinkUser? voiceLinkUser))
{
voiceLinkUser = new(connection, voiceSpeakingPayload.Ssrc, await connection.Guild.GetMemberAsync(voiceSpeakingPayload.UserId));
voiceLinkUser = new(connection, voiceSpeakingPayload.Ssrc, await connection.Guild.GetMemberAsync(voiceSpeakingPayload.UserId), connection._audioDecoderFactory(connection.Extension.Configuration.ServiceProvider));
connection._speakers.TryAdd(voiceSpeakingPayload.Ssrc, voiceLinkUser);
}
else
Expand Down
25 changes: 12 additions & 13 deletions src/DSharpPlus.VoiceLink/VoiceLinkConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
using DSharpPlus.Entities;
using DSharpPlus.EventArgs;
using DSharpPlus.Net.Abstractions;
using DSharpPlus.VoiceLink.AudioDecoders;
using DSharpPlus.VoiceLink.Commands;
using DSharpPlus.VoiceLink.Enums;
using DSharpPlus.VoiceLink.Payloads;
Expand Down Expand Up @@ -42,6 +43,7 @@ public sealed partial class VoiceLinkConnection
private CancellationTokenSource _cancellationTokenSource { get; init; } = new();
private Dictionary<uint, VoiceLinkUser> _speakers { get; init; } = [];
private IVoiceEncrypter _voiceEncrypter { get; init; }
private AudioDecoderFactory _audioDecoderFactory { get; init; }
private byte[] _secretKey { get; set; } = [];
private Pipe _audioPipe { get; init; } = new();

Expand All @@ -62,6 +64,7 @@ public VoiceLinkConnection(VoiceLinkExtension extension, DiscordChannel channel,
Channel = channel;
_logger = extension.Configuration.ServiceProvider.GetRequiredService<ILogger<VoiceLinkConnection>>();
_voiceEncrypter = extension.Configuration.VoiceEncrypter;
_audioDecoderFactory = extension.Configuration.AudioDecoderFactory;
}

public async ValueTask DisconnectAsync()
Expand Down Expand Up @@ -373,7 +376,7 @@ private void HandleRtcpReceiverReportPacket(byte[] buffer)
// We're explicitly passing a null member, however the dev should never expect this to
// be null as the speaking event should always fire once we receive both the user and the ssrc.
// TL;DR, this is to ensure we never lose any audio data.
voiceLinkUser = new(this, rtpHeader.Ssrc, null!);
voiceLinkUser = new(this, rtpHeader.Ssrc, null!, _audioDecoderFactory(Extension.Configuration.ServiceProvider), rtpHeader.Sequence);
_speakers.Add(rtpHeader.Ssrc, voiceLinkUser);
}

Expand Down Expand Up @@ -402,27 +405,23 @@ private void HandleRtcpReceiverReportPacket(byte[] buffer)
decryptedAudio = decryptedAudio[(4 + (4 * extensionLength))..];
}

// TODO: Handle FEC (Forward Error Correction) to recover from packet loss.
// * https://tools.ietf.org/html/rfc5109
bool hasPacketLoss = voiceLinkUser.UpdateSequence(rtpHeader.Sequence);

// Decode the audio
try
{
// Calculate the frame size and buffer size
const int sampleRate = 48000; // 48 kHz
const double frameDuration = 0.020; // 20 milliseconds
const int frameSize = (int)(sampleRate * frameDuration); // 960 samples
const int bufferSize = frameSize * 2 * sizeof(short); // Stereo audio + opus PCM units are 16 bits

// TODO: Handle FEC (Forward Error Correction) to recover from packet loss.
// * https://tools.ietf.org/html/rfc5109
bool hasPacketLoss = voiceLinkUser.UpdateSequence(rtpHeader.Sequence);
int maxBufferSize = voiceLinkUser.AudioDecoder.GetMaxBufferSize();

// Allocate the buffer for the PCM data
Span<byte> audioBuffer = voiceLinkUser._audioPipe.Writer.GetSpan(bufferSize);
Span<byte> audioBuffer = voiceLinkUser._audioPipe.Writer.GetSpan(maxBufferSize);

// Decode the Opus packet
voiceLinkUser._opusDecoder.Decode(decryptedAudio, audioBuffer, frameSize, hasPacketLoss);
int writtenBytes = voiceLinkUser.AudioDecoder.Decode(hasPacketLoss, decryptedAudio, audioBuffer);

// Write the audio to the pipe
voiceLinkUser._audioPipe.Writer.Advance(bufferSize);
voiceLinkUser._audioPipe.Writer.Advance(writtenBytes);
}
catch (Exception error)
{
Expand Down
7 changes: 4 additions & 3 deletions src/DSharpPlus.VoiceLink/VoiceLinkUser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
using System.IO;
using System.IO.Pipelines;
using DSharpPlus.Entities;
using DSharpPlus.VoiceLink.AudioDecoders;
using DSharpPlus.VoiceLink.Enums;
using DSharpPlus.VoiceLink.Opus;

namespace DSharpPlus.VoiceLink
{
Expand All @@ -13,18 +13,19 @@ public sealed record VoiceLinkUser
public uint Ssrc { get; init; }
public DiscordMember Member { get; internal set; }
public VoiceSpeakingIndicators VoiceIndication { get; internal set; } = VoiceSpeakingIndicators.None;
public IAudioDecoder AudioDecoder { get; init; }
public PipeReader AudioPipe => _audioPipe.Reader;
public Stream AudioStream => _audioPipe.Reader.AsStream(true);

internal Pipe _audioPipe { get; init; } = new();
internal OpusDecoder _opusDecoder { get; init; } = OpusDecoder.Create(OpusSampleRate.Opus48000Hz, 2);
internal ushort _lastSequence;

public VoiceLinkUser(VoiceLinkConnection connection, uint ssrc, DiscordMember member, ushort sequence = 0)
/// <summary>
/// Creates a new user entry for the given SSRC on a voice connection.
/// </summary>
/// <param name="connection">The connection this user belongs to.</param>
/// <param name="ssrc">The SSRC identifying the user's audio stream.</param>
/// <param name="member">The member associated with the SSRC; not validated here.</param>
/// <param name="audioDecoder">The decoder used for this user's audio.</param>
/// <param name="sequence">The initial value for the last seen sequence number.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="connection"/> or <paramref name="audioDecoder"/> is <see langword="null"/>.
/// </exception>
public VoiceLinkUser(VoiceLinkConnection connection, uint ssrc, DiscordMember member, IAudioDecoder audioDecoder, ushort sequence = 0)
{
    // Validate the required references before touching any state.
    ArgumentNullException.ThrowIfNull(connection);
    ArgumentNullException.ThrowIfNull(audioDecoder);

    Connection = connection;
    Ssrc = ssrc;
    Member = member;
    AudioDecoder = audioDecoder;
    _lastSequence = sequence;
}

Expand Down

0 comments on commit ae31a89

Please sign in to comment.